This is page 1 of 20. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .github
│ ├── actions
│ │ └── setup-python-env
│ │ └── action.yml
│ ├── dependabot.yml
│ └── workflows
│ ├── ci.yml
│ ├── deploy-docs.yml
│ ├── main.yml.disabled
│ ├── on-release-main.yml
│ └── validate-codecov-config.yml
├── .gitignore
├── .pre-commit-config.yaml
├── BIOMCP_DATA_FLOW.md
├── CHANGELOG.md
├── CNAME
├── codecov.yaml
├── docker-compose.yml
├── Dockerfile
├── docs
│ ├── apis
│ │ ├── error-codes.md
│ │ ├── overview.md
│ │ └── python-sdk.md
│ ├── assets
│ │ ├── biomcp-cursor-locations.png
│ │ ├── favicon.ico
│ │ ├── icon.png
│ │ ├── logo.png
│ │ ├── mcp_architecture.txt
│ │ └── remote-connection
│ │ ├── 00_connectors.png
│ │ ├── 01_add_custom_connector.png
│ │ ├── 02_connector_enabled.png
│ │ ├── 03_connect_to_biomcp.png
│ │ ├── 04_select_google_oauth.png
│ │ └── 05_success_connect.png
│ ├── backend-services-reference
│ │ ├── 01-overview.md
│ │ ├── 02-biothings-suite.md
│ │ ├── 03-cbioportal.md
│ │ ├── 04-clinicaltrials-gov.md
│ │ ├── 05-nci-cts-api.md
│ │ ├── 06-pubtator3.md
│ │ └── 07-alphagenome.md
│ ├── blog
│ │ ├── ai-assisted-clinical-trial-search-analysis.md
│ │ ├── images
│ │ │ ├── deep-researcher-video.png
│ │ │ ├── researcher-announce.png
│ │ │ ├── researcher-drop-down.png
│ │ │ ├── researcher-prompt.png
│ │ │ ├── trial-search-assistant.png
│ │ │ └── what_is_biomcp_thumbnail.png
│ │ └── researcher-persona-resource.md
│ ├── changelog.md
│ ├── CNAME
│ ├── concepts
│ │ ├── 01-what-is-biomcp.md
│ │ ├── 02-the-deep-researcher-persona.md
│ │ └── 03-sequential-thinking-with-the-think-tool.md
│ ├── developer-guides
│ │ ├── 01-server-deployment.md
│ │ ├── 02-contributing-and-testing.md
│ │ ├── 03-third-party-endpoints.md
│ │ ├── 04-transport-protocol.md
│ │ ├── 05-error-handling.md
│ │ ├── 06-http-client-and-caching.md
│ │ ├── 07-performance-optimizations.md
│ │ └── generate_endpoints.py
│ ├── faq-condensed.md
│ ├── FDA_SECURITY.md
│ ├── genomoncology.md
│ ├── getting-started
│ │ ├── 01-quickstart-cli.md
│ │ ├── 02-claude-desktop-integration.md
│ │ └── 03-authentication-and-api-keys.md
│ ├── how-to-guides
│ │ ├── 01-find-articles-and-cbioportal-data.md
│ │ ├── 02-find-trials-with-nci-and-biothings.md
│ │ ├── 03-get-comprehensive-variant-annotations.md
│ │ ├── 04-predict-variant-effects-with-alphagenome.md
│ │ ├── 05-logging-and-monitoring-with-bigquery.md
│ │ └── 06-search-nci-organizations-and-interventions.md
│ ├── index.md
│ ├── policies.md
│ ├── reference
│ │ ├── architecture-diagrams.md
│ │ ├── quick-architecture.md
│ │ ├── quick-reference.md
│ │ └── visual-architecture.md
│ ├── robots.txt
│ ├── stylesheets
│ │ ├── announcement.css
│ │ └── extra.css
│ ├── troubleshooting.md
│ ├── tutorials
│ │ ├── biothings-prompts.md
│ │ ├── claude-code-biomcp-alphagenome.md
│ │ ├── nci-prompts.md
│ │ ├── openfda-integration.md
│ │ ├── openfda-prompts.md
│ │ ├── pydantic-ai-integration.md
│ │ └── remote-connection.md
│ ├── user-guides
│ │ ├── 01-command-line-interface.md
│ │ ├── 02-mcp-tools-reference.md
│ │ └── 03-integrating-with-ides-and-clients.md
│ └── workflows
│ └── all-workflows.md
├── example_scripts
│ ├── mcp_integration.py
│ └── python_sdk.py
├── glama.json
├── LICENSE
├── lzyank.toml
├── Makefile
├── mkdocs.yml
├── package-lock.json
├── package.json
├── pyproject.toml
├── README.md
├── scripts
│ ├── check_docs_in_mkdocs.py
│ ├── check_http_imports.py
│ └── generate_endpoints_doc.py
├── smithery.yaml
├── src
│ └── biomcp
│ ├── __init__.py
│ ├── __main__.py
│ ├── articles
│ │ ├── __init__.py
│ │ ├── autocomplete.py
│ │ ├── fetch.py
│ │ ├── preprints.py
│ │ ├── search_optimized.py
│ │ ├── search.py
│ │ └── unified.py
│ ├── biomarkers
│ │ ├── __init__.py
│ │ └── search.py
│ ├── cbioportal_helper.py
│ ├── circuit_breaker.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── articles.py
│ │ ├── biomarkers.py
│ │ ├── diseases.py
│ │ ├── health.py
│ │ ├── interventions.py
│ │ ├── main.py
│ │ ├── openfda.py
│ │ ├── organizations.py
│ │ ├── server.py
│ │ ├── trials.py
│ │ └── variants.py
│ ├── connection_pool.py
│ ├── constants.py
│ ├── core.py
│ ├── diseases
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ └── search.py
│ ├── domain_handlers.py
│ ├── drugs
│ │ ├── __init__.py
│ │ └── getter.py
│ ├── exceptions.py
│ ├── genes
│ │ ├── __init__.py
│ │ └── getter.py
│ ├── http_client_simple.py
│ ├── http_client.py
│ ├── individual_tools.py
│ ├── integrations
│ │ ├── __init__.py
│ │ ├── biothings_client.py
│ │ └── cts_api.py
│ ├── interventions
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ └── search.py
│ ├── logging_filter.py
│ ├── metrics_handler.py
│ ├── metrics.py
│ ├── oncokb_helper.py
│ ├── openfda
│ │ ├── __init__.py
│ │ ├── adverse_events_helpers.py
│ │ ├── adverse_events.py
│ │ ├── cache.py
│ │ ├── constants.py
│ │ ├── device_events_helpers.py
│ │ ├── device_events.py
│ │ ├── drug_approvals.py
│ │ ├── drug_labels_helpers.py
│ │ ├── drug_labels.py
│ │ ├── drug_recalls_helpers.py
│ │ ├── drug_recalls.py
│ │ ├── drug_shortages_detail_helpers.py
│ │ ├── drug_shortages_helpers.py
│ │ ├── drug_shortages.py
│ │ ├── exceptions.py
│ │ ├── input_validation.py
│ │ ├── rate_limiter.py
│ │ ├── utils.py
│ │ └── validation.py
│ ├── organizations
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ └── search.py
│ ├── parameter_parser.py
│ ├── query_parser.py
│ ├── query_router.py
│ ├── rate_limiter.py
│ ├── render.py
│ ├── request_batcher.py
│ ├── resources
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ ├── instructions.md
│ │ └── researcher.md
│ ├── retry.py
│ ├── router_handlers.py
│ ├── router.py
│ ├── shared_context.py
│ ├── thinking
│ │ ├── __init__.py
│ │ ├── sequential.py
│ │ └── session.py
│ ├── thinking_tool.py
│ ├── thinking_tracker.py
│ ├── trials
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ ├── nci_getter.py
│ │ ├── nci_search.py
│ │ └── search.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cancer_types_api.py
│ │ ├── cbio_http_adapter.py
│ │ ├── endpoint_registry.py
│ │ ├── gene_validator.py
│ │ ├── metrics.py
│ │ ├── mutation_filter.py
│ │ ├── query_utils.py
│ │ ├── rate_limiter.py
│ │ └── request_cache.py
│ ├── variants
│ │ ├── __init__.py
│ │ ├── alphagenome.py
│ │ ├── cancer_types.py
│ │ ├── cbio_external_client.py
│ │ ├── cbioportal_mutations.py
│ │ ├── cbioportal_search_helpers.py
│ │ ├── cbioportal_search.py
│ │ ├── constants.py
│ │ ├── external.py
│ │ ├── filters.py
│ │ ├── getter.py
│ │ ├── links.py
│ │ ├── oncokb_client.py
│ │ ├── oncokb_models.py
│ │ └── search.py
│ └── workers
│ ├── __init__.py
│ ├── worker_entry_stytch.js
│ ├── worker_entry.js
│ └── worker.py
├── tests
│ ├── bdd
│ │ ├── cli_help
│ │ │ ├── help.feature
│ │ │ └── test_help.py
│ │ ├── conftest.py
│ │ ├── features
│ │ │ └── alphagenome_integration.feature
│ │ ├── fetch_articles
│ │ │ ├── fetch.feature
│ │ │ └── test_fetch.py
│ │ ├── get_trials
│ │ │ ├── get.feature
│ │ │ └── test_get.py
│ │ ├── get_variants
│ │ │ ├── get.feature
│ │ │ └── test_get.py
│ │ ├── search_articles
│ │ │ ├── autocomplete.feature
│ │ │ ├── search.feature
│ │ │ ├── test_autocomplete.py
│ │ │ └── test_search.py
│ │ ├── search_trials
│ │ │ ├── search.feature
│ │ │ └── test_search.py
│ │ ├── search_variants
│ │ │ ├── search.feature
│ │ │ └── test_search.py
│ │ └── steps
│ │ └── test_alphagenome_steps.py
│ ├── config
│ │ └── test_smithery_config.py
│ ├── conftest.py
│ ├── data
│ │ ├── ct_gov
│ │ │ ├── clinical_trials_api_v2.yaml
│ │ │ ├── trials_NCT04280705.json
│ │ │ └── trials_NCT04280705.txt
│ │ ├── myvariant
│ │ │ ├── myvariant_api.yaml
│ │ │ ├── myvariant_field_descriptions.csv
│ │ │ ├── variants_full_braf_v600e.json
│ │ │ ├── variants_full_braf_v600e.txt
│ │ │ └── variants_part_braf_v600_multiple.json
│ │ ├── oncokb_mock_responses.json
│ │ ├── openfda
│ │ │ ├── drugsfda_detail.json
│ │ │ ├── drugsfda_search.json
│ │ │ ├── enforcement_detail.json
│ │ │ └── enforcement_search.json
│ │ └── pubtator
│ │ ├── pubtator_autocomplete.json
│ │ └── pubtator3_paper.txt
│ ├── integration
│ │ ├── test_oncokb_integration.py
│ │ ├── test_openfda_integration.py
│ │ ├── test_preprints_integration.py
│ │ ├── test_simple.py
│ │ └── test_variants_integration.py
│ ├── tdd
│ │ ├── articles
│ │ │ ├── test_autocomplete.py
│ │ │ ├── test_cbioportal_integration.py
│ │ │ ├── test_fetch.py
│ │ │ ├── test_preprints.py
│ │ │ ├── test_search.py
│ │ │ └── test_unified.py
│ │ ├── conftest.py
│ │ ├── drugs
│ │ │ ├── __init__.py
│ │ │ └── test_drug_getter.py
│ │ ├── openfda
│ │ │ ├── __init__.py
│ │ │ ├── test_adverse_events.py
│ │ │ ├── test_device_events.py
│ │ │ ├── test_drug_approvals.py
│ │ │ ├── test_drug_labels.py
│ │ │ ├── test_drug_recalls.py
│ │ │ ├── test_drug_shortages.py
│ │ │ └── test_security.py
│ │ ├── test_biothings_integration_real.py
│ │ ├── test_biothings_integration.py
│ │ ├── test_circuit_breaker.py
│ │ ├── test_concurrent_requests.py
│ │ ├── test_connection_pool.py
│ │ ├── test_domain_handlers.py
│ │ ├── test_drug_approvals.py
│ │ ├── test_drug_recalls.py
│ │ ├── test_drug_shortages.py
│ │ ├── test_endpoint_documentation.py
│ │ ├── test_error_scenarios.py
│ │ ├── test_europe_pmc_fetch.py
│ │ ├── test_mcp_integration.py
│ │ ├── test_mcp_tools.py
│ │ ├── test_metrics.py
│ │ ├── test_nci_integration.py
│ │ ├── test_nci_mcp_tools.py
│ │ ├── test_network_policies.py
│ │ ├── test_offline_mode.py
│ │ ├── test_openfda_unified.py
│ │ ├── test_pten_r173_search.py
│ │ ├── test_render.py
│ │ ├── test_request_batcher.py.disabled
│ │ ├── test_retry.py
│ │ ├── test_router.py
│ │ ├── test_shared_context.py.disabled
│ │ ├── test_unified_biothings.py
│ │ ├── thinking
│ │ │ ├── __init__.py
│ │ │ └── test_sequential.py
│ │ ├── trials
│ │ │ ├── test_backward_compatibility.py
│ │ │ ├── test_getter.py
│ │ │ └── test_search.py
│ │ ├── utils
│ │ │ ├── test_gene_validator.py
│ │ │ ├── test_mutation_filter.py
│ │ │ ├── test_rate_limiter.py
│ │ │ └── test_request_cache.py
│ │ ├── variants
│ │ │ ├── constants.py
│ │ │ ├── test_alphagenome_api_key.py
│ │ │ ├── test_alphagenome_comprehensive.py
│ │ │ ├── test_alphagenome.py
│ │ │ ├── test_cbioportal_mutations.py
│ │ │ ├── test_cbioportal_search.py
│ │ │ ├── test_external_integration.py
│ │ │ ├── test_external.py
│ │ │ ├── test_extract_gene_aa_change.py
│ │ │ ├── test_filters.py
│ │ │ ├── test_getter.py
│ │ │ ├── test_links.py
│ │ │ ├── test_oncokb_client.py
│ │ │ ├── test_oncokb_helper.py
│ │ │ └── test_search.py
│ │ └── workers
│ │ └── test_worker_sanitization.js
│ └── test_pydantic_ai_integration.py
├── THIRD_PARTY_ENDPOINTS.md
├── tox.ini
├── uv.lock
└── wrangler.toml
```
# Files
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
```yaml
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: "v4.4.0"
4 | hooks:
5 | - id: check-case-conflict
6 | - id: check-merge-conflict
7 | - id: check-toml
8 | - id: check-yaml
9 | - id: end-of-file-fixer
10 | - id: trailing-whitespace
11 |
12 | - repo: https://github.com/astral-sh/ruff-pre-commit
13 | rev: "v0.6.3"
14 | hooks:
15 | - id: ruff
16 | args: [--exit-non-zero-on-fix]
17 | - id: ruff-format
18 |
19 | - repo: local
20 | hooks:
21 | - id: update-endpoints-doc
22 | name: Update THIRD_PARTY_ENDPOINTS.md
23 | entry: uv run python scripts/generate_endpoints_doc.py
24 | language: system
25 | pass_filenames: false
26 | files: 'src/biomcp/utils/endpoint_registry\.py$'
27 | - id: check-http-imports
28 | name: Check for direct HTTP library imports
29 | entry: uv run python scripts/check_http_imports.py
30 | language: system
31 | pass_filenames: false
32 | always_run: true
33 | files: '\.py$'
34 | - id: check-docs-in-mkdocs
35 | name: Check documentation files are in mkdocs.yml
36 | entry: uv run python scripts/check_docs_in_mkdocs.py
37 | language: system
38 | pass_filenames: false
39 | files: '^docs/.*\.md$|^mkdocs\.yml$'
40 |
41 | - repo: https://github.com/pre-commit/mirrors-prettier
42 | rev: "v3.0.3"
43 | hooks:
44 | - id: prettier
45 |
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | docs/source
2 |
3 | # From https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore
4 |
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 | cover/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | .pybuilder/
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # IPython
86 | profile_default/
87 | ipython_config.py
88 |
89 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
90 | __pypackages__/
91 |
92 | # Celery stuff
93 | celerybeat-schedule
94 | celerybeat.pid
95 |
96 | # SageMath parsed files
97 | *.sage.py
98 |
99 | # Environments
100 | .env
101 | .venv
102 | env/
103 | venv/
104 | ENV/
105 | env.bak/
106 | venv.bak/
107 |
108 | # Spyder project settings
109 | .spyderproject
110 | .spyproject
111 |
112 | # Rope project settings
113 | .ropeproject
114 |
115 | # mkdocs documentation
116 | /site
117 |
118 | # ruff
119 | .ruff_cache
120 |
121 | # mypy
122 | .mypy_cache/
123 | .dmypy.json
124 | dmypy.json
125 |
126 | # Pyre type checker
127 | .pyre/
128 |
129 | # pytype static type analyzer
130 | .pytype/
131 |
132 | # Cython debug symbols
133 | cython_debug/
134 |
135 | # Vscode config files
136 | .vscode/
137 |
138 | # PyCharm
139 | .idea/
140 |
141 | # LLMs Notes
142 | llms/
143 | vault/
144 |
145 | .DS_Store
146 | /node_modules/
147 |
148 | CLAUDE.md
149 | lzyank.toml
150 | experiment/
151 | alphagenome
152 | spike/
153 |
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | # BioMCP: Biomedical Model Context Protocol
2 |
3 | BioMCP is an open source (MIT License) toolkit that empowers AI assistants and
4 | agents with specialized biomedical knowledge. Built following the Model Context
5 | Protocol (MCP), it connects AI systems to authoritative biomedical data
6 | sources, enabling them to answer questions about clinical trials, scientific
7 | literature, and genomic variants with precision and depth.
8 |
9 | [Watch the BioMCP video on YouTube](https://www.youtube.com/watch?v=bKxOWrWUUhM)
10 |
11 | ## MCPHub Certification
12 |
13 | BioMCP is certified by [MCPHub](https://mcphub.com/mcp-servers/genomoncology/biomcp). This certification ensures that BioMCP follows best practices for Model Context Protocol implementation and provides reliable biomedical data access.
14 |
15 | ## Why BioMCP?
16 |
17 | While Large Language Models have broad general knowledge, they often lack
18 | specialized domain-specific information or access to up-to-date resources.
19 | BioMCP bridges this gap for biomedicine by:
20 |
21 | - Providing **structured access** to clinical trials, biomedical literature,
22 | and genomic variants
23 | - Enabling **natural language queries** to specialized databases without
24 | requiring knowledge of their specific syntax
25 | - Supporting **biomedical research** workflows through a consistent interface
26 | - Functioning as an **MCP server** for AI assistants and agents
27 |
28 | ## Biomedical Data Sources
29 |
30 | BioMCP integrates with multiple biomedical data sources:
31 |
32 | ### Literature Sources
33 |
34 | - **PubTator3/PubMed** - Peer-reviewed biomedical literature with entity annotations
35 | - **bioRxiv/medRxiv** - Preprint servers for biology and health sciences
36 | - **Europe PMC** - Open science platform including preprints
37 |
38 | ### Clinical & Genomic Sources
39 |
40 | - **ClinicalTrials.gov** - Clinical trial registry and results database
41 | - **NCI Clinical Trials Search API** - National Cancer Institute's curated cancer trials database
42 | - Advanced search filters (biomarkers, prior therapies, brain metastases)
43 | - Organization and intervention databases
44 | - Disease vocabulary with synonyms
45 | - **BioThings Suite** - Comprehensive biomedical data APIs:
46 | - **MyVariant.info** - Consolidated genetic variant annotation
47 | - **MyGene.info** - Real-time gene annotations and information
48 | - **MyDisease.info** - Disease ontology and synonym information
49 | - **MyChem.info** - Drug/chemical annotations and properties
50 | - **TCGA/GDC** - The Cancer Genome Atlas for cancer variant data
51 | - **1000 Genomes** - Population frequency data via Ensembl
52 | - **cBioPortal** - Cancer genomics portal with mutation occurrence data
53 | - **OncoKB** - Precision oncology knowledge base for clinical variant interpretation (demo server with BRAF, ROS1, TP53)
54 | - Therapeutic implications and FDA-approved treatments
55 | - Oncogenicity and mutation effect annotations
56 | - Works immediately without authentication
57 |
58 | ### Regulatory & Safety Sources
59 |
60 | - **OpenFDA** - FDA regulatory and safety data:
61 | - **Drug Adverse Events (FAERS)** - Post-market drug safety reports
62 | - **Drug Labels (SPL)** - Official prescribing information
63 | - **Device Events (MAUDE)** - Medical device adverse events, with genomic device filtering
64 |
65 | ## Available MCP Tools
66 |
67 | BioMCP provides 24 specialized tools for biomedical research:
68 |
69 | ### Core Tools (3)
70 |
71 | #### 1. Think Tool (ALWAYS USE FIRST!)
72 |
73 | **CRITICAL**: The `think` tool MUST be your first step for ANY biomedical research task.
74 |
75 | ```python
76 | # Start analysis with sequential thinking
77 | think(
78 | thought="Breaking down the query about BRAF mutations in melanoma...",
79 | thoughtNumber=1,
80 | totalThoughts=3,
81 | nextThoughtNeeded=True
82 | )
83 | ```
84 |
85 | The sequential thinking tool helps:
86 |
87 | - Break down complex biomedical problems systematically
88 | - Plan multi-step research approaches
89 | - Track reasoning progress
90 | - Ensure comprehensive analysis
91 |
92 | #### 2. Search Tool
93 |
94 | The search tool supports two modes:
95 |
96 | ##### Unified Query Language (Recommended)
97 |
98 | Use the `query` parameter with structured field syntax for powerful cross-domain searches:
99 |
100 | ```python
101 | # Simple natural language
102 | search(query="BRAF melanoma")
103 |
104 | # Field-specific search
105 | search(query="gene:BRAF AND trials.condition:melanoma")
106 |
107 | # Complex queries
108 | search(query="gene:BRAF AND variants.significance:pathogenic AND articles.date:>2023")
109 |
110 | # Get searchable fields schema
111 | search(get_schema=True)
112 |
113 | # Explain how a query is parsed
114 | search(query="gene:BRAF", explain_query=True)
115 | ```
116 |
117 | **Supported Fields:**
118 |
119 | - **Cross-domain**: `gene:`, `variant:`, `disease:`
120 | - **Trials**: `trials.condition:`, `trials.phase:`, `trials.status:`, `trials.intervention:`
121 | - **Articles**: `articles.author:`, `articles.journal:`, `articles.date:`
122 | - **Variants**: `variants.significance:`, `variants.rsid:`, `variants.frequency:`
123 |
124 | ##### Domain-Based Search
125 |
126 | Use the `domain` parameter with specific filters:
127 |
128 | ```python
129 | # Search articles (includes automatic cBioPortal integration)
130 | search(domain="article", genes=["BRAF"], diseases=["melanoma"])
131 |
132 | # Search with mutation-specific cBioPortal data
133 | search(domain="article", genes=["BRAF"], keywords=["V600E"])
134 | search(domain="article", genes=["SRSF2"], keywords=["F57*"]) # Wildcard patterns
135 |
136 | # Search trials
137 | search(domain="trial", conditions=["lung cancer"], phase="3")
138 |
139 | # Search variants
140 | search(domain="variant", gene="TP53", significance="pathogenic")
141 | ```
142 |
143 | **Note**: When searching articles with a gene parameter, cBioPortal data is automatically included:
144 |
145 | - Gene-level summaries show mutation frequency across cancer studies
146 | - Mutation-specific searches (e.g., "V600E") show study-level occurrence data
147 | - Cancer types are dynamically resolved from cBioPortal API
148 |
149 | #### 3. Fetch Tool
150 |
151 | Retrieve full details for a single article, trial, or variant:
152 |
153 | ```python
154 | # Fetch article details (supports both PMID and DOI)
155 | fetch(domain="article", id="34567890") # PMID
156 | fetch(domain="article", id="10.1101/2024.01.20.23288905") # DOI
157 |
158 | # Fetch trial with all sections
159 | fetch(domain="trial", id="NCT04280705", detail="all")
160 |
161 | # Fetch variant details
162 | fetch(domain="variant", id="rs113488022")
163 | ```
164 |
165 | **Domain-specific options:**
166 |
167 | - **Articles**: `detail="full"` retrieves full text if available
168 | - **Trials**: `detail` can be "protocol", "locations", "outcomes", "references", or "all"
169 | - **Variants**: Always returns full details
170 |
171 | ### Individual Tools (21)
172 |
173 | For users who prefer direct access to specific functionality, BioMCP also provides 21 individual tools:
174 |
175 | #### Article Tools (2)
176 |
177 | - **article_searcher**: Search PubMed/PubTator3 and preprints
178 | - **article_getter**: Fetch detailed article information (supports PMID and DOI)
179 |
180 | #### Trial Tools (6)
181 |
182 | - **trial_searcher**: Search ClinicalTrials.gov or NCI CTS API (via source parameter)
183 | - **trial_getter**: Fetch all trial details from either source
184 | - **trial_protocol_getter**: Fetch protocol information only (ClinicalTrials.gov)
185 | - **trial_references_getter**: Fetch trial publications (ClinicalTrials.gov)
186 | - **trial_outcomes_getter**: Fetch outcome measures and results (ClinicalTrials.gov)
187 | - **trial_locations_getter**: Fetch site locations and contacts (ClinicalTrials.gov)
188 |
189 | #### Variant Tools (2)
190 |
191 | - **variant_searcher**: Search MyVariant.info database
192 | - **variant_getter**: Fetch comprehensive variant details
193 |
194 | #### NCI-Specific Tools (6)
195 |
196 | - **nci_organization_searcher**: Search NCI's organization database
197 | - **nci_organization_getter**: Get organization details by ID
198 | - **nci_intervention_searcher**: Search NCI's intervention database (drugs, devices, procedures)
199 | - **nci_intervention_getter**: Get intervention details by ID
200 | - **nci_biomarker_searcher**: Search biomarkers used in trial eligibility criteria
201 | - **nci_disease_searcher**: Search NCI's controlled vocabulary of cancer conditions
202 |
203 | #### Gene, Disease & Drug Tools (3)
204 |
205 | - **gene_getter**: Get real-time gene information from MyGene.info
206 | - **disease_getter**: Get disease definitions and synonyms from MyDisease.info
207 | - **drug_getter**: Get drug/chemical information from MyChem.info
208 |
209 | **Note**: All individual tools that search by gene automatically include cBioPortal summaries when the `include_cbioportal` parameter is True (default). Trial searches can expand disease conditions with synonyms when `expand_synonyms` is True (default).
210 |
211 | ## Quick Start
212 |
213 | ### For Claude Desktop Users
214 |
215 | 1. **Install `uv`** if you don't have it (recommended):
216 |
217 | ```bash
218 | # macOS
219 | brew install uv
220 |
221 | # Windows/Linux
222 | pip install uv
223 | ```
224 |
225 | 2. **Configure Claude Desktop**:
226 | - Open Claude Desktop settings
227 | - Navigate to Developer section
228 | - Click "Edit Config" and add:
229 | ```json
230 | {
231 | "mcpServers": {
232 | "biomcp": {
233 | "command": "uv",
234 | "args": ["run", "--with", "biomcp-python", "biomcp", "run"]
235 | }
236 | }
237 | }
238 | ```
239 | - Restart Claude Desktop and start chatting about biomedical topics!
240 |
241 | ### Python Package Installation
242 |
243 | ```bash
244 | # Using pip
245 | pip install biomcp-python
246 |
247 | # Using uv (recommended for faster installation)
248 | uv pip install biomcp-python
249 |
250 | # Run directly without installation
251 | uv run --with biomcp-python biomcp trial search --condition "lung cancer"
252 | ```
253 |
254 | ## Configuration
255 |
256 | ### Environment Variables
257 |
258 | BioMCP supports optional environment variables for enhanced functionality:
259 |
260 | ```bash
261 | # cBioPortal API authentication (optional)
262 | export CBIO_TOKEN="your-api-token" # For authenticated access
263 | export CBIO_BASE_URL="https://www.cbioportal.org/api" # Custom API endpoint
264 |
265 | # OncoKB demo server (optional - advanced users only)
266 | # By default: Uses free demo server with BRAF, ROS1, TP53 (no setup required)
267 | # For full gene access: Set ONCOKB_TOKEN from your OncoKB license
268 | # export ONCOKB_TOKEN="your-oncokb-token" # www.oncokb.org/account/settings
269 |
270 | # Performance tuning
271 | export BIOMCP_USE_CONNECTION_POOL="true" # Enable HTTP connection pooling (default: true)
272 | export BIOMCP_METRICS_ENABLED="false" # Enable performance metrics (default: false)
273 | ```
274 |
275 | ## Running BioMCP Server
276 |
277 | BioMCP supports multiple transport protocols to suit different deployment scenarios:
278 |
279 | ### Local Development (STDIO)
280 |
281 | For direct integration with Claude Desktop or local MCP clients:
282 |
283 | ```bash
284 | # Default STDIO mode for local development
285 | biomcp run
286 |
287 | # Or explicitly specify STDIO
288 | biomcp run --mode stdio
289 | ```
290 |
291 | ### HTTP Server Mode
292 |
293 | BioMCP supports multiple HTTP transport protocols:
294 |
295 | #### Legacy SSE Transport (Worker Mode)
296 |
297 | For backward compatibility with existing SSE clients:
298 |
299 | ```bash
300 | biomcp run --mode worker
301 | # Server available at http://localhost:8000/sse
302 | ```
303 |
304 | #### Streamable HTTP Transport (Recommended)
305 |
306 | The new MCP-compliant Streamable HTTP transport provides optimal performance and standards compliance:
307 |
308 | ```bash
309 | biomcp run --mode streamable_http
310 |
311 | # Custom host and port
312 | biomcp run --mode streamable_http --host 127.0.0.1 --port 8080
313 | ```
314 |
315 | Features of Streamable HTTP transport:
316 |
317 | - Single `/mcp` endpoint for all operations
318 | - Dynamic response mode (JSON for quick operations, SSE for long-running)
319 | - Session management support (future)
320 | - Full MCP specification compliance (2025-03-26)
321 | - Better scalability for cloud deployments
322 |
323 | ### Deployment Options
324 |
325 | #### Docker
326 |
327 | ```bash
328 | # Build the Docker image locally
329 | docker build -t biomcp:latest .
330 |
331 | # Run the container
332 | docker run -p 8000:8000 biomcp:latest biomcp run --mode streamable_http
333 | ```
334 |
335 | #### Cloudflare Workers
336 |
337 | The worker mode can be deployed to Cloudflare Workers for global edge deployment.
338 |
339 | Note: Most APIs work without authentication, and tokens may provide higher rate limits; the NCI-specific tools require an NCI API key.
340 |
341 | ## Command Line Interface
342 |
343 | BioMCP provides a comprehensive CLI for direct database interaction:
344 |
345 | ```bash
346 | # Get help
347 | biomcp --help
348 |
349 | # Run the MCP server
350 | biomcp run
351 |
352 | # Article search examples
353 | biomcp article search --gene BRAF --disease Melanoma # Includes preprints by default
354 | biomcp article search --gene BRAF --no-preprints # Exclude preprints
355 | biomcp article get 21717063 --full
356 |
357 | # Clinical trial examples
358 | biomcp trial search --condition "Lung Cancer" --phase PHASE3
359 | biomcp trial search --condition melanoma --source nci --api-key YOUR_KEY # Use NCI API
360 | biomcp trial get NCT04280705 Protocol
361 | biomcp trial get NCT04280705 --source nci --api-key YOUR_KEY # Get from NCI
362 |
363 | # Variant examples with external annotations
364 | biomcp variant search --gene TP53 --significance pathogenic
365 | biomcp variant get rs113488022 # Includes TCGA, 1000 Genomes, and cBioPortal data by default
366 | biomcp variant get rs113488022 --no-external # Core annotations only
367 |
368 | # OncoKB integration (uses free demo server automatically)
369 | biomcp variant search --gene BRAF --include-oncokb # Works with BRAF, ROS1, TP53
370 |
371 | # NCI-specific examples (requires NCI API key)
372 | biomcp organization search "MD Anderson" --api-key YOUR_KEY
373 | biomcp organization get ORG123456 --api-key YOUR_KEY
374 | biomcp intervention search pembrolizumab --api-key YOUR_KEY
375 | biomcp intervention search --type Device --api-key YOUR_KEY
376 | biomcp biomarker search "PD-L1" --api-key YOUR_KEY
377 | biomcp disease search melanoma --source nci --api-key YOUR_KEY
378 | ```
379 |
380 | ## Testing & Verification
381 |
382 | Test your BioMCP setup with the MCP Inspector:
383 |
384 | ```bash
385 | npx @modelcontextprotocol/inspector uv run --with biomcp-python biomcp run
386 | ```
387 |
388 | This opens a web interface where you can explore and test all available tools.
389 |
390 | ## Enterprise Version: OncoMCP
391 |
392 | OncoMCP extends BioMCP with GenomOncology's enterprise-grade precision oncology
393 | platform (POP), providing:
394 |
395 | - **HIPAA-Compliant Deployment**: Secure on-premise options
396 | - **Real-Time Trial Matching**: Up-to-date status and arm-level matching
397 | - **Healthcare Integration**: Seamless EHR and data warehouse connectivity
398 | - **Curated Knowledge Base**: 15,000+ trials and FDA approvals
399 | - **Sophisticated Patient Matching**: Using integrated clinical and molecular
400 | profiles
401 | - **Advanced NLP**: Structured extraction from unstructured text
402 | - **Comprehensive Biomarker Processing**: Mutation and rule processing
403 |
404 | Learn more: [GenomOncology](https://genomoncology.com/)
405 |
406 | ## MCP Registries
407 |
408 | [BioMCP on Smithery](https://smithery.ai/server/@genomoncology/biomcp)
409 |
410 | <a href="https://glama.ai/mcp/servers/@genomoncology/biomcp">
411 | <img width="380" height="200" src="https://glama.ai/mcp/servers/@genomoncology/biomcp/badge" />
412 | </a>
413 |
414 | ## Example Use Cases
415 |
416 | ### Gene Information Retrieval
417 |
418 | ```python
419 | # Get comprehensive gene information
420 | gene_getter(gene_id_or_symbol="TP53")
421 | # Returns: Official name, summary, aliases, links to databases
422 | ```
423 |
424 | ### Disease Synonym Expansion
425 |
426 | ```python
427 | # Get disease information with synonyms
428 | disease_getter(disease_id_or_name="GIST")
429 | # Returns: "gastrointestinal stromal tumor" and other synonyms
430 |
431 | # Search trials with automatic synonym expansion
432 | trial_searcher(conditions=["GIST"], expand_synonyms=True)
433 | # Searches for: GIST OR "gastrointestinal stromal tumor" OR "GI stromal tumor"
434 | ```
435 |
436 | ### Integrated Biomedical Research
437 |
438 | ```python
439 | # 1. Always start with thinking
440 | think(thought="Analyzing BRAF V600E in melanoma treatment", thoughtNumber=1)
441 |
442 | # 2. Get gene context
443 | gene_getter("BRAF")
444 |
445 | # 3. Search for pathogenic variants with OncoKB clinical interpretation (uses free demo server)
446 | variant_searcher(gene="BRAF", hgvsp="V600E", significance="pathogenic", include_oncokb=True)
447 |
448 | # 4. Find relevant clinical trials with disease expansion
449 | trial_searcher(conditions=["melanoma"], interventions=["BRAF inhibitor"])
450 | ```
451 |
452 | ## Documentation
453 |
454 | For comprehensive documentation, visit [https://biomcp.org](https://biomcp.org)
455 |
456 | ### Developer Guides
457 |
458 | - [HTTP Client Guide](./docs/http-client-guide.md) - Using the centralized HTTP client
459 | - [Migration Examples](./docs/migration-examples.md) - Migrating from direct HTTP usage
460 | - [Error Handling Guide](./docs/developer-guides/05-error-handling.md) - Comprehensive error handling patterns
461 | - [Integration Testing Guide](./docs/integration-testing.md) - Best practices for reliable integration tests
462 | - [Third-Party Endpoints](./THIRD_PARTY_ENDPOINTS.md) - Complete list of external APIs used
463 | - [Testing Guide](./docs/developer-guides/02-contributing-and-testing.md) - Running tests and understanding test categories
464 |
465 | ## Development
466 |
467 | ### Running Tests
468 |
469 | ```bash
470 | # Run all tests (including integration tests)
471 | make test
472 |
473 | # Run only unit tests (excluding integration tests)
474 | uv run python -m pytest tests -m "not integration"
475 |
476 | # Run only integration tests
477 | uv run python -m pytest tests -m "integration"
478 | ```
479 |
480 | **Note**: Integration tests make real API calls and may fail due to network issues or rate limiting.
481 | In CI/CD, integration tests are run separately and allowed to fail without blocking the build.
482 |
483 | ## BioMCP Examples Repo
484 |
485 | Looking to see BioMCP in action?
486 |
487 | Check out the companion repository:
488 | 👉 **[biomcp-examples](https://github.com/genomoncology/biomcp-examples)**
489 |
490 | It contains real prompts, AI-generated research briefs, and evaluation runs across different models.
491 | Use it to explore capabilities, compare outputs, or benchmark your own setup.
492 |
493 | Have a cool example of your own?
494 | **We’d love for you to contribute!** Just fork the repo and submit a PR with your experiment.
495 |
496 | ## License
497 |
498 | This project is licensed under the MIT License.
499 |
```
--------------------------------------------------------------------------------
/tests/tdd/drugs/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for drug information tools."""
2 |
```
--------------------------------------------------------------------------------
/tests/tdd/openfda/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """Test package for OpenFDA integration."""
2 |
```
--------------------------------------------------------------------------------
/tests/tdd/thinking/__init__.py:
--------------------------------------------------------------------------------
```python
1 | # Test module for sequential thinking functionality
2 |
```
--------------------------------------------------------------------------------
/src/biomcp/thinking/__init__.py:
--------------------------------------------------------------------------------
```python
1 | from . import sequential
2 |
3 | __all__ = [
4 | "sequential",
5 | ]
6 |
```
--------------------------------------------------------------------------------
/src/biomcp/resources/__init__.py:
--------------------------------------------------------------------------------
```python
1 | from .getter import get_instructions
2 |
3 | __all__ = [
4 | "get_instructions",
5 | ]
6 |
```
--------------------------------------------------------------------------------
/src/biomcp/cli/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """BioMCP Command Line Interface."""
2 |
3 | from .main import app
4 |
5 | __all__ = ["app"]
6 |
```
--------------------------------------------------------------------------------
/src/biomcp/genes/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """Gene information tools for BioMCP."""
2 |
3 | from .getter import get_gene
4 |
5 | __all__ = ["get_gene"]
6 |
```
--------------------------------------------------------------------------------
/glama.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "$schema": "https://glama.ai/mcp/schemas/server.json",
3 | "maintainers": ["imaurer", "jyeakley"]
4 | }
5 |
```
--------------------------------------------------------------------------------
/src/biomcp/drugs/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """Drug information tools using MyChem.info."""
2 |
3 | from .getter import get_drug
4 |
5 | __all__ = ["get_drug"]
6 |
```
--------------------------------------------------------------------------------
/src/biomcp/workers/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """Cloudflare Workers module for BioMCP."""
2 |
3 | from .worker import create_worker_app
4 |
5 | __all__ = ["create_worker_app"]
6 |
```
--------------------------------------------------------------------------------
/src/biomcp/variants/__init__.py:
--------------------------------------------------------------------------------
```python
1 | from . import search
2 | from . import getter
3 | from . import external
4 |
5 | __all__ = [
6 | "external",
7 | "getter",
8 | "search",
9 | ]
10 |
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "devDependencies": {
3 | "wrangler": "^4.13.2"
4 | },
5 | "dependencies": {
6 | "hono": "^4.7.8",
7 | "jose": "^6.0.11"
8 | }
9 | }
10 |
```
--------------------------------------------------------------------------------
/codecov.yaml:
--------------------------------------------------------------------------------
```yaml
1 | coverage:
2 | range: 90..100
3 | round: down
4 | precision: 1
5 | status:
6 | project:
7 | default:
8 | target: 95%
9 | threshold: 0.5%
10 |
```
--------------------------------------------------------------------------------
/src/biomcp/utils/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """Utility modules for BioMCP."""
2 |
3 | from .query_utils import parse_or_query, contains_or_operator
4 |
5 | __all__ = ["contains_or_operator", "parse_or_query"]
6 |
```
--------------------------------------------------------------------------------
/src/biomcp/integrations/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """BioThings API integrations for BioMCP."""
2 |
3 | from .biothings_client import BioThingsClient, DiseaseInfo, GeneInfo
4 |
5 | __all__ = ["BioThingsClient", "DiseaseInfo", "GeneInfo"]
6 |
```
--------------------------------------------------------------------------------
/src/biomcp/variants/constants.py:
--------------------------------------------------------------------------------
```python
1 | """Constants for variant modules."""
2 |
3 | import os
4 |
5 | # cBioPortal API endpoints
6 | CBIO_BASE_URL = os.getenv("CBIO_BASE_URL", "https://www.cbioportal.org/api")
7 | CBIO_TOKEN = os.getenv("CBIO_TOKEN")
8 |
```
--------------------------------------------------------------------------------
/src/biomcp/articles/__init__.py:
--------------------------------------------------------------------------------
```python
1 | from . import autocomplete
2 | from . import fetch
3 | from . import search
4 | from . import preprints
5 | from . import unified
6 |
7 |
8 | __all__ = [
9 | "autocomplete",
10 | "fetch",
11 | "preprints",
12 | "search",
13 | "unified",
14 | ]
15 |
```
--------------------------------------------------------------------------------
/lzyank.toml:
--------------------------------------------------------------------------------
```toml
1 | [default]
2 | exclude = [
3 | "uv.lock",
4 | "lzyank.toml",
5 | ".github",
6 | "*.ini",
7 | ".pre-commit-config.yaml",
8 | "LICENSE",
9 | "codecov.yaml",
10 | "mkdocs.yml",
11 | "tests/data"
12 | ]
13 |
14 | [actions]
15 | include = [".github/"]
16 |
```
--------------------------------------------------------------------------------
/src/biomcp/trials/__init__.py:
--------------------------------------------------------------------------------
```python
1 | from . import getter
2 | from . import nci_getter
3 | from . import nci_search
4 | from . import search
5 | from .search import LineOfTherapy
6 |
7 | __all__ = [
8 | "LineOfTherapy",
9 | "getter",
10 | "nci_getter",
11 | "nci_search",
12 | "search",
13 | ]
14 |
```
--------------------------------------------------------------------------------
/src/biomcp/diseases/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """Disease information tools for BioMCP."""
2 |
3 | from .getter import get_disease
4 | from .search import search_diseases, get_disease_by_id, search_diseases_with_or
5 |
6 | __all__ = [
7 | "get_disease",
8 | "get_disease_by_id",
9 | "search_diseases",
10 | "search_diseases_with_or",
11 | ]
12 |
```
--------------------------------------------------------------------------------
/src/biomcp/interventions/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """Interventions module for NCI Clinical Trials API integration."""
2 |
3 | from .getter import get_intervention
4 | from .search import search_interventions, search_interventions_with_or
5 |
6 | __all__ = [
7 | "get_intervention",
8 | "search_interventions",
9 | "search_interventions_with_or",
10 | ]
11 |
```
--------------------------------------------------------------------------------
/src/biomcp/organizations/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """Organizations module for NCI Clinical Trials API integration."""
2 |
3 | from .getter import get_organization
4 | from .search import search_organizations, search_organizations_with_or
5 |
6 | __all__ = [
7 | "get_organization",
8 | "search_organizations",
9 | "search_organizations_with_or",
10 | ]
11 |
```
--------------------------------------------------------------------------------
/docs/robots.txt:
--------------------------------------------------------------------------------
```
1 | # Robots.txt for BioMCP Documentation
2 | # https://biomcp.org/
3 |
4 | User-agent: *
5 | Allow: /
6 |
7 | # Sitemap location
8 | Sitemap: https://biomcp.org/sitemap.xml
9 |
10 | # Rate limiting for crawlers
11 | Crawl-delay: 1
12 |
13 | # Block access to build artifacts
14 | Disallow: /site/
15 | Disallow: /.git/
16 | Disallow: /node_modules/
17 |
```
--------------------------------------------------------------------------------
/tests/data/pubtator/pubtator_autocomplete.json:
--------------------------------------------------------------------------------
```json
1 | [
2 | {
3 | "_id": "@GENE_BRAF",
4 | "biotype": "gene",
5 | "name": "BRAF",
6 | "description": "All Species",
7 | "match": "Matched on name <m>BRAF</m>"
8 | },
9 | {
10 | "_id": "@GENE_BRAFP1",
11 | "biotype": "gene",
12 | "name": "BRAFP1",
13 | "description": "All Species",
14 | "match": "Matched on name <m>BRAFP1</m>"
15 | }
16 | ]
17 |
```
--------------------------------------------------------------------------------
/src/biomcp/biomarkers/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """Biomarkers module for NCI Clinical Trials API integration.
2 |
3 | Note: CTRP documentation indicates biomarker data may have limited public availability.
4 | This module focuses on trial eligibility biomarkers.
5 | """
6 |
7 | from .search import search_biomarkers, search_biomarkers_with_or
8 |
9 | __all__ = ["search_biomarkers", "search_biomarkers_with_or"]
10 |
```
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
```
1 | [tox]
2 | skipsdist = true
3 | envlist = py311, py312, py313
4 |
5 | [gh-actions]
6 | python =
7 | 3.11: py311
8 | 3.12: py312
9 | 3.13: py313
10 |
11 | [testenv]
12 | passenv = PYTHON_VERSION
13 | allowlist_externals = uv
14 | commands =
15 | uv sync --python {envpython}
16 | uv run python -m pytest --doctest-modules tests --cov --cov-config=pyproject.toml --cov-report=xml
17 | mypy
18 |
```
--------------------------------------------------------------------------------
/src/biomcp/__main__.py:
--------------------------------------------------------------------------------
```python
1 | import sys
2 |
3 | from dotenv import load_dotenv
4 |
5 | from .cli import app
6 |
7 | # Load environment variables from .env file
8 | load_dotenv()
9 |
10 |
def main():
    """Run the BioMCP CLI app and exit with the status code it reports."""
    try:
        app(standalone_mode=True)
    except SystemExit as cli_exit:
        # Forward the CLI's exit status unchanged to the interpreter.
        status = cli_exit.code
        sys.exit(status)
16 |
17 |
18 | if __name__ == "__main__":
19 | main()
20 |
21 | # Make main() the callable when importing __main__
22 | __call__ = main
23 |
```
--------------------------------------------------------------------------------
/.github/workflows/validate-codecov-config.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: validate-codecov-config
2 |
3 | on:
4 | pull_request:
5 | paths: [codecov.yaml]
6 | push:
7 | branches: [main]
8 |
9 | jobs:
10 | validate-codecov-config:
11 | runs-on: ubuntu-22.04
12 | steps:
13 | - uses: actions/checkout@v6
14 | - name: Validate codecov configuration
15 | run: curl -sSL --fail-with-body --data-binary @codecov.yaml https://codecov.io/validate
16 |
```
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
```yaml
1 | services:
2 | biomcp-server:
3 | platform: linux/amd64
4 | build: .
5 | image: us.gcr.io/graceful-medley-134315/biomcp-server:${TAG}
6 | container_name: biomcp-server
7 | ports:
8 | - "8000:8000"
9 | environment:
10 | - MCP_MODE=streamable_http # Can be 'stdio', 'worker', 'http', or 'streamable_http'
11 | - ALPHAGENOME_API_KEY=${ALPHAGENOME_API_KEY:-}
12 | restart: unless-stopped
13 |
```
--------------------------------------------------------------------------------
/tests/tdd/variants/constants.py:
--------------------------------------------------------------------------------
```python
1 | """Constants for variant tests."""
2 |
3 | # API retry settings
4 | API_RETRY_DELAY_SECONDS = 1.0
5 | MAX_RETRY_ATTEMPTS = 2
6 |
7 | # Test data settings
8 | DEFAULT_MAX_STUDIES = 10 # Number of studies to query in integration tests
9 | STRUCTURE_CHECK_LIMIT = (
10 | 3 # Number of items to check when verifying data structures
11 | )
12 |
13 | # Timeout settings
14 | INTEGRATION_TEST_TIMEOUT = 30.0 # Maximum time for integration tests
15 |
```
--------------------------------------------------------------------------------
/src/biomcp/resources/getter.py:
--------------------------------------------------------------------------------
```python
1 | from pathlib import Path
2 |
3 | from .. import mcp_app
4 |
5 | RESOURCES_ROOT = Path(__file__).parent
6 |
7 |
@mcp_app.resource("biomcp://instructions.md")
def get_instructions() -> str:
    """Return the UTF-8 text of ``instructions.md`` stored beside this module."""
    instructions_path = RESOURCES_ROOT / "instructions.md"
    return instructions_path.read_text(encoding="utf-8")
11 |
12 |
@mcp_app.resource("biomcp://researcher.md")
def get_researcher() -> str:
    """Return the UTF-8 text of ``researcher.md`` stored beside this module."""
    researcher_path = RESOURCES_ROOT / "researcher.md"
    return researcher_path.read_text(encoding="utf-8")
16 |
```
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
```yaml
1 | version: 2
2 | updates:
3 | # Maintain dependencies for GitHub Actions
4 | - package-ecosystem: "github-actions"
5 | directory: "/"
6 | schedule:
7 | interval: "weekly"
8 | commit-message:
9 | prefix: "chore"
10 | include: "scope"
11 |
12 | # Maintain dependencies for Python
13 | - package-ecosystem: "pip"
14 | directory: "/"
15 | schedule:
16 | interval: "weekly"
17 | open-pull-requests-limit: 10
18 | commit-message:
19 | prefix: "chore"
20 | include: "scope"
21 | allow:
22 | - dependency-type: "all"
23 |
```
--------------------------------------------------------------------------------
/tests/tdd/conftest.py:
--------------------------------------------------------------------------------
```python
1 | from pathlib import Path
2 |
3 | from pytest import fixture
4 |
5 | from biomcp import http_client
6 |
7 |
@fixture
def anyio_backend():
    """Backend name handed to tests that request the ``anyio_backend`` fixture."""
    backend_name = "asyncio"
    return backend_name
11 |
12 |
class DummyCache:
    """In-memory stand-in for the HTTP cache, backed by a plain dict."""

    def __init__(self):
        # Public on purpose: tests may inspect the stored entries directly.
        self.store = {}

    def set(self, key, value, expire=None):
        """Store ``value`` under ``key``; ``expire`` is accepted but ignored."""
        self.store.update({key: value})

    def get(self, key, default=None):
        """Return the value stored under ``key``, or ``default`` if absent."""
        if key in self.store:
            return self.store[key]
        return default

    @property
    def count(self):
        """Number of entries currently held."""
        return len(self.store)

    def close(self):
        """Discard every stored entry."""
        self.store.clear()
29 |
30 |
@fixture
def http_cache():
    """Swap the HTTP client's cache for a fresh ``DummyCache`` during a test.

    Yields the dummy cache so the test can inspect it. On teardown the dummy
    is cleared and whatever ``http_client._cache`` held before the test is put
    back, so the in-memory cache does not leak into tests that do not request
    this fixture.
    """
    missing = object()
    previous = getattr(http_client, "_cache", missing)

    cache = DummyCache()
    http_client._cache = cache
    try:
        yield cache
    finally:
        cache.close()
        if previous is missing:
            # The attribute did not exist before the test; remove ours again.
            vars(http_client).pop("_cache", None)
        else:
            http_client._cache = previous
37 |
38 |
@fixture
def data_dir():
    """Path of the ``data`` directory two levels above this conftest file."""
    tests_root = Path(__file__).parent.parent
    return tests_root / "data"
42 |
```
--------------------------------------------------------------------------------
/.github/actions/setup-python-env/action.yml:
--------------------------------------------------------------------------------
```yaml
name: "Setup Python Environment"
description: "Set up Python environment for the given Python version"

inputs:
  python-version:
    description: "Python version to use"
    required: true
    default: "3.12"
  uv-version:
    description: "uv version to use"
    required: true
    default: "0.5.20"

runs:
  using: "composite"
  steps:
    - uses: actions/setup-python@v5
      with:
        python-version: ${{ inputs.python-version }}

    - name: Install uv
      uses: astral-sh/setup-uv@v2
      with:
        version: ${{ inputs.uv-version }}
        enable-cache: "true"
        # Key the cache on this action's own input rather than the caller's
        # matrix: callers without a matrix would otherwise get an empty
        # suffix and share one cache across Python versions.
        cache-suffix: ${{ inputs.python-version }}

    - name: Install Python dependencies
      run: uv sync --frozen
      shell: bash
31 |
```
--------------------------------------------------------------------------------
/src/biomcp/__init__.py:
--------------------------------------------------------------------------------
```python
1 | from .core import ensure_list, logger, mcp_app, StrEnum
2 |
3 | from . import constants
4 | from . import http_client
5 | from . import render
6 | from . import articles
7 | from . import trials
8 | from . import variants
9 | from . import resources
10 | from . import thinking
11 | from . import query_parser
12 | from . import query_router
13 | from . import router
14 | from . import thinking_tool
15 | from . import individual_tools
16 | from . import cbioportal_helper
17 |
18 |
19 | __all__ = [
20 | "StrEnum",
21 | "articles",
22 | "cbioportal_helper",
23 | "constants",
24 | "ensure_list",
25 | "http_client",
26 | "individual_tools",
27 | "logger",
28 | "mcp_app",
29 | "query_parser",
30 | "query_router",
31 | "render",
32 | "resources",
33 | "router",
34 | "thinking",
35 | "thinking_tool",
36 | "trials",
37 | "variants",
38 | ]
39 |
```
--------------------------------------------------------------------------------
/docs/developer-guides/generate_endpoints.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | """
3 | Generate third-party endpoints documentation from the endpoint registry.
4 |
5 | This script reads the endpoint registry and generates a markdown file
6 | documenting all third-party API endpoints used by BioMCP.
7 | """
8 |
9 | import sys
10 | from pathlib import Path
11 |
12 | # Add src to Python path
13 | sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))
14 |
15 | from biomcp.utils.endpoint_registry import EndpointRegistry
16 |
17 |
def main():
    """Generate the third-party endpoints markdown page next to this script.

    Builds the report from ``EndpointRegistry`` and writes it to
    ``03-third-party-endpoints.md`` in this script's directory.
    """
    # Initialize registry
    registry = EndpointRegistry()

    # Generate markdown report
    markdown_content = registry.generate_markdown_report()

    # Write to file. The encoding is explicit so any non-ASCII characters in
    # the report are written identically regardless of the platform locale.
    output_path = Path(__file__).parent / "03-third-party-endpoints.md"
    output_path.write_text(markdown_content, encoding="utf-8")

    print(f"Generated endpoints documentation: {output_path}")
31 |
32 |
33 | if __name__ == "__main__":
34 | main()
35 |
```
--------------------------------------------------------------------------------
/tests/tdd/articles/test_fetch.py:
--------------------------------------------------------------------------------
```python
1 | import json
2 |
3 | from biomcp.articles.fetch import fetch_articles
4 |
5 | pmids = [39293516, 34397683, 37296959]
6 |
7 |
async def test_fetch_full_text(anyio_backend):
    """Fetching with ``full=True`` yields JSON records that carry full text."""
    raw = await fetch_articles(pmids, full=True, output_json=True)
    assert isinstance(raw, str)

    articles = json.loads(raw)
    assert len(articles) == 3
    for article in articles:
        assert article["pmid"] in pmids
        assert len(article["title"]) > 10
        assert len(article["abstract"]) > 100
        assert article["full_text"] is not None
18 |
19 |
async def test_fetch_abstracts(anyio_backend):
    """Fetching with ``full=False`` yields JSON records without full text."""
    raw = await fetch_articles(pmids, full=False, output_json=True)
    assert isinstance(raw, str)

    articles = json.loads(raw)
    assert len(articles) == 3
    for article in articles:
        assert article["pmid"] in pmids
        assert len(article["title"]) > 10
        assert len(article["abstract"]) > 100
        assert "full_text" not in article
30 |
```
--------------------------------------------------------------------------------
/src/biomcp/openfda/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | OpenFDA integration for BioMCP.
3 |
4 | Provides access to FDA drug labels, adverse events, device data,
5 | drug approvals, recalls, and shortage information.
6 | """
7 |
8 | from .adverse_events import (
9 | search_adverse_events,
10 | get_adverse_event,
11 | )
12 | from .drug_labels import (
13 | search_drug_labels,
14 | get_drug_label,
15 | )
16 | from .device_events import (
17 | search_device_events,
18 | get_device_event,
19 | )
20 | from .drug_approvals import (
21 | search_drug_approvals,
22 | get_drug_approval,
23 | )
24 | from .drug_recalls import (
25 | search_drug_recalls,
26 | get_drug_recall,
27 | )
28 | from .drug_shortages import (
29 | search_drug_shortages,
30 | get_drug_shortage,
31 | )
32 |
33 | __all__ = [
34 | "get_adverse_event",
35 | "get_device_event",
36 | "get_drug_approval",
37 | "get_drug_label",
38 | "get_drug_recall",
39 | "get_drug_shortage",
40 | "search_adverse_events",
41 | "search_device_events",
42 | "search_drug_approvals",
43 | "search_drug_labels",
44 | "search_drug_recalls",
45 | "search_drug_shortages",
46 | ]
47 |
```
--------------------------------------------------------------------------------
/.github/workflows/deploy-docs.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: Deploy Documentation
2 |
3 | on:
4 | # Allows you to manually trigger this workflow from the Actions tab
5 | workflow_dispatch:
6 |
7 | # Automatically trigger on pushes to main IF docs changed
8 | push:
9 | branches:
10 | - main
11 | paths:
12 | - "docs/**"
13 | - "mkdocs.yml"
14 | - ".github/workflows/deploy-docs.yml"
15 |
16 | jobs:
17 | deploy:
18 | runs-on: ubuntu-latest
19 | permissions:
20 | contents: write
21 | steps:
22 | - name: Check out code
23 | uses: actions/checkout@v6
24 | with:
25 | fetch-depth: 0
26 |
27 | - name: Set up Python environment
28 | uses: ./.github/actions/setup-python-env
29 | with:
30 | python-version: "3.11"
31 | uv-version: "0.5.20"
32 |
33 | - name: Configure Git User
34 | run: |
35 | git config user.name "github-actions[bot]"
36 | git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
37 |
38 | - name: Deploy documentation using MkDocs
39 | run: |
40 | uv run mkdocs gh-deploy --force
41 |
```
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
```dockerfile
1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile
2 | FROM python:3.11-slim
3 |
4 | # set work directory
5 | WORKDIR /app
6 |
7 | # Install build dependencies and git (needed for AlphaGenome)
8 | RUN apt-get update && apt-get install -y --no-install-recommends gcc build-essential git && rm -rf /var/lib/apt/lists/*
9 |
10 | # Copy requirements (pyproject.toml, etc.)
11 | COPY pyproject.toml .
12 | COPY README.md .
13 | COPY LICENSE .
14 |
15 | # Copy source code
16 | COPY src ./src
17 | COPY tests ./tests
18 | COPY Makefile .
19 | COPY tox.ini .
20 |
21 | # Install the package with worker dependencies
22 | RUN pip install --upgrade pip && pip install .[worker]
23 |
24 | # Clone and install AlphaGenome
25 | RUN git clone https://github.com/google-deepmind/alphagenome.git /tmp/alphagenome && \
26 | pip install /tmp/alphagenome && \
27 | rm -rf /tmp/alphagenome
28 |
29 | # Expose port for remote MCP connections
30 | EXPOSE 8000
31 |
32 | # Set default mode to stdio, but allow it to be overridden
33 | ENV MCP_MODE=stdio
34 |
35 | # Run the MCP server with configurable mode
36 | CMD ["sh", "-c", "biomcp run --mode ${MCP_MODE}"]
37 |
```
--------------------------------------------------------------------------------
/src/biomcp/thinking_tracker.py:
--------------------------------------------------------------------------------
```python
1 | """Track thinking tool usage within MCP sessions.
2 |
3 | This module provides a simple mechanism to track whether the think tool
4 | has been used in the current session, encouraging AI clients to follow
5 | best practices.
6 | """
7 |
8 | from contextvars import ContextVar
9 |
# Context-local flag: True once the think tool has been used in this context.
thinking_used: ContextVar[bool] = ContextVar("thinking_used", default=False)


def mark_thinking_used() -> None:
    """Record that the think tool has been used in the current context."""
    thinking_used.set(True)


def has_thinking_been_used() -> bool:
    """Return the current context's flag value (False until marked)."""
    return thinking_used.get()


def reset_thinking_tracker() -> None:
    """Set the flag back to False (for testing)."""
    thinking_used.set(False)


def get_thinking_reminder() -> str:
    """Return a reminder to use 'think', or "" once it has been used."""
    if has_thinking_been_used():
        return ""

    reminder_parts = (
        "\n\n⚠️ **REMINDER**: You haven't used the 'think' tool yet! ",
        "For optimal results, please use 'think' BEFORE searching to plan ",
        "your research strategy and ensure comprehensive analysis.",
    )
    return "".join(reminder_parts)
38 |
```
--------------------------------------------------------------------------------
/tests/bdd/cli_help/test_help.py:
--------------------------------------------------------------------------------
```python
1 | import shlex
2 |
3 | from pytest_bdd import given, parsers, scenarios, then
4 | from typer.testing import CliRunner
5 |
6 | from biomcp.cli.main import app
7 |
8 | # Link to the feature file
9 | scenarios("help.feature")
10 |
11 | runner = CliRunner()
12 |
13 |
@given(parsers.parse('I run "{command}"'), target_fixture="cli_result")
def cli_result(command):
    """Run the given CLI command through the Typer test runner.

    A leading ``biomcp`` program token is dropped, because the runner invokes
    the app object directly. Returns the runner result; fails the step if the
    command exits non-zero.
    """
    args = shlex.split(command)
    # Compare the first token exactly rather than by string prefix, so only
    # the literal program name is stripped.
    if args and args[0] == "biomcp":
        args = args[1:]

    result = runner.invoke(app, args)
    # Report ``result.output`` rather than ``result.stderr``: the latter raises
    # ValueError on runners that do not capture stderr separately, which would
    # mask the real failure.
    assert result.exit_code == 0, f"CLI command failed: {result.output}"
    return result
28 |
29 |
@then(parsers.parse('the output should contain "{expected}"'))
def output_should_contain(cli_result, expected):
    """Assert that the captured stdout contains ``expected``, ignoring case."""
    needle = expected.lower()
    haystack = cli_result.stdout.lower()
    found = needle in haystack
    assert found, f"Expected output to contain '{expected}', but it did not.\nActual output: {cli_result.stdout}"
40 |
```
--------------------------------------------------------------------------------
/tests/tdd/articles/test_autocomplete.py:
--------------------------------------------------------------------------------
```python
1 | from biomcp.articles.autocomplete import Entity, EntityRequest, autocomplete
2 |
3 |
async def test_autocomplete(anyio_backend, http_cache):
    """Autocomplete resolves gene, variant and disease queries, caching each."""
    # The fixture supplies an empty cache.
    assert http_cache.count == 0

    # Gene: only the resolved entity id is compared.
    gene_request = EntityRequest(concept="gene", query="her2")
    gene = await autocomplete(request=gene_request)
    assert gene.entity_id == "@GENE_ERBB2"

    # Variant: the whole entity is compared.
    variant_request = EntityRequest(concept="variant", query="BRAF V600E")
    variant = await autocomplete(request=variant_request)
    assert variant == Entity(
        _id="@VARIANT_p.V600E_BRAF_human",
        biotype="variant",
        name="p.V600E",
    )

    # Disease: the whole entity is compared, including the match note.
    disease_request = EntityRequest(
        concept="disease", query="lung adenocarcinoma"
    )
    disease = await autocomplete(request=disease_request)
    assert disease == Entity(
        _id="@DISEASE_Adenocarcinoma_of_Lung",
        biotype="disease",
        name="Adenocarcinoma of Lung",
        match="Multiple matches",
    )

    # One cache entry per distinct request so far.
    assert http_cache.count == 3

    # Repeating the gene request must not add a cache entry.
    repeat_request = EntityRequest(concept="gene", query="her2")
    repeat = await autocomplete(request=repeat_request)
    assert repeat.entity_id == "@GENE_ERBB2"
    assert http_cache.count == 3
37 |
```
--------------------------------------------------------------------------------
/scripts/generate_endpoints_doc.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | """Generate THIRD_PARTY_ENDPOINTS.md documentation."""
3 |
4 | import shutil
5 | import subprocess
6 | import sys
7 | from pathlib import Path
8 |
9 | # Add src to path
10 | sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
11 |
12 | from biomcp.utils.endpoint_registry import get_registry
13 |
14 |
def main():
    """Generate THIRD_PARTY_ENDPOINTS.md and format it with prettier if possible.

    The report comes from the endpoint registry and is written to the
    repository root (one directory above this script). If ``npx`` is on the
    PATH, prettier is run on the file; a formatting failure or a missing
    ``npx`` only prints a warning.
    """
    registry = get_registry()
    output_path = Path(__file__).parent.parent / "THIRD_PARTY_ENDPOINTS.md"

    # Generate new content
    new_content = registry.generate_markdown_report()

    # Write new content. The encoding is explicit so any non-ASCII characters
    # in the report are written identically regardless of the platform locale.
    output_path.write_text(new_content, encoding="utf-8")

    # Run prettier to format the file
    npx_path = shutil.which("npx")
    if npx_path:
        try:
            # Safe: npx_path from shutil.which, output_path is controlled
            subprocess.run(  # noqa: S603
                [npx_path, "prettier", "--write", str(output_path)],
                check=True,
                capture_output=True,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            print(f"Warning: prettier formatting failed: {e.stderr}")
    else:
        print("Warning: npx not found, skipping prettier formatting")

    print(f"Generated {output_path}")
43 |
44 |
45 | if __name__ == "__main__":
46 | main()
47 |
```
--------------------------------------------------------------------------------
/tests/data/openfda/drugsfda_search.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "meta": {
3 | "results": {
4 | "skip": 0,
5 | "limit": 10,
6 | "total": 25
7 | }
8 | },
9 | "results": [
10 | {
11 | "application_number": "BLA125514",
12 | "sponsor_name": "MERCK SHARP DOHME",
13 | "openfda": {
14 | "application_number": ["BLA125514"],
15 | "brand_name": ["KEYTRUDA"],
16 | "generic_name": ["PEMBROLIZUMAB"],
17 | "manufacturer_name": ["Merck Sharp & Dohme Corp."],
18 | "substance_name": ["PEMBROLIZUMAB"]
19 | },
20 | "products": [
21 | {
22 | "product_number": "001",
23 | "reference_drug": "Yes",
24 | "brand_name": "KEYTRUDA",
25 | "active_ingredients": [
26 | {
27 | "name": "PEMBROLIZUMAB",
28 | "strength": "100MG/4ML"
29 | }
30 | ],
31 | "reference_standard": "Yes",
32 | "dosage_form": "INJECTION, SOLUTION",
33 | "route": "INTRAVENOUS",
34 | "marketing_status": "Prescription"
35 | }
36 | ],
37 | "submissions": [
38 | {
39 | "submission_type": "BLA",
40 | "submission_number": "125514",
41 | "submission_status": "AP",
42 | "submission_status_date": "20140904",
43 | "submission_class_code": "BLA",
44 | "submission_class_code_description": "Biologic License Application"
45 | }
46 | ]
47 | }
48 | ]
49 | }
50 |
```
--------------------------------------------------------------------------------
/tests/tdd/variants/test_filters.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for the filters module."""
2 |
3 | import json
4 | import os
5 | from typing import Any
6 |
7 | import pytest
8 |
9 | from biomcp.variants.filters import filter_variants
10 |
11 |
12 | @pytest.fixture
def braf_v600e_variants() -> list[dict[str, Any]]:
    """Return the ``hits`` list stored in the BRAF V600E fixture file.

    The fixture path is resolved relative to this test module; a payload
    without a ``hits`` key yields an empty list.
    """
    here = os.path.dirname(__file__)
    fixture_file = os.path.join(
        here,
        "../../data/myvariant/variants_full_braf_v600e.json",
    )
    with open(fixture_file) as handle:
        payload = json.load(handle)
    return payload.get("hits", [])
22 |
23 |
def test_filter_variants_civic_contributors(braf_v600e_variants):
    """filter_variants removes civic.contributors and keeps the rest of civic."""
    original = braf_v600e_variants[0]

    # Precondition: the unfiltered fixture really carries civic.contributors,
    # otherwise the "removed" assertion below would pass vacuously.
    assert "civic" in original
    assert "contributors" in original["civic"]
    assert original["civic"]["contributors"] is not None

    result = filter_variants(braf_v600e_variants)
    first_filtered = result[0]

    # The civic section survives, only the contributors entry is gone.
    assert "civic" in first_filtered
    assert "contributors" not in first_filtered["civic"]

    # Unrelated civic data is left untouched.
    assert "id" in first_filtered["civic"]
    assert first_filtered["civic"]["id"] == original["civic"]["id"]
43 |
```
--------------------------------------------------------------------------------
/tests/bdd/search_articles/test_autocomplete.py:
--------------------------------------------------------------------------------
```python
1 | import asyncio
2 |
3 | from pytest_bdd import given, parsers, scenarios, then, when
4 |
5 | from biomcp.articles.autocomplete import (
6 | Concept,
7 | Entity,
8 | EntityRequest,
9 | autocomplete,
10 | )
11 |
12 | scenarios("autocomplete.feature")
13 |
14 |
15 | @given(
16 | parsers.parse(
17 | 'I have a valid concept "{concept}" and a valid query "{query}"',
18 | ),
19 | target_fixture="entity_request",
20 | )
def entity_request(concept: Concept, query: str):
    """Build the EntityRequest for a scenario with a valid concept and query."""
    request = EntityRequest(concept=concept, query=query)
    return request
23 |
24 |
25 | @given(
26 | parsers.parse(
27 | 'I have a valid concept "{concept}" and an invalid query "{query}"',
28 | ),
29 | target_fixture="entity_request",
30 | )
def invalid_query_request(concept: Concept, query: str):
    """Build the EntityRequest for a scenario whose query is expected to miss."""
    request = EntityRequest(concept=concept, query=query)
    return request
33 |
34 |
35 | @when(
36 | "I call the Pubtator Autocomplete API",
37 | target_fixture="entity",
38 | )
def entity(entity_request) -> Entity | None:
    """Run the async autocomplete call to completion and return its result."""
    pending_lookup = autocomplete(request=entity_request)
    return asyncio.run(pending_lookup)
41 |
42 |
43 | @then(parsers.parse('the response entity_id should be "{expected_id}"'))
def check_entity_id(entity, expected_id):
    """Assert that the returned entity carries the expected entity_id."""
    actual_id = entity.entity_id
    assert actual_id == expected_id
46 |
47 |
48 | @then(parsers.parse('the response concept should be "{concept}"'))
def check_concept(entity, concept):
    """Assert that the returned entity carries the expected concept."""
    actual_concept = entity.concept
    assert actual_concept == concept
51 |
52 |
53 | @then("the response should be empty")
def check_empty_response(entity):
    """Assert that the autocomplete call produced no entity at all."""
    nothing_returned = entity is None
    assert nothing_returned
56 |
```
--------------------------------------------------------------------------------
/src/biomcp/utils/gene_validator.py:
--------------------------------------------------------------------------------
```python
1 | """Gene symbol validation utilities."""
2 |
3 | import re
4 |
5 | # Common gene symbol patterns
6 | GENE_SYMBOL_PATTERN = re.compile(r"^[A-Z][A-Z0-9-]*(\.[0-9]+)?$")
7 |
8 | # Known problematic or invalid gene symbols
9 | INVALID_GENES = {
10 | "INVALID",
11 | "UNKNOWN",
12 | "NULL",
13 | "NONE",
14 | "TEST",
15 | "INVALID_GENE_XYZ",
16 | }
17 |
18 |
19 | def is_valid_gene_symbol(gene: str | None) -> bool:
20 | """Validate if a string is a valid gene symbol.
21 |
22 | Args:
23 | gene: The gene symbol to validate
24 |
25 | Returns:
26 | True if the gene symbol appears valid, False otherwise
27 |
28 | Notes:
29 | - Gene symbols should start with a letter
30 | - Can contain letters, numbers, and hyphens
31 | - May have a version suffix (e.g., .1, .2)
32 | - Should be uppercase
33 | - Should not be in the invalid genes list
34 | """
35 | if not gene:
36 | return False
37 |
38 | gene = gene.strip()
39 |
40 | # Check length constraints
41 | if len(gene) < 2 or len(gene) > 20:
42 | return False
43 |
44 | # Check against known invalid genes
45 | if gene.upper() in INVALID_GENES:
46 | return False
47 |
48 | # Check pattern
49 | return bool(GENE_SYMBOL_PATTERN.match(gene))
50 |
51 |
52 | def sanitize_gene_symbol(gene: str) -> str:
53 | """Sanitize a gene symbol for API calls.
54 |
55 | Args:
56 | gene: The gene symbol to sanitize
57 |
58 | Returns:
59 | Sanitized gene symbol in uppercase with whitespace stripped
60 | """
61 | return gene.strip().upper()
62 |
```
--------------------------------------------------------------------------------
/tests/bdd/search_articles/test_search.py:
--------------------------------------------------------------------------------
```python
1 | """Test steps for search_pubmed feature."""
2 |
3 | from __future__ import annotations
4 |
5 | import asyncio
6 | import json
7 |
8 | from pytest_bdd import given, parsers, scenarios, then, when
9 |
10 | from biomcp.articles.search import (
11 | PubmedRequest,
12 | search_articles,
13 | )
14 |
15 | scenarios("search.feature")
16 |
17 |
18 | @given(
19 | parsers.parse('I build a query for "{gene}" "{disease}" "{variant}"'),
20 | target_fixture="query",
21 | )
def query(gene, disease, variant) -> PubmedRequest:
    """Build a PubmedRequest holding exactly one gene, disease and variant."""
    search_terms = {
        "genes": [gene],
        "diseases": [disease],
        "variants": [variant],
    }
    return PubmedRequest(**search_terms)
28 |
29 |
30 | @when("I perform a search with that query", target_fixture="result")
def result(query) -> list[dict]:
    """Run the article search synchronously and decode its JSON output."""
    pending_search = search_articles(query, output_json=True)
    return json.loads(asyncio.run(pending_search))
34 |
35 |
36 | @then(parsers.parse('the response should contain the article "{pmid:d}"'))
def step_impl(result: list[dict], pmid: int):
    """Assert that the search result contains an article with the given PMID.

    On failure the message lists the PMIDs that were actually returned, which
    is what the original un-prefixed format string was meant to do.
    """
    pm_ids = [article["pmid"] for article in result]
    assert pmid in pm_ids, f"pmid {pmid} not found in {pm_ids}"
40 |
41 |
42 | @then(
43 | parsers.parse('the article "{pmid:d}" abstract should contain "{phrase}"'),
44 | )
def step_check_abstract(result: list[dict], pmid: int, phrase: str):
    """Assert that the article with the given PMID mentions phrase in its abstract.

    Only the first article that has both the PMID and a non-empty abstract is
    inspected; if there is none, an AssertionError is raised.
    """
    article = next(
        (
            candidate
            for candidate in result
            if candidate["pmid"] == pmid and candidate.get("abstract")
        ),
        None,
    )
    if article is None:
        raise AssertionError(f"Article {pmid} not found or has no abstract")

    assert (
        phrase in article["abstract"]
    ), f"Phrase '{phrase}' not found in article {pmid}'s abstract"
53 |
```
--------------------------------------------------------------------------------
/src/biomcp/workers/worker.py:
--------------------------------------------------------------------------------
```python
1 | """Worker implementation for BioMCP."""
2 |
3 | from fastapi import FastAPI, Response
4 | from fastapi.middleware.cors import CORSMiddleware
5 | from starlette.responses import JSONResponse
6 | from starlette.routing import Route
7 |
8 | from .. import mcp_app
9 |
10 | app = FastAPI(title="BioMCP Worker", version="0.1.10")
11 |
12 | # Add CORS middleware
13 | app.add_middleware(
14 | CORSMiddleware,
15 | allow_origins=["*"],
16 | allow_credentials=True,
17 | allow_methods=["*"],
18 | allow_headers=["*"],
19 | )
20 |
21 | streamable_app = mcp_app.streamable_http_app()
22 |
23 |
24 | # Add health endpoint to the streamable app before mounting
async def health_check(request):
    """Answer with a fixed healthy-status JSON body; the request is not inspected."""
    payload = {"status": "healthy"}
    return JSONResponse(payload)
27 |
28 |
29 | health_route = Route("/health", health_check, methods=["GET"])
30 | streamable_app.routes.append(health_route)
31 |
32 | app.mount("/", streamable_app)
33 |
34 |
35 | # Health endpoint is now added directly to the streamable_app above
36 |
37 |
38 | # Add OPTIONS endpoint for CORS preflight
39 | @app.options("/{path:path}")
async def options_handler(path: str):
    """Handle CORS preflight requests."""
    preflight_headers = {
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
        "Access-Control-Allow-Headers": "*",
        "Access-Control-Max-Age": "86400",  # 24 hours
    }
    # Empty 204 response: only the headers matter for a preflight.
    return Response(content="", status_code=204, headers=preflight_headers)
52 |
53 |
54 | # Create a stub for create_worker_app to satisfy imports
def create_worker_app() -> FastAPI:
    """Return the module-level FastAPI app.

    Kept as a stub so the import in __init__.py keeps resolving; no new
    application is built per call.
    """
    worker_app = app
    return worker_app
58 |
```
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
```python
1 | """Pytest configuration and fixtures."""
2 |
3 | import os
4 | from unittest.mock import AsyncMock, patch
5 |
6 | import pytest
7 |
8 | # Check if we should skip integration tests
9 | SKIP_INTEGRATION = os.environ.get("SKIP_INTEGRATION_TESTS", "").lower() in (
10 | "true",
11 | "1",
12 | "yes",
13 | )
14 |
15 |
def pytest_configure(config):
    """Register the custom ``integration`` marker with pytest."""
    marker_description = (
        "integration: marks tests as integration tests "
        "(deselect with '-m \"not integration\"')"
    )
    config.addinivalue_line("markers", marker_description)
22 |
23 |
def pytest_collection_modifyitems(config, items):
    """Mark integration tests as skipped when SKIP_INTEGRATION is set."""
    if not SKIP_INTEGRATION:
        return

    skip_marker = pytest.mark.skip(
        reason="Integration tests disabled via SKIP_INTEGRATION_TESTS env var"
    )
    integration_items = (
        item for item in items if "integration" in item.keywords
    )
    for item in integration_items:
        item.add_marker(skip_marker)
33 |
34 |
35 | @pytest.fixture
def mock_cbioportal_api():
    """Patch the cBioPortal gene-search summary with a canned BRAF result.

    Yields the mock that replaces
    ``CBioPortalSearchClient.get_gene_search_summary``; the patch is undone
    when the fixture is torn down.
    """
    target = "biomcp.variants.cbioportal_search.CBioPortalSearchClient.get_gene_search_summary"

    hotspot_mocks = [
        AsyncMock(amino_acid_change=change, count=count)
        for change, count in (("V600E", 800), ("V600K", 100))
    ]
    canned_summary = AsyncMock(
        gene="BRAF",
        total_mutations=1000,
        total_samples_tested=2000,
        mutation_frequency=50.0,
        hotspots=hotspot_mocks,
        cancer_distribution=["Melanoma", "Colorectal Cancer"],
        study_count=10,
    )

    with patch(target) as mock:
        mock.return_value = canned_summary
        yield mock
55 |
```
--------------------------------------------------------------------------------
/tests/bdd/get_variants/test_get.py:
--------------------------------------------------------------------------------
```python
1 | import json
2 | import shlex
3 |
4 | from pytest_bdd import given, parsers, scenarios, then
5 | from typer.testing import CliRunner
6 |
7 | from biomcp.cli.main import app
8 |
9 | # Link to the feature file
10 | scenarios("get.feature")
11 |
12 | runner = CliRunner()
13 |
14 |
15 | @given(parsers.parse('I run "{command}"'), target_fixture="cli_result")
def cli_result(command):
    """
    Invoke the CLI with the quoted command and return its stdout parsed as JSON.
    The first token of the command (the program name) is dropped before
    invoking; a non-zero exit code fails the step.
    """
    tokens = shlex.split(command)
    outcome = runner.invoke(app, tokens[1:])  # skip the leading "biomcp" token
    assert outcome.exit_code == 0, f"CLI command failed: {outcome.stderr}"
    return json.loads(outcome.stdout)
25 |
26 |
def get_field_value_from_variant(variant, field_path):
    """
    Retrieve a value from a variant dictionary using a simple dot-notation path.
    (This version does not support array indexing.)

    Returns None when any step of the path is missing, or when an intermediate
    value is not a dictionary (for example a list or a string), so callers
    scanning many variants can treat such variants as "no match" instead of
    hitting an AttributeError.
    """
    value = variant
    for part in field_path.split("."):
        # Only dictionaries can be descended into; anything else ends the walk.
        if not isinstance(value, dict):
            return None
        value = value.get(part)
    return value
39 |
40 |
41 | @then(
42 | parsers.parse(
43 | 'at least one variant should have field "{field}" equal to "{expected}"'
44 | )
45 | )
def variant_field_should_equal(cli_result, field, expected):
    """
    Assert that some variant in the CLI output has the given field whose
    string form equals the expected value.
    """
    # cli_result is already a list of variant dicts; every one is inspected.
    match_count = 0
    for candidate in cli_result:
        if str(get_field_value_from_variant(candidate, field)) == expected:
            match_count += 1

    assert (
        match_count
    ), f"No variant found with field '{field}' equal to '{expected}'"
59 |
```
--------------------------------------------------------------------------------
/tests/bdd/get_trials/test_get.py:
--------------------------------------------------------------------------------
```python
1 | import json
2 | import shlex
3 |
4 | from pytest_bdd import given, parsers, scenarios, then
5 | from typer.testing import CliRunner
6 |
7 | from biomcp.cli.main import app
8 |
9 | # Link to the feature file
10 | scenarios("get.feature")
11 |
12 | runner = CliRunner()
13 |
14 |
15 | @given(parsers.parse('I run "{command}"'), target_fixture="cli_result")
def cli_result(command):
    """
    Invoke the CLI with the quoted command and return its stdout parsed as JSON.
    A non-zero exit code fails the step.
    """
    # The first token is the program name ("biomcp"); only the rest is passed on.
    tokens = shlex.split(command)
    outcome = runner.invoke(app, tokens[1:])
    assert outcome.exit_code == 0, f"CLI command failed: {outcome.stderr}"
    return json.loads(outcome.stdout)
26 |
27 |
def get_field_value(data, field_path):
    """
    Walk a nested structure along a dot-separated path and return the value found.
    A segment may end in an index such as "locations[0]" to pick a list element.
    Missing keys or indexes raise the underlying lookup error.
    """
    current = data
    for segment in field_path.split("."):
        has_index = "[" in segment and segment.endswith("]")
        if not has_index:
            current = current[segment]
            continue

        # "locations[0]" -> key "locations", position 0
        base, index_str = segment[:-1].split("[")
        position = int(index_str)
        current = current[base][position]
    return current
44 |
45 |
46 | @then(parsers.parse('the field "{field}" should equal "{expected}"'))
def field_should_equal(cli_result, field, expected):
    """
    Assert that the value found at the dot-notation field, rendered as a
    string, equals the expected text.
    """
    actual = get_field_value(cli_result, field)
    # Feature files only carry text, so the comparison is done on strings.
    is_match = str(actual) == expected
    assert (
        is_match
    ), f"Expected field '{field}' to equal '{expected}', but got '{actual}'"
56 |
```
--------------------------------------------------------------------------------
/tests/bdd/conftest.py:
--------------------------------------------------------------------------------
```python
1 | import pytest
2 |
3 |
4 | def _recursive_extract(current_value, key_path, path_index):
5 | """Recursively extract values based on the key path."""
6 | if path_index >= len(key_path):
7 | if isinstance(current_value, list):
8 | yield from current_value
9 | else:
10 | yield current_value
11 |
12 | else:
13 | k = key_path[path_index]
14 | if isinstance(current_value, dict):
15 | next_value = current_value.get(k)
16 | if next_value is not None:
17 | yield from _recursive_extract(
18 | next_value,
19 | key_path,
20 | path_index + 1,
21 | )
22 |
23 | elif isinstance(current_value, list):
24 | for item in current_value:
25 | if isinstance(item, dict):
26 | next_value = item.get(k)
27 | if next_value is not None:
28 | yield from _recursive_extract(
29 | next_value,
30 | key_path,
31 | path_index + 1,
32 | )
33 |
34 |
def iter_value(field_map: dict, data: dict | list, key: str):
    """Yield (hit number, value) pairs for key across every hit in data.

    A dict payload contributes its "variants" list when present (the newer
    format that also carries a cBioPortal summary), otherwise its "hits" list;
    a list payload is used directly. field_map translates key into a path of
    nested keys, defaulting to the key itself. Hit numbers start at 1.
    """
    if not isinstance(data, dict):
        hits = data
    elif "variants" in data:
        hits = data["variants"]
    else:
        hits = data.get("hits", [])

    key_path = field_map.get(key, [key])

    for num, hit in enumerate(hits, 1):
        yield from ((num, value) for value in _recursive_extract(hit, key_path, 0))
48 |
49 |
50 | @pytest.fixture(scope="module")
def it() -> callable:
    """Expose iter_value to the step modules as a fixture."""
    helper = iter_value
    return helper
53 |
```
--------------------------------------------------------------------------------
/tests/data/openfda/enforcement_detail.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "meta": {
3 | "results": {
4 | "skip": 0,
5 | "limit": 1,
6 | "total": 1
7 | }
8 | },
9 | "results": [
10 | {
11 | "country": "United States",
12 | "city": "Princeton",
13 | "reason_for_recall": "Presence of N-Nitrosodimethylamine (NDMA) impurity above the acceptable daily intake limit",
14 | "address_1": "One Merck Drive",
15 | "address_2": "Building 5",
16 | "product_quantity": "5,432 bottles",
17 | "code_info": "Lot numbers: AB1234 (Exp 12/2024), CD5678 (Exp 01/2025), EF9012 (Exp 02/2025)",
18 | "center_classification_date": "20230615",
19 | "distribution_pattern": "Nationwide distribution to wholesalers and retail pharmacies in all 50 states",
20 | "state": "NJ",
21 | "product_description": "Valsartan Tablets USP, 160 mg, 90 count bottles, NDC 0378-5160-90",
22 | "report_date": "20230622",
23 | "classification": "Class II",
24 | "openfda": {
25 | "application_number": ["ANDA090802"],
26 | "brand_name": ["VALSARTAN"],
27 | "generic_name": ["VALSARTAN"],
28 | "manufacturer_name": ["Mylan Pharmaceuticals Inc."],
29 | "product_ndc": ["0378-5160"],
30 | "package_ndc": ["0378-5160-90"],
31 | "unii": ["80M03YXJ7I"],
32 | "spl_set_id": ["4b5c5f6d-7e8f-9g0h-1i2j-3k4l5m6n7o8p"]
33 | },
34 | "more_code_info": "Manufacturing dates: January 2023 - March 2023",
35 | "recalling_firm": "Mylan Pharmaceuticals Inc.",
36 | "recall_number": "D-0001-2023",
37 | "initial_firm_notification": "Letter",
38 | "product_type": "Drugs",
39 | "event_id": "91234",
40 | "termination_date": "",
41 | "recall_initiation_date": "20230610",
42 | "postal_code": "08540-0004",
43 | "voluntary_mandated": "Voluntary: Firm Initiated",
44 | "status": "Ongoing"
45 | }
46 | ]
47 | }
48 |
```
--------------------------------------------------------------------------------
/tests/data/openfda/enforcement_search.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "meta": {
3 | "results": {
4 | "skip": 0,
5 | "limit": 10,
6 | "total": 45
7 | }
8 | },
9 | "results": [
10 | {
11 | "country": "United States",
12 | "city": "Princeton",
13 | "reason_for_recall": "Presence of N-Nitrosodimethylamine (NDMA) impurity",
14 | "address_1": "One Merck Drive",
15 | "address_2": "",
16 | "product_quantity": "5,432 bottles",
17 | "code_info": "Lot numbers: AB1234, CD5678, EF9012",
18 | "center_classification_date": "20230615",
19 | "distribution_pattern": "Nationwide",
20 | "state": "NJ",
21 | "product_description": "Valsartan Tablets USP, 160 mg, 90 count bottles",
22 | "report_date": "20230622",
23 | "classification": "Class II",
24 | "openfda": {
25 | "application_number": ["ANDA090802"],
26 | "brand_name": ["VALSARTAN"],
27 | "generic_name": ["VALSARTAN"],
28 | "manufacturer_name": ["Mylan Pharmaceuticals Inc."]
29 | },
30 | "recalling_firm": "Mylan Pharmaceuticals Inc.",
31 | "recall_number": "D-0001-2023",
32 | "initial_firm_notification": "Letter",
33 | "product_type": "Drugs",
34 | "event_id": "91234",
35 | "recall_initiation_date": "20230610",
36 | "postal_code": "08540",
37 | "voluntary_mandated": "Voluntary: Firm Initiated",
38 | "status": "Ongoing"
39 | },
40 | {
41 | "country": "United States",
42 | "city": "New York",
43 | "reason_for_recall": "Contamination with foreign substance",
44 | "product_quantity": "10,000 units",
45 | "classification": "Class I",
46 | "product_description": "Metformin Hydrochloride Extended-Release Tablets, 500 mg",
47 | "report_date": "20230515",
48 | "recalling_firm": "Generic Pharma Corp",
49 | "recall_number": "D-0002-2023",
50 | "recall_initiation_date": "20230510",
51 | "status": "Completed"
52 | }
53 | ]
54 | }
55 |
```
--------------------------------------------------------------------------------
/src/biomcp/logging_filter.py:
--------------------------------------------------------------------------------
```python
1 | """Logging filter to suppress non-critical ASGI errors."""
2 |
3 | import logging
4 |
5 |
class ASGIErrorFilter(logging.Filter):
    """Drop a fixed set of ERROR-level ASGI/Starlette log records."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Return False to suppress the log record, True to allow it.

        Only ERROR records are candidates. One is suppressed when its message
        mentions "Exception in ASGI application", or mentions both
        "AssertionError" and "http.response.body", or mentions "unhandled
        errors in a TaskGroup" while the attached exception type's string form
        contains "AssertionError".
        """
        # Anything below or above ERROR passes straight through.
        if record.levelname != "ERROR":
            return True

        text = str(record.getMessage())

        if "Exception in ASGI application" in text:
            return False

        if "AssertionError" in text and "http.response.body" in text:
            return False

        attached_exc = getattr(record, "exc_info", None)
        if "unhandled errors in a TaskGroup" in text and attached_exc:
            exc_type, _exc_value, _traceback = attached_exc
            if exc_type and "AssertionError" in str(exc_type):
                return False

        return True
32 |
33 |
def setup_logging_filters():
    """Attach a fresh ASGIErrorFilter to the uvicorn, starlette and fastapi loggers."""
    filtered_logger_names = (
        "uvicorn.error",
        "uvicorn.access",
        "starlette",
        "fastapi",
    )
    # Each logger gets its own filter instance, in the order listed above.
    for logger_name in filtered_logger_names:
        logging.getLogger(logger_name).addFilter(ASGIErrorFilter())
52 |
```
--------------------------------------------------------------------------------
/src/biomcp/openfda/drug_shortages_detail_helpers.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Helper functions for formatting drug shortage details.
3 | """
4 |
5 | from typing import Any
6 |
7 |
def format_shortage_status(shortage: dict[str, Any]) -> list[str]:
    """Format status information for shortage detail.

    Returns a single-element list holding the status line. The line is
    prefixed with a red marker when the status text contains "current"
    (case-insensitive) and a green marker otherwise.

    A missing, None or empty status is shown as "Unknown"; the ``.get``
    default alone only covers a missing key, so an explicit null in the
    record used to crash on ``.lower()``.
    """
    status = shortage.get("status") or "Unknown"
    # str() keeps the check safe if the record carries a non-string status.
    status_emoji = "🔴" if "current" in str(status).lower() else "🟢"
    return [f"{status_emoji} **Status**: {status}"]
17 |
18 |
def format_shortage_names(shortage: dict[str, Any]) -> list[str]:
    """Format drug names for shortage detail.

    Emits a generic-name line when one is present, and a brand-names line
    when the brand list is non-empty and its first entry is truthy.
    """
    lines = []

    generic = shortage.get("generic_name")
    if generic:
        lines.append(f"**Generic Name**: {generic}")

    brands = shortage.get("brand_names")
    has_brands = brands and brands[0]
    if has_brands:
        joined = ", ".join(brands)
        lines.append(f"**Brand Names**: {joined}")

    return lines
31 |
32 |
def format_shortage_timeline(shortage: dict[str, Any]) -> list[str]:
    """Format timeline information for shortage detail.

    Always starts with the section heading, adds the start date when known,
    and ends with exactly one resolution line: the actual resolution date,
    else the estimate, else "Unknown".
    """
    lines = ["### Timeline"]

    started = shortage.get("shortage_start_date")
    if started:
        lines.append(f"**Shortage Started**: {started}")

    resolved = shortage.get("resolution_date")
    if resolved:
        lines.append(f"**Resolved**: {resolved}")
        return lines

    estimate = shortage.get("estimated_resolution")
    if estimate:
        lines.append(f"**Estimated Resolution**: {estimate}")
    else:
        lines.append("**Estimated Resolution**: Unknown")
    return lines
48 |
49 |
def format_shortage_details_section(shortage: dict[str, Any]) -> list[str]:
    """Format details section for shortage detail.

    Starts with the section heading, then adds the shortage reason and the
    notes (passed through clean_text) when each is present.
    """
    lines = ["### Details"]

    reason = shortage.get("reason")
    if reason:
        lines.append(f"**Reason for Shortage**:\n{reason}")

    notes = shortage.get("notes")
    if notes:
        # Imported here, as in the rest of this module, only when needed.
        from .utils import clean_text

        cleaned_notes = clean_text(notes)
        lines.append(f"\n**Additional Notes**:\n{cleaned_notes}")

    return lines
63 |
```
--------------------------------------------------------------------------------
/src/biomcp/openfda/exceptions.py:
--------------------------------------------------------------------------------
```python
1 | """Custom exceptions for OpenFDA integration."""
2 |
3 |
class OpenFDAError(Exception):
    """Base exception for OpenFDA-related errors.

    Every instance exposes ``message``: the first positional argument, or an
    empty string when the exception is raised without arguments. Storing it
    here means handlers that catch the base class can read ``.message``
    regardless of which concrete error was raised.
    """

    def __init__(self, *args: object) -> None:
        super().__init__(*args)
        self.message = args[0] if args else ""


class OpenFDARateLimitError(OpenFDAError):
    """Raised when FDA API rate limit is exceeded."""

    def __init__(self, message: str = "FDA API rate limit exceeded"):
        super().__init__(message)


class OpenFDAValidationError(OpenFDAError):
    """Raised when FDA response validation fails."""

    def __init__(self, message: str = "Invalid FDA API response"):
        super().__init__(message)


class OpenFDAConnectionError(OpenFDAError):
    """Raised when connection to FDA API fails."""

    def __init__(self, message: str = "Failed to connect to FDA API"):
        super().__init__(message)


class OpenFDANotFoundError(OpenFDAError):
    """Raised when requested resource is not found.

    Keeps the resource type and identifier as attributes next to the
    formatted message.
    """

    def __init__(self, resource_type: str, resource_id: str):
        super().__init__(f"{resource_type} not found: {resource_id}")
        self.resource_type = resource_type
        self.resource_id = resource_id


class OpenFDATimeoutError(OpenFDAError):
    """Raised when FDA API request times out."""

    def __init__(self, message: str = "FDA API request timeout"):
        super().__init__(message)


class OpenFDAInvalidParameterError(OpenFDAError):
    """Raised when invalid parameters are provided.

    Keeps the parameter name, the rejected value and the reason as attributes
    next to the formatted message.
    """

    def __init__(self, parameter: str, value: str, reason: str):
        super().__init__(
            f"Invalid parameter '{parameter}' with value '{value}': {reason}"
        )
        self.parameter = parameter
        self.value = value
        self.reason = reason
65 |
```
--------------------------------------------------------------------------------
/tests/bdd/fetch_articles/test_fetch.py:
--------------------------------------------------------------------------------
```python
1 | import json
2 | import shlex
3 |
4 | from pytest_bdd import given, parsers, scenarios, then
5 | from typer.testing import CliRunner
6 |
7 | from biomcp.cli.main import app
8 |
9 | scenarios("fetch.feature")
10 |
11 | runner = CliRunner()
12 |
13 |
14 | @given(parsers.parse('I run "{command}"'), target_fixture="cli_result")
def cli_result(command):
    """Invoke the CLI with the quoted command and return its stdout parsed as JSON.

    The exit code is not checked here: whatever JSON the command printed is
    handed to the following steps.
    """
    tokens = shlex.split(command)
    outcome = runner.invoke(app, tokens[1:])  # first token is the program name
    return json.loads(outcome.stdout)
20 |
21 |
22 | @then("the JSON output should be a non-empty list")
def check_non_empty_list(cli_result):
    """Assert that the parsed output is a list holding one or more articles."""
    is_list = isinstance(cli_result, list)
    assert is_list, "Expected JSON output to be a list"

    article_count = len(cli_result)
    assert article_count > 0, "Expected at least one article in the output"
27 |
28 |
29 | @then("the first article's abstract should be populated")
def check_abstract_populated(cli_result):
    """Assert that the first article carries an abstract with visible text."""
    first_article = cli_result[0]
    text = first_article.get("abstract")
    assert text is not None, "Abstract field is missing"

    stripped = text.strip()
    assert stripped != "", "Abstract field is empty"
36 |
37 |
38 | @then("the application should return an error")
def step_impl(cli_result):
    """Assert that the output is exactly the single 400 error entry."""
    expected_error = {
        "error": 'Error 400: {"detail":"Could not retrieve publications"}'
    }
    assert cli_result == [expected_error]
43 |
44 |
45 | @then("the first article should have a DOI field")
def check_doi_field(cli_result):
    """Assert that the first article has a DOI and that it starts with "10."."""
    first_article = cli_result[0]
    doi = first_article.get("doi")
    assert doi is not None, "DOI field is missing"

    has_doi_prefix = doi.startswith("10.")
    assert has_doi_prefix, f"Invalid DOI format: {doi}"
52 |
53 |
54 | @then("the source should be Europe PMC")
def check_europe_pmc_source(cli_result):
    """Assert that the first article reports Europe PMC as its source."""
    first_article = cli_result[0]
    source = first_article.get("source")
    from_europe_pmc = source == "Europe PMC"
    assert (
        from_europe_pmc
    ), f"Expected source 'Europe PMC', got '{source}'"
62 |
```
--------------------------------------------------------------------------------
/src/biomcp/metrics_handler.py:
--------------------------------------------------------------------------------
```python
1 | """MCP handler for metrics collection."""
2 |
3 | from typing import Annotated
4 |
5 | from biomcp.core import mcp_app
6 | from biomcp.metrics import get_all_metrics, get_metric_summary
7 |
8 |
9 | @mcp_app.tool()
async def get_performance_metrics(
    metric_name: Annotated[
        str | None,
        "Specific metric name to retrieve, or None for all metrics",
    ] = None,
) -> str:
    """Get performance metrics for BioMCP operations.

    Returns performance statistics including:
    - Request counts and success rates
    - Response time percentiles (p50, p95, p99)
    - Error rates and types
    - Domain-specific performance breakdown

    Parameters:
        metric_name: Optional specific metric to retrieve

    Returns:
        Formatted metrics report
    """
    # NOTE: the docstring above is left untouched on purpose; this function is
    # registered through mcp_app.tool(), which may expose it at runtime.

    # Single-metric report.
    if metric_name:
        single = await get_metric_summary(metric_name)
        if single:
            return _format_summary(single)
        return f"No metrics found for '{metric_name}'"

    # Full report: one section per metric, ordered by metric name.
    summaries_by_name = await get_all_metrics()
    if not summaries_by_name:
        return "No metrics collected yet"

    report = ["# BioMCP Performance Metrics\n"]
    for name in sorted(summaries_by_name):
        section = (
            f"## {name}",
            _format_summary(summaries_by_name[name]),
            "",
        )
        report.extend(section)

    return "\n".join(report)
49 |
50 |
51 | def _format_summary(summary) -> str:
52 | """Format a metric summary for display."""
53 | lines = [
54 | f"- Total requests: {summary.count}",
55 | f"- Success rate: {(1 - summary.error_rate) * 100:.1f}%",
56 | f"- Errors: {summary.error_count}",
57 | "",
58 | "### Response Times",
59 | f"- Average: {summary.avg_duration * 1000:.1f}ms",
60 | f"- Min: {summary.min_duration * 1000:.1f}ms",
61 | f"- Max: {summary.max_duration * 1000:.1f}ms",
62 | f"- P50: {summary.p50_duration * 1000:.1f}ms",
63 | f"- P95: {summary.p95_duration * 1000:.1f}ms",
64 | f"- P99: {summary.p99_duration * 1000:.1f}ms",
65 | ]
66 |
67 | return "\n".join(lines)
68 |
```
--------------------------------------------------------------------------------
/scripts/check_docs_in_mkdocs.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | """Check that all markdown files in docs/ are referenced in mkdocs.yml."""
3 |
4 | import sys
5 | from pathlib import Path
6 |
7 | import yaml # DEP004
8 |
9 |
def main():
    """Check that every markdown file under docs/ is referenced in mkdocs.yml.

    The ``nav`` section of mkdocs.yml is walked recursively and every string
    ending in ``.md`` is collected. Those entries are compared against the
    ``*.md`` files found under docs/ (paths relative to docs/).

    Returns:
        0 when every file is referenced, 1 otherwise (after printing the
        unreferenced paths).
    """
    repo_root = Path(__file__).parent.parent
    docs_dir = repo_root / "docs"
    mkdocs_path = repo_root / "mkdocs.yml"

    # Load mkdocs.yml. An empty file parses to None, so fall back to {}.
    with open(mkdocs_path, encoding="utf-8") as f:
        mkdocs_config = yaml.safe_load(f) or {}

    # Extract all referenced files from nav
    referenced_files = set()

    def extract_files(nav_item, prefix=""):
        """Recursively extract file paths from nav structure."""
        if isinstance(nav_item, dict):
            for value in nav_item.values():
                extract_files(value, prefix)
        elif isinstance(nav_item, list):
            for item in nav_item:
                extract_files(item, prefix)
        elif isinstance(nav_item, str) and nav_item.endswith(".md"):
            referenced_files.add(nav_item)

    extract_files(mkdocs_config.get("nav") or [])

    # Find all markdown files in docs/. as_posix() keeps forward slashes on
    # every platform so the paths are comparable with the nav entries.
    all_md_files = {
        md_file.relative_to(docs_dir).as_posix()
        for md_file in docs_dir.rglob("*.md")
    }

    # Find unreferenced files
    unreferenced = all_md_files - referenced_files

    # Exclude some files that shouldn't be in nav
    exclude_patterns = {
        "CNAME",  # GitHub pages config
        "README.md",  # If exists
    }

    unreferenced = {
        f
        for f in unreferenced
        if not any(pattern in f for pattern in exclude_patterns)
    }

    if unreferenced:
        print(
            "The following documentation files are not referenced in mkdocs.yml:"
        )
        for file in sorted(unreferenced):
            print(f" - {file}")
        print("\nPlease add them to the appropriate section in mkdocs.yml")
        return 1

    print("All documentation files are referenced in mkdocs.yml ✓")
    return 0
69 |
70 | if __name__ == "__main__":
71 | sys.exit(main())
72 |
```
--------------------------------------------------------------------------------
/src/biomcp/cbioportal_helper.py:
--------------------------------------------------------------------------------
```python
1 | """Helper module for cBioPortal integration across tools.
2 |
3 | This module centralizes cBioPortal summary generation logic to avoid duplication.
4 | """
5 |
6 | import logging
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
11 | async def get_cbioportal_summary_for_genes(
12 | genes: list[str] | None, request_params: dict | None = None
13 | ) -> str | None:
14 | """Get cBioPortal summary for given genes.
15 |
16 | Args:
17 | genes: List of gene symbols to get summaries for
18 | request_params: Optional additional parameters for the request
19 |
20 | Returns:
21 | Formatted cBioPortal summary or None if unavailable
22 | """
23 | if not genes:
24 | return None
25 |
26 | try:
27 | from biomcp.articles.search import PubmedRequest
28 | from biomcp.articles.unified import _get_cbioportal_summary
29 |
30 | # Create a request object for cBioPortal summary
31 | request = PubmedRequest(genes=genes)
32 |
33 | # Add any additional parameters if provided
34 | if request_params:
35 | for key, value in request_params.items():
36 | if hasattr(request, key):
37 | setattr(request, key, value)
38 |
39 | cbioportal_summary = await _get_cbioportal_summary(request)
40 | return cbioportal_summary
41 |
42 | except Exception as e:
43 | logger.warning(f"Failed to get cBioPortal summary: {e}")
44 | return None
45 |
46 |
47 | async def get_variant_cbioportal_summary(gene: str | None) -> str | None:
48 | """Get cBioPortal summary for variant searches.
49 |
50 | Args:
51 | gene: Gene symbol to get summary for
52 |
53 | Returns:
54 | Formatted cBioPortal summary or None if unavailable
55 | """
56 | if not gene:
57 | return None
58 |
59 | try:
60 | from biomcp.variants.cbioportal_search import (
61 | CBioPortalSearchClient,
62 | format_cbioportal_search_summary,
63 | )
64 |
65 | client = CBioPortalSearchClient()
66 | summary = await client.get_gene_search_summary(gene)
67 | if summary:
68 | return format_cbioportal_search_summary(summary)
69 | return None
70 |
71 | except Exception as e:
72 | logger.warning(
73 | f"Failed to get cBioPortal summary for variant search: {e}"
74 | )
75 | return None
76 |
```
--------------------------------------------------------------------------------
/src/biomcp/utils/rate_limiter.py:
--------------------------------------------------------------------------------
```python
1 | """Simple rate limiting utilities for API calls."""
2 |
3 | import asyncio
4 | import time
5 | from collections import defaultdict
6 |
7 |
8 | class RateLimiter:
9 | """Simple token bucket rate limiter."""
10 |
11 | def __init__(self, rate: int = 10, per_seconds: int = 1):
12 | """Initialize rate limiter.
13 |
14 | Args:
15 | rate: Number of allowed requests
16 | per_seconds: Time window in seconds
17 | """
18 | self.rate = rate
19 | self.per_seconds = per_seconds
20 | self.allowance: dict[str, float] = defaultdict(lambda: float(rate))
21 | self.last_check: dict[str, float] = defaultdict(float)
22 | self._lock = asyncio.Lock()
23 |
24 | async def check_rate_limit(
25 | self, key: str = "default"
26 | ) -> tuple[bool, float | None]:
27 | """Check if request is allowed under rate limit.
28 |
29 | Args:
30 | key: Identifier for rate limit bucket
31 |
32 | Returns:
33 | Tuple of (allowed, wait_time_if_not_allowed)
34 | """
35 | async with self._lock:
36 | current = time.time()
37 | time_passed = current - self.last_check[key]
38 | self.last_check[key] = current
39 |
40 | # Replenish tokens
41 | self.allowance[key] += time_passed * (self.rate / self.per_seconds)
42 |
43 | # Cap at maximum rate
44 | if self.allowance[key] > self.rate:
45 | self.allowance[key] = float(self.rate)
46 |
47 | # Check if request allowed
48 | if self.allowance[key] >= 1.0:
49 | self.allowance[key] -= 1.0
50 | return True, None
51 | else:
52 | # Calculate wait time
53 | wait_time = (1.0 - self.allowance[key]) * (
54 | self.per_seconds / self.rate
55 | )
56 | return False, wait_time
57 |
58 | async def wait_if_needed(self, key: str = "default") -> None:
59 | """Wait if rate limited before allowing request."""
60 | allowed, wait_time = await self.check_rate_limit(key)
61 | if not allowed and wait_time:
62 | await asyncio.sleep(wait_time)
63 |
64 |
65 | # Global rate limiter for cBioPortal API
66 | # Conservative: 5 requests per second
67 | cbioportal_limiter = RateLimiter(rate=5, per_seconds=1)
68 |
```
--------------------------------------------------------------------------------
/src/biomcp/articles/autocomplete.py:
--------------------------------------------------------------------------------
```python
1 | """Find entities for a given concept using the PUBTATOR API.
2 |
3 | Example URL:
4 | https://www.ncbi.nlm.nih.gov/research/pubtator3-api/entity/autocomplete/?query=BRAF
5 | """
6 |
7 | from typing import Literal
8 |
9 | from pydantic import BaseModel, Field, RootModel
10 |
11 | from .. import http_client
12 | from ..constants import PUBTATOR3_BASE_URL
13 |
14 | Concept = Literal["variant", "chemical", "disease", "gene"]
15 |
16 |
class EntityRequest(BaseModel):
    """Request parameters for an entity autocomplete lookup."""

    # Concept type for the lookup; may be left unset.
    concept: Concept | None = None
    # Text to look up.
    query: str
    # Validated to lie between 1 and 100 inclusive; defaults to 1.
    limit: int = Field(1, ge=1, le=100)
21 |
22 |
class Entity(BaseModel):
    """A single entity returned by the autocomplete endpoint.

    Two entities compare equal when their ``entity_id`` values match; the
    other fields are ignored by ``==``.
    """

    entity_id: str = Field(
        alias="_id",
        examples=["@GENE_BRAF"],
        description="Text-based entity following @<biotype>_<n> format.",
    )
    concept: Concept = Field(
        ...,
        alias="biotype",
        description="Entity label or concept type.",
    )
    name: str = Field(
        ...,
        description="Preferred term of entity concept.",
        examples=[
            "BRAF",
            "Adenocarcinoma of Lung",
            "Osimertinib",
            "EGFR L858R",
        ],
    )
    match: str | None = Field(
        default=None,
        description="Reason for the entity match.",
        examples=["Multiple matches", "Matched on name <m>NAME</m>"],
    )

    def __eq__(self, other: object) -> bool:
        # Defer to Python's fallback for foreign types instead of raising
        # AttributeError (e.g. on ``entity == None``).
        if not isinstance(other, Entity):
            return NotImplemented
        return self.entity_id == other.entity_id
52 |
53 |
class EntityList(RootModel):
    """Root model wrapping a list of ``Entity`` objects."""

    root: list[Entity]

    @property
    def first(self) -> Entity | None:
        """Return the first entity in the list, or None when it is empty."""
        if not self.root:
            return None
        return self.root[0]
60 |
61 |
62 | PUBTATOR3_AUTOCOMPLETE = f"{PUBTATOR3_BASE_URL}/entity/autocomplete/"
63 |
64 |
async def autocomplete(request: EntityRequest) -> Entity | None:
    """Look up the first matching Entity for a concept/query request.

    The request is sent to the PubTator3 autocomplete URL and the response
    is parsed as an ``EntityList``. None is returned when no response object
    comes back or when the list is empty.

    Example Request:
        {
            "concept": "gene",
            "query": "BRAF"
        }
    Response:
        {
            "entity_id": "@GENE_BRAF",
            "biotype": "gene",
            "name": "BRAF",
            "match": "Matched on name <m>BRAF</m>"
        }
    """
    # The second element of the returned pair is not used here.
    entities, _ = await http_client.request_api(
        url=PUBTATOR3_AUTOCOMPLETE,
        request=request,
        response_model_type=EntityList,
        domain="pubmed",
    )
    if not entities:
        return None
    return entities.first
89 |
```
--------------------------------------------------------------------------------
/docs/reference/visual-architecture.md:
--------------------------------------------------------------------------------
```markdown
1 | # Visual Architecture Guide
2 |
3 | ## System Architecture
4 |
5 | BioMCP follows a clean architecture pattern with three main layers:
6 |
7 | ### 1. User Interface Layer
8 |
9 | - **biomcp CLI**: Command-line interface for direct usage
10 | - **Claude Desktop**: AI assistant integration via MCP
11 | - **Python SDK**: Programmatic access for custom applications
12 |
13 | ### 2. BioMCP Core Layer
14 |
15 | - **MCP Server**: Handles Model Context Protocol communication
16 | - **Cache System**: Smart caching for API responses
17 | - **Router**: Unified query routing across data sources
18 |
19 | ### 3. Data Source Layer
20 |
21 | - **PubMed/PubTator3**: Biomedical literature and annotations
22 | - **ClinicalTrials.gov**: Clinical trial registry
23 | - **MyVariant.info**: Genetic variant database
24 | - **cBioPortal**: Cancer genomics data
25 | - **NCI CTS API**: National Cancer Institute trial data
26 | - **BioThings APIs**: Gene, drug, and disease information
27 |
28 | ## Data Flow
29 |
30 | 1. **Request Processing**:
31 |
32 | - User sends query via CLI, Claude, or SDK
33 | - BioMCP server receives and validates request
34 | - Router determines appropriate data source(s)
35 |
36 | 2. **Caching Strategy**:
37 |
38 | - Check cache for existing results
39 | - If cache miss, fetch from external API
40 | - Store results with appropriate TTL
41 | - Return formatted results to user
42 |
43 | 3. **Response Formatting**:
44 | - Raw API data is normalized
45 | - Domain-specific enrichment applied
46 | - Results formatted for consumption
47 |
48 | ## Architecture References
49 |
50 | - [Detailed Architecture Diagrams](architecture-diagrams.md)
51 | - [Quick Architecture Reference](quick-architecture.md)
52 |
53 | ## Key Architecture Patterns
54 |
55 | ### Domain Separation
56 |
57 | Each data source has its own module with dedicated:
58 |
59 | - Search functions
60 | - Result parsers
61 | - Error handlers
62 | - Cache strategies
63 |
64 | ### Unified Interface
65 |
66 | All domains expose consistent methods:
67 |
68 | - `search()`: Query for multiple results
69 | - `fetch()`: Get detailed record by ID
70 | - Common parameter names across domains
71 |
72 | ### Smart Caching
73 |
74 | - API responses cached 15-30 minutes
75 | - Cache keys include query parameters
76 | - Automatic cache invalidation on errors
77 | - Per-domain cache configuration
78 |
79 | ### Error Resilience
80 |
81 | - Graceful degradation when APIs unavailable
82 | - Specific error messages for troubleshooting
83 | - Automatic retries with exponential backoff
84 | - Fallback to cached data when possible
85 |
```
--------------------------------------------------------------------------------
/docs/faq-condensed.md:
--------------------------------------------------------------------------------
```markdown
1 | # FAQ - Quick Answers
2 |
3 | ## Getting Started
4 |
5 | **Q: What is BioMCP?**
6 | A: A unified interface to biomedical databases (PubMed, ClinicalTrials.gov, MyVariant, etc.) for researchers and AI assistants.
7 |
8 | **Q: Do I need API keys?**
9 | A: No for basic use. Yes for: NCI trials (cancer-specific), AlphaGenome (variant predictions), enhanced cBioPortal features.
10 |
11 | **Q: How do I install it?**
12 | A: `uv tool install biomcp` (recommended) or `pip install biomcp-python`
13 |
14 | ## Common Issues
15 |
16 | **Q: "Command not found" after installation**
17 | A: Restart terminal, or use full path: `~/.local/bin/biomcp`
18 |
19 | **Q: No results for gene search**
20 | A: Use official symbols (ERBB2 not HER2). Check at [genenames.org](https://www.genenames.org)
21 |
22 | **Q: Location search not working**
23 | A: Must provide coordinates: `--latitude 42.3601 --longitude -71.0589`
24 |
25 | **Q: Why does the AI use 'think' first?**
26 | A: Required for systematic analysis. Improves search quality and prevents missed connections.
27 |
28 | ## Search Tips
29 |
30 | **Q: How to search variant notations?**
31 | A: Use OR syntax: `--keyword "V600E|p.V600E|c.1799T>A"`
32 |
33 | **Q: Include/exclude preprints?**
34 | A: Included by default. Use `--no-preprints` to exclude.
35 |
36 | **Q: Search multiple databases?**
37 | A: Use unified search: `search(query="gene:BRAF AND disease:melanoma")`
38 |
39 | ## Data Questions
40 |
41 | **Q: How current is the data?**
42 | A: Daily updates for PubMed/trials, weekly for BioThings, varies for cBioPortal.
43 |
44 | **Q: ClinicalTrials.gov vs NCI?**
45 | A: CT.gov = comprehensive, NCI = cancer-focused with biomarker filters (needs API key).
46 |
47 | **Q: What's MSI/TMB/VAF?**
48 | A: MSI = Microsatellite Instability, TMB = Tumor Mutational Burden, VAF = Variant Allele Frequency
49 |
50 | ## Technical
51 |
52 | **Q: Rate limits?**
53 | A: ~3 req/sec without keys, higher with keys. NCI = 1000/day with key.
54 |
55 | **Q: Cache issues?**
56 | A: Clear with: `rm -rf ~/.biomcp/cache`
57 |
58 | **Q: Which Python version?**
59 | A: 3.10+ required
60 |
61 | ## Quick References
62 |
63 | **Common Gene Aliases:**
64 |
65 | - HER2 → ERBB2
66 | - PD-L1 → CD274
67 | - c-MET → MET
68 |
69 | **City Coordinates:**
70 |
71 | - NYC: 40.7128, -74.0060
72 | - Boston: 42.3601, -71.0589
73 | - LA: 34.0522, -118.2437
74 |
75 | **Trial Status:**
76 |
77 | - RECRUITING = Currently enrolling
78 | - ACTIVE_NOT_RECRUITING = Ongoing
79 | - COMPLETED = Finished
80 |
81 | ## Getting Help
82 |
83 | 1. Check this FAQ
84 | 2. Read [Troubleshooting](troubleshooting.md)
85 | 3. Search [GitHub Issues](https://github.com/genomoncology/biomcp/issues)
86 | 4. Ask with version info: `biomcp --version`
87 |
```
--------------------------------------------------------------------------------
/src/biomcp/variants/filters.py:
--------------------------------------------------------------------------------
```python
1 | """Module for filtering variant data based on paths."""
2 |
3 | from typing import Any
4 |
5 |
6 | def _get_nested_value(data: dict[str, Any], path: str) -> Any:
7 | """Get a nested value from a dictionary using dot notation path."""
8 | keys = path.split(".")
9 | current = data
10 | for key in keys[:-1]:
11 | if not isinstance(current, dict) or key not in current:
12 | return None
13 | current = current[key]
14 | return current
15 |
16 |
17 | def _delete_nested_path(data: dict[str, Any], path: str) -> None:
18 | """Delete a nested path from a dictionary using dot notation."""
19 | keys = path.split(".")
20 | current = data
21 | for key in keys[:-1]:
22 | if not isinstance(current, dict) or key not in current:
23 | return
24 | current = current[key]
25 |
26 | if isinstance(current, dict) and keys[-1] in current:
27 | del current[keys[-1]]
28 |
29 |
30 | def _deep_copy_dict(data: dict[str, Any]) -> dict[str, Any]:
31 | """Create a deep copy of a dictionary, handling nested dicts and lists."""
32 | result: dict[str, Any] = {}
33 | for key, value in data.items():
34 | if isinstance(value, dict):
35 | result[key] = _deep_copy_dict(value)
36 | elif isinstance(value, list):
37 | result[key] = [
38 | _deep_copy_dict(item) if isinstance(item, dict) else item
39 | for item in value
40 | ]
41 | else:
42 | result[key] = value
43 | return result
44 |
45 |
def filter_variants(variants: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """
    Return copies of the variants with every PATH_FILTERS path removed.

    Args:
        variants: List of variant dictionaries from MyVariant.info API

    Returns:
        New list of variant dictionaries with the filtered paths deleted;
        each variant is copied before deletion, so the input is not modified.
    """

    def _pruned_copy(variant: dict[str, Any]) -> dict[str, Any]:
        """Copy one variant and delete each filtered path from the copy."""
        pruned = _deep_copy_dict(variant)
        for filtered_path in PATH_FILTERS:
            _delete_nested_path(pruned, filtered_path)
        return pruned

    return [_pruned_copy(variant) for variant in variants]
69 |
70 |
71 | PATH_FILTERS = [
72 | "civic.contributors",
73 | "civic.molecularProfiles",
74 | "dbsnp.gene.rnas",
75 | "dbnsfp.clinvar", # duplicate of root-level clinvar
76 | "civic.lastAcceptedRevisionEvent",
77 | "civic.lastSubmittedRevisionEvent",
78 | "civic.creationActivity",
79 | ]
80 |
```
--------------------------------------------------------------------------------
/.github/workflows/on-release-main.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: release-main
2 |
3 | on:
4 | release:
5 | types: [published]
6 | branches: [main]
7 |
8 | jobs:
9 | set-version:
10 | runs-on: ubuntu-24.04
11 | steps:
12 | - uses: actions/checkout@v6
13 |
14 | - name: Export tag
15 | id: vars
16 | run: echo tag=${GITHUB_REF#refs/*/} >> $GITHUB_OUTPUT
17 | if: ${{ github.event_name == 'release' }}
18 |
19 | - name: Update project version
20 | run: |
21 | sed -i "s/^version = \".*\"/version = \"$RELEASE_VERSION\"/" pyproject.toml
22 | env:
23 | RELEASE_VERSION: ${{ steps.vars.outputs.tag }}
24 | if: ${{ github.event_name == 'release' }}
25 |
26 | - name: Upload updated pyproject.toml
27 | uses: actions/upload-artifact@v6
28 | with:
29 | name: pyproject-toml
30 | path: pyproject.toml
31 |
32 | test:
33 | runs-on: ubuntu-latest
34 | needs: [set-version]
35 | steps:
36 | - name: Check out
37 | uses: actions/checkout@v6
38 |
39 | - name: Download updated pyproject.toml
40 | uses: actions/download-artifact@v7
41 | with:
42 | name: pyproject-toml
43 |
44 | - name: Set up Python
45 | uses: actions/setup-python@v6
46 | with:
47 | python-version: "3.12"
48 |
49 | - name: Install uv
50 | uses: astral-sh/setup-uv@v7
51 | with:
52 | version: "0.4.29"
53 |
54 | - name: Install dependencies
55 | run: uv sync --group dev
56 |
57 | - name: Run tests
58 | run: uv run python -m pytest tests --cov --cov-config=pyproject.toml --cov-report=xml
59 |
60 | publish:
61 | runs-on: ubuntu-latest
62 | needs: [set-version, test]
63 | permissions:
64 | id-token: write
65 | environment: release
66 | steps:
67 | - name: Check out
68 | uses: actions/checkout@v6
69 |
70 | - name: Set up the environment
71 | uses: ./.github/actions/setup-python-env
72 |
73 | - name: Download updated pyproject.toml
74 | uses: actions/download-artifact@v7
75 | with:
76 | name: pyproject-toml
77 |
78 | - name: Build package
79 | run: uvx --from build pyproject-build --installer uv
80 |
81 | - name: Check package
82 | run: uvx twine check dist/*
83 |
84 | - name: Publish package
85 | uses: pypa/gh-action-pypi-publish@release/v1
86 | with:
87 | verbose: true
88 |
89 | deploy-docs:
90 | needs: publish
91 | runs-on: ubuntu-latest
92 | steps:
93 | - name: Check out
94 | uses: actions/checkout@v6
95 |
96 | - name: Set up the environment
97 | uses: ./.github/actions/setup-python-env
98 |
99 | - name: Deploy documentation
100 | run: uv run mkdocs gh-deploy --force
101 |
```
--------------------------------------------------------------------------------
/tests/data/openfda/drugsfda_detail.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "meta": {
3 | "results": {
4 | "skip": 0,
5 | "limit": 1,
6 | "total": 1
7 | }
8 | },
9 | "results": [
10 | {
11 | "application_number": "BLA125514",
12 | "sponsor_name": "MERCK SHARP DOHME",
13 | "openfda": {
14 | "application_number": ["BLA125514"],
15 | "brand_name": ["KEYTRUDA"],
16 | "generic_name": ["PEMBROLIZUMAB"],
17 | "manufacturer_name": ["Merck Sharp & Dohme Corp."],
18 | "substance_name": ["PEMBROLIZUMAB"],
19 | "product_ndc": ["0006-3026-02", "0006-3029-02"],
20 | "spl_set_id": ["c0e2de11-29e0-48a1-92f0-d9cb4dd56b15"],
21 | "unii": ["DPT0O3T46P"]
22 | },
23 | "products": [
24 | {
25 | "product_number": "001",
26 | "reference_drug": "Yes",
27 | "brand_name": "KEYTRUDA",
28 | "active_ingredients": [
29 | {
30 | "name": "PEMBROLIZUMAB",
31 | "strength": "100MG/4ML"
32 | }
33 | ],
34 | "reference_standard": "Yes",
35 | "dosage_form": "INJECTION, SOLUTION",
36 | "route": "INTRAVENOUS",
37 | "marketing_status": "Prescription"
38 | },
39 | {
40 | "product_number": "002",
41 | "reference_drug": "Yes",
42 | "brand_name": "KEYTRUDA",
43 | "active_ingredients": [
44 | {
45 | "name": "PEMBROLIZUMAB",
46 | "strength": "50MG/VIAL"
47 | }
48 | ],
49 | "reference_standard": "Yes",
50 | "dosage_form": "INJECTION, POWDER, LYOPHILIZED, FOR SOLUTION",
51 | "route": "INTRAVENOUS",
52 | "marketing_status": "Prescription"
53 | }
54 | ],
55 | "submissions": [
56 | {
57 | "submission_type": "BLA",
58 | "submission_number": "125514",
59 | "submission_status": "AP",
60 | "submission_status_date": "20140904",
61 | "review_priority": "P",
62 | "submission_class_code": "BLA",
63 | "submission_class_code_description": "Biologic License Application",
64 | "application_docs": [
65 | {
66 | "id": "52674",
67 | "url": "https://www.accessdata.fda.gov/drugsatfda_docs/label/2014/125514lbl.pdf",
68 | "date": "20140905",
69 | "type": "Label"
70 | }
71 | ]
72 | },
73 | {
74 | "submission_type": "SUPPL",
75 | "submission_number": "109",
76 | "submission_status": "AP",
77 | "submission_status_date": "20230316",
78 | "submission_class_code": "SUPPL",
79 | "submission_class_code_description": "Supplement"
80 | }
81 | ]
82 | }
83 | ]
84 | }
85 |
```
--------------------------------------------------------------------------------
/src/biomcp/exceptions.py:
--------------------------------------------------------------------------------
```python
1 | """Custom exceptions for BioMCP."""
2 |
3 | from typing import Any
4 |
5 |
6 | class BioMCPError(Exception):
7 | """Base exception for all BioMCP errors."""
8 |
9 | def __init__(self, message: str, details: dict[str, Any] | None = None):
10 | super().__init__(message)
11 | self.message = message
12 | self.details = details or {}
13 |
14 |
class BioMCPSearchError(BioMCPError):
    """Common parent for search-related errors; adds no behavior of its own."""
19 |
20 |
class InvalidDomainError(BioMCPSearchError):
    """Raised for a domain name that is not among the valid domains.

    ``details`` carries the offending ``domain`` and the ``valid_domains``
    list.
    """

    def __init__(self, domain: str, valid_domains: list[str]):
        allowed = ", ".join(valid_domains)
        context = {"domain": domain, "valid_domains": valid_domains}
        super().__init__(
            f"Unknown domain: {domain}. Valid domains are: {allowed}",
            context,
        )
29 |
30 |
class InvalidParameterError(BioMCPSearchError):
    """Raised when a parameter value is not acceptable.

    ``details`` carries the ``parameter`` name, the rejected ``value`` and a
    description of what was ``expected``.
    """

    def __init__(self, parameter: str, value: Any, expected: str):
        context = {
            "parameter": parameter,
            "value": value,
            "expected": expected,
        }
        super().__init__(
            f"Invalid value for parameter '{parameter}': {value}. Expected: {expected}",
            context,
        )
40 |
41 |
class SearchExecutionError(BioMCPSearchError):
    """Raised when running a search for a domain fails.

    ``details`` carries the ``domain`` and the stringified
    ``original_error``.
    """

    def __init__(self, domain: str, error: Exception):
        context = {"domain": domain, "original_error": str(error)}
        super().__init__(
            f"Failed to execute search for domain '{domain}': {error!s}",
            context,
        )
50 |
51 |
class ResultParsingError(BioMCPSearchError):
    """Raised when the results for a domain cannot be parsed.

    ``details`` carries the ``domain`` and the stringified
    ``original_error``.
    """

    def __init__(self, domain: str, error: Exception):
        context = {"domain": domain, "original_error": str(error)}
        super().__init__(
            f"Failed to parse results for domain '{domain}': {error!s}",
            context,
        )
60 |
61 |
class QueryParsingError(BioMCPError):
    """Raised when a query string cannot be parsed.

    ``details`` carries the ``query`` and the stringified ``original_error``.
    """

    def __init__(self, query: str, error: Exception):
        context = {"query": query, "original_error": str(error)}
        super().__init__(
            f"Failed to parse query '{query}': {error!s}",
            context,
        )
70 |
71 |
class ThinkingError(BioMCPError):
    """Raised when a sequential-thinking step fails.

    ``details`` carries the ``thought_number`` and the ``error`` text.
    """

    def __init__(self, thought_number: int, error: str):
        context = {"thought_number": thought_number, "error": error}
        super().__init__(
            f"Error in thought {thought_number}: {error}",
            context,
        )
80 |
```
--------------------------------------------------------------------------------
/docs/stylesheets/announcement.css:
--------------------------------------------------------------------------------
```css
1 | /* Announcement Banner Styles */
2 | .announcement-banner {
3 | background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
4 | border-radius: 12px;
5 | padding: 2rem;
6 | margin: 2rem 0;
7 | box-shadow: 0 10px 30px rgba(0, 0, 0, 0.15);
8 | position: relative;
9 | overflow: hidden;
10 | }
11 |
12 | .announcement-banner::before {
13 | content: "";
14 | position: absolute;
15 | top: -50%;
16 | right: -50%;
17 | width: 200%;
18 | height: 200%;
19 | background: radial-gradient(
20 | circle,
21 | rgba(255, 255, 255, 0.1) 0%,
22 | transparent 70%
23 | );
24 | animation: shimmer 3s infinite;
25 | }
26 |
27 | @keyframes shimmer {
28 | 0% {
29 | transform: rotate(0deg);
30 | }
31 | 100% {
32 | transform: rotate(360deg);
33 | }
34 | }
35 |
36 | .announcement-content {
37 | position: relative;
38 | z-index: 1;
39 | }
40 |
41 | .announcement-banner h2 {
42 | color: white !important;
43 | margin-top: 0 !important;
44 | font-size: 1.8rem;
45 | display: flex;
46 | align-items: center;
47 | gap: 0.5rem;
48 | }
49 |
50 | .announcement-banner .badge-new {
51 | background: #ff6b6b;
52 | color: white;
53 | padding: 0.2rem 0.6rem;
54 | border-radius: 20px;
55 | font-size: 0.8rem;
56 | font-weight: bold;
57 | animation: pulse 2s infinite;
58 | }
59 |
60 | @keyframes pulse {
61 | 0%,
62 | 100% {
63 | transform: scale(1);
64 | }
65 | 50% {
66 | transform: scale(1.05);
67 | }
68 | }
69 |
70 | .announcement-banner p {
71 | color: rgba(255, 255, 255, 0.95) !important;
72 | font-size: 1.1rem;
73 | margin: 1rem 0;
74 | }
75 |
76 | .announcement-banner .announcement-features {
77 | display: grid;
78 | grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
79 | gap: 1rem;
80 | margin: 1.5rem 0;
81 | }
82 |
83 | .announcement-banner .feature-item {
84 | background: rgba(255, 255, 255, 0.1);
85 | padding: 0.8rem;
86 | border-radius: 8px;
87 | backdrop-filter: blur(10px);
88 | border: 1px solid rgba(255, 255, 255, 0.2);
89 | }
90 |
91 | .announcement-banner .feature-item strong {
92 | color: white;
93 | display: block;
94 | margin-bottom: 0.3rem;
95 | }
96 |
97 | .announcement-banner .feature-item span {
98 | color: rgba(255, 255, 255, 0.85);
99 | font-size: 0.9rem;
100 | }
101 |
102 | .announcement-banner .cta-button {
103 | display: inline-block;
104 | background: white;
105 | color: #667eea !important;
106 | padding: 0.8rem 2rem;
107 | border-radius: 50px;
108 | text-decoration: none !important;
109 | font-weight: bold;
110 | margin-top: 1rem;
111 | transition: all 0.3s ease;
112 | box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
113 | }
114 |
115 | .announcement-banner .cta-button:hover {
116 | transform: translateY(-2px);
117 | box-shadow: 0 6px 20px rgba(0, 0, 0, 0.25);
118 | background: #f8f9fa;
119 | }
120 |
121 | .announcement-banner .cta-button::after {
122 | content: " →";
123 | font-size: 1.2rem;
124 | transition: transform 0.3s ease;
125 | display: inline-block;
126 | }
127 |
128 | .announcement-banner .cta-button:hover::after {
129 | transform: translateX(5px);
130 | }
131 |
```
--------------------------------------------------------------------------------
/tests/integration/test_simple.py:
--------------------------------------------------------------------------------
```python
1 | """Simple test to verify APIs work without Mastermind key."""
2 |
3 | import asyncio
4 |
5 | from biomcp.articles.preprints import EuropePMCClient
6 | from biomcp.variants.external import ExternalVariantAggregator
7 |
8 |
async def test_preprints():
    """Search Europe PMC for "cancer" and report whether anything came back.

    Returns:
        True when the search produced at least one result, False otherwise.
    """
    print("Testing Europe PMC preprint search...")
    europe_pmc = EuropePMCClient()

    # Search for a common term
    hits = await europe_pmc.search("cancer")

    if not hits:
        print("✗ No results found")
        return False

    print(f"✓ Found {len(hits)} preprints")
    print(f" First: {hits[0].title[:60]}...")
    return True
24 |
25 |
async def test_variants_without_mastermind():
    """Test variant aggregator without Mastermind API key.

    Returns:
        True once the aggregator call has returned. A failure surfaces as an
        exception raised by the awaited call, not as a False return value.
    """
    print("\nTesting variant aggregator without Mastermind key...")

    # Create aggregator
    aggregator = ExternalVariantAggregator()

    # Test with a variant - even if individual sources fail,
    # the aggregator should handle it gracefully
    result = await aggregator.get_enhanced_annotations(
        "BRAF V600E", include_tcga=True, include_1000g=True
    )

    print("✓ Aggregator completed without errors")
    print(f" Variant ID: {result.variant_id}")
    print(f" TCGA data: {'Found' if result.tcga else 'Not found'}")
    print(
        f" 1000G data: {'Found' if result.thousand_genomes else 'Not found'}"
    )
    print(
        f" Errors: {result.error_sources if result.error_sources else 'None'}"
    )

    # Key test: reaching this point means the aggregator completed, so the
    # former unreachable "handling incorrect" branch has been dropped.
    print("✓ Mastermind correctly skipped without API key")
    return True
56 |
57 |
async def main():
    """Run both checks in order, print a summary and return an exit code.

    Returns:
        0 when both checks reported success, 1 otherwise.
    """
    divider = "=" * 60

    print(divider)
    print("Testing BioMCP features without external API keys")
    print(divider)

    # Preprints first, then variants.
    preprint_ok = await test_preprints()
    variant_ok = await test_variants_without_mastermind()

    print("\n" + divider)
    print("Summary:")
    print(f" Preprint search: {'✓ PASS' if preprint_ok else '✗ FAIL'}")
    print(f" Variant aggregator: {'✓ PASS' if variant_ok else '✗ FAIL'}")
    print(divider)

    if not (preprint_ok and variant_ok):
        print("\n✗ Some features failed")
        return 1

    print("\n✓ All features work without external API keys!")
    return 0
82 |
83 |
if __name__ == "__main__":
    # Raise SystemExit directly: the ``exit`` helper is injected by the
    # ``site`` module for interactive use and is missing under ``python -S``.
    exit_code = asyncio.run(main())
    raise SystemExit(exit_code)
87 |
```
--------------------------------------------------------------------------------
/tests/tdd/variants/test_links.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for the links module."""
2 |
3 | import json
4 | import os
5 | from typing import Any
6 |
7 | import pytest
8 |
9 | from biomcp.variants.links import inject_links
10 |
11 |
@pytest.fixture
def braf_variants() -> list[dict[str, Any]]:
    """Load BRAF V600 test data from the JSON file next to the test data dir."""
    test_data_path = os.path.join(
        os.path.dirname(__file__),
        "../../data/myvariant/variants_part_braf_v600_multiple.json",
    )
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open(test_data_path, encoding="utf-8") as f:
        return json.load(f)
21 |
22 |
def test_inject_links_braf_variants(braf_variants):
    """Check the URLs inject_links adds to the BRAF records and to minimal indel records."""
    annotated = inject_links(braf_variants)

    # First record: expected to carry no CIViC link.
    first = annotated[0]
    rsid = first["dbsnp"]["rsid"]
    clinvar_id = first["clinvar"]["variant_id"]
    cosmic_id = first["cosmic"]["cosmic_id"]

    assert first["dbsnp"]["url"] == f"https://www.ncbi.nlm.nih.gov/snp/{rsid}"
    assert (
        first["clinvar"]["url"]
        == f"https://www.ncbi.nlm.nih.gov/clinvar/variation/{clinvar_id}/"
    )
    assert (
        first["cosmic"]["url"]
        == f"https://cancer.sanger.ac.uk/cosmic/mutation/overview?id={cosmic_id}"
    )
    assert not ("civic" in first and "url" in first["civic"])

    links = first["url"]
    assert (
        links["ensembl"]
        == f"https://ensembl.org/Homo_sapiens/Variation/Explore?v={rsid}"
    )
    assert links["ucsc_genome_browser"].startswith(
        "https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position=chr7:"
    )
    assert (
        links["hgnc"]
        == "https://www.genenames.org/data/gene-symbol-report/#!/symbol/BRAF"
    )

    # Second record: carries a CIViC entry, so a CIViC link is expected.
    second = annotated[1]
    civic_id = second["civic"]["id"]
    assert (
        second["civic"]["url"]
        == f"https://civicdb.org/variants/{civic_id}/summary"
    )

    # An empty input list comes back empty.
    assert inject_links([]) == []

    # Insertion: the VCF entry has an ALT allele but no REF.
    insertion_record = {
        "chrom": "7",
        "vcf": {"position": "123", "alt": "A"},
        "dbnsfp": {"genename": "GENE1"},
    }
    (insertion_out,) = inject_links([insertion_record])
    assert (
        insertion_out["url"]["ucsc_genome_browser"]
        == "https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position=chr7:123-124"
    )

    # Deletion: the VCF entry has a REF allele but no ALT.
    deletion_record = {
        "chrom": "7",
        "vcf": {"position": "123", "ref": "AAA"},
        "dbnsfp": {"genename": "GENE1"},
    }
    (deletion_out,) = inject_links([deletion_record])
    assert (
        deletion_out["url"]["ucsc_genome_browser"]
        == "https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position=chr7:123-126"
    )
87 |
```
--------------------------------------------------------------------------------
/docs/genomoncology.md:
--------------------------------------------------------------------------------
```markdown
1 | # **GenomOncology: Powering the Future of Precision Medicine**
2 |
3 | ## **Who We Are**
4 |
5 | GenomOncology is a leading healthcare technology company dedicated to transforming precision medicine through innovative genomic analysis solutions. We connect complex genomic data and actionable clinical insights, enabling healthcare providers to deliver personalized treatment strategies for cancer patients.
6 |
7 | ## **Our Commitment to Open Healthcare**
8 |
9 | We believe in the power of open-source systems to further the impact of precision medicine. Through the BioMCP initiative, we hope to engage the healthcare community in this open-access ecosystem designed to accelerate innovation in precision medicine. By evolving this open framework, we're working to create a more collaborative, efficient, and transparent healthcare environment.
10 |
11 | ## **Our Precision Oncology Platform**
12 |
13 | Today, our proprietary knowledge management system, known as the Precision Oncology Platform (POP), serves as the backbone of our solutions, continuously aggregating and curating the latest genomic research, clinical trials, and treatment guidelines. This system:
14 |
15 | - Processes and harmonizes data from 40+ scientific and clinical sources
16 | - Updates weekly to incorporate the newest research findings
17 | - Utilizes advanced NLP to extract meaningful insights from unstructured text
18 | - Maintains a comprehensive database of 25,000+ variant-drug associations
19 |
20 | ## **Real-World Impact**
21 |
22 | Our technology currently powers precision medicine programs at:
23 |
24 | - 120+ hospitals and cancer centers
25 | - 15 academic medical centers
26 | - 8 commercial reference laboratories
27 | - 10+ pharmaceutical research programs
28 |
29 | Processing over 100,000 genomic profiles monthly, our solutions have helped match thousands of patients to targeted therapies and clinical trials, significantly improving outcomes.
30 |
31 | ## **Join Us In The Next Phase of Transforming Healthcare**
32 |
33 | By contributing to the BioMCP ecosystem, we're inviting developers to collaborate with us in creating the next generation of precision medicine tools. Whether you're looking to build applications that leverage genomic data, create integrations with existing healthcare systems, or explore novel approaches to biomarker analysis, GenomOncology provides the technological foundation to bring your ideas to life.
34 |
35 | ## **Get Started**
36 |
37 | Ready to explore what's possible with GenomOncology and BioMCP?
38 |
39 | - Clone our repositories on GitHub
40 | - Register for API access
41 | - Join our developer community
42 |
43 | Together, we can accelerate precision medicine through open collaboration and innovation.
44 |
45 | ---
46 |
47 | _GenomOncology: Transforming data into treatment decisions_
48 |
```
--------------------------------------------------------------------------------
/src/biomcp/cli/biomarkers.py:
--------------------------------------------------------------------------------
```python
1 | """CLI commands for biomarker search."""
2 |
3 | import asyncio
4 | from typing import Annotated
5 |
6 | import typer
7 |
8 | from ..biomarkers import search_biomarkers
9 | from ..biomarkers.search import format_biomarker_results
10 | from ..integrations.cts_api import CTSAPIError, get_api_key_instructions
11 |
# Typer sub-application that groups the biomarker commands; invoking it
# without arguments prints the help text.
biomarker_app = typer.Typer(
    help="Search biomarkers used in clinical trial eligibility criteria",
    no_args_is_help=True,
)
16 |
17 |
@biomarker_app.command("search")
def search_biomarkers_cli(
    # Optional positional argument; None leaves the name filter unset.
    name: Annotated[
        str | None,
        typer.Argument(
            help="Biomarker name to search for (e.g., 'PD-L1', 'EGFR mutation')"
        ),
    ] = None,
    # Exposed on the command line as --type.
    biomarker_type: Annotated[
        str | None,
        typer.Option(
            "--type",
            help="Type of biomarker ('reference_gene' or 'branch')",
        ),
    ] = None,
    # Typer enforces the 1..100 range before the function body runs.
    page_size: Annotated[
        int,
        typer.Option(
            "--page-size",
            help="Number of results per page",
            min=1,
            max=100,
        ),
    ] = 20,
    # Page numbers start at 1 (min=1).
    page: Annotated[
        int,
        typer.Option(
            "--page",
            help="Page number",
            min=1,
        ),
    ] = 1,
    # Falls back to the NCI_API_KEY environment variable when the flag is absent.
    api_key: Annotated[
        str | None,
        typer.Option(
            "--api-key",
            help="NCI API key (overrides NCI_API_KEY env var)",
            envvar="NCI_API_KEY",
        ),
    ] = None,
) -> None:
    """
    Search for biomarkers used in clinical trial eligibility criteria.

    Note: Biomarker data availability may be limited in CTRP. Results focus on
    biomarkers referenced in trial eligibility criteria. For detailed variant
    annotations, use 'biomcp variant search' with MyVariant.info.

    Examples:
        # Search by biomarker name
        biomcp biomarker search "PD-L1"

        # Search by type
        biomcp biomarker search --type reference_gene

        # Search for specific biomarker
        biomcp biomarker search "EGFR mutation"
    """
    # NOTE: Typer renders the docstring above as the command's help text, so
    # edits to it change what users see.
    try:
        # search_biomarkers is a coroutine; asyncio.run drives it to completion
        # from this synchronous command.
        results = asyncio.run(
            search_biomarkers(
                name=name,
                biomarker_type=biomarker_type,
                page_size=page_size,
                page=page,
                api_key=api_key,
            )
        )

        output = format_biomarker_results(results)
        typer.echo(output)

    except CTSAPIError as e:
        # A missing-key error prints the text from get_api_key_instructions()
        # on stdout; any other CTS API error is reported on stderr.
        if "API key required" in str(e):
            typer.echo(get_api_key_instructions())
        else:
            typer.echo(f"Error: {e}", err=True)
        # Exit with status 1, chaining the original error as the cause.
        raise typer.Exit(1) from e
    except Exception as e:
        # Top-level CLI boundary: report any other failure on stderr and exit 1.
        typer.echo(f"Unexpected error: {e}", err=True)
        raise typer.Exit(1) from e
99 |
```
--------------------------------------------------------------------------------
/src/biomcp/openfda/constants.py:
--------------------------------------------------------------------------------
```python
"""Shared constants for the OpenFDA API integration: URLs, limits, field lists, disclaimers."""

# Root of the OpenFDA API; every endpoint below is built on top of it.
OPENFDA_BASE_URL = "https://api.fda.gov"

# --- Drug endpoints ---
OPENFDA_DRUG_EVENTS_URL = OPENFDA_BASE_URL + "/drug/event.json"
OPENFDA_DRUG_LABELS_URL = OPENFDA_BASE_URL + "/drug/label.json"
OPENFDA_DRUG_ENFORCEMENT_URL = OPENFDA_BASE_URL + "/drug/enforcement.json"
OPENFDA_DRUGSFDA_URL = OPENFDA_BASE_URL + "/drug/drugsfda.json"

# --- Device endpoints ---
OPENFDA_DEVICE_EVENTS_URL = OPENFDA_BASE_URL + "/device/event.json"
OPENFDA_DEVICE_CLASSIFICATION_URL = OPENFDA_BASE_URL + "/device/classification.json"
OPENFDA_DEVICE_RECALL_URL = OPENFDA_BASE_URL + "/device/recall.json"

# --- Result-size and rate limits ---
OPENFDA_DEFAULT_LIMIT = 25
OPENFDA_MAX_LIMIT = 100
OPENFDA_RATE_LIMIT_NO_KEY = 40  # requests per minute without key
OPENFDA_RATE_LIMIT_WITH_KEY = 240  # requests per minute with key

# Product codes used to filter for genomic/diagnostic devices.
GENOMIC_DEVICE_PRODUCT_CODES = [
    "OOI",  # Next Generation Sequencing Oncology Panel Test System
    "PQP",  # Nucleic Acid Based In Vitro Diagnostic Devices
    "OYD",  # Gene Mutation Detection System
    "NYE",  # DNA Sequencer
    "OEO",  # Hereditary or Somatic Variant Detection System
    "QIN",  # Tumor Profiling Test
    "QDI",  # Companion Diagnostic
    "PTA",  # Cancer Predisposition Risk Assessment System
]

# Fields commonly searched in adverse event reports.
ADVERSE_EVENT_FIELDS = [
    "patient.drug.medicinalproduct",
    "patient.drug.openfda.brand_name",
    "patient.drug.openfda.generic_name",
    "patient.drug.drugindication",
    "patient.reaction.reactionmeddrapt",
]

# Fields searched in drug labels.
LABEL_FIELDS = [
    "openfda.brand_name",
    "openfda.generic_name",
    "indications_and_usage",
    "boxed_warning",
    "warnings_and_precautions",
    "adverse_reactions",
    "drug_interactions",
]

# Fields searched in device event reports.
DEVICE_FIELDS = [
    "device.brand_name",
    "device.generic_name",
    "device.manufacturer_d_name",
    "device.openfda.device_name",
    "device.openfda.medical_specialty_description",
]

# General disclaimer text for FDA-sourced data.
OPENFDA_DISCLAIMER = (
    "⚠️ **FDA Data Notice**: Information from openFDA API. "
    "Not for clinical decision-making. Adverse events don't prove causation. "
    "Data may be incomplete or delayed. Consult healthcare professionals and "
    "official FDA sources at fda.gov for medical decisions."
)

# Disclaimer text specific to drug shortage information.
OPENFDA_SHORTAGE_DISCLAIMER = (
    "🚨 **Critical Warning**: Drug shortage information is time-sensitive. "
    "Always verify current availability with FDA Drug Shortages Database at "
    "https://www.accessdata.fda.gov/scripts/drugshortages/ before making "
    "supply chain or treatment decisions."
)
82 |
```
--------------------------------------------------------------------------------
/tests/tdd/utils/test_rate_limiter.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for rate limiting utilities."""
2 |
3 | import asyncio
4 | import time
5 |
6 | import pytest
7 |
8 | from biomcp.utils.rate_limiter import RateLimiter
9 |
10 |
class TestRateLimiter:
    """Exercise RateLimiter: token exhaustion, replenishment, per-key buckets, waiting."""

    @pytest.mark.asyncio
    async def test_basic_rate_limiting(self):
        """Two requests fit in a 2-per-second budget; the third is refused with a wait time."""
        # Create limiter with 2 requests per second
        limiter = RateLimiter(rate=2, per_seconds=1)

        # First two requests should be allowed
        allowed1, wait1 = await limiter.check_rate_limit()
        assert allowed1 is True
        assert wait1 is None

        allowed2, wait2 = await limiter.check_rate_limit()
        assert allowed2 is True
        assert wait2 is None

        # Third request should be denied with wait time
        allowed3, wait3 = await limiter.check_rate_limit()
        assert allowed3 is False
        assert wait3 is not None
        assert wait3 > 0

    @pytest.mark.asyncio
    async def test_rate_limit_replenishment(self):
        """A spent token becomes available again once the period has elapsed."""
        # Create limiter with 1 request per second
        limiter = RateLimiter(rate=1, per_seconds=1)

        # Use the token
        allowed1, _ = await limiter.check_rate_limit()
        assert allowed1 is True

        # Should be denied immediately
        allowed2, _ = await limiter.check_rate_limit()
        assert allowed2 is False

        # Wait slightly longer than the period so the token is replenished
        await asyncio.sleep(1.1)

        # Should be allowed now
        allowed3, _ = await limiter.check_rate_limit()
        assert allowed3 is True

    @pytest.mark.asyncio
    async def test_multiple_keys(self):
        """Each key has its own budget; exhausting one does not affect another."""
        limiter = RateLimiter(rate=1, per_seconds=1)

        # Use token for key1
        allowed1, _ = await limiter.check_rate_limit("key1")
        assert allowed1 is True

        # key2 should still have tokens
        allowed2, _ = await limiter.check_rate_limit("key2")
        assert allowed2 is True

        # key1 should be limited
        allowed3, wait3 = await limiter.check_rate_limit("key1")
        assert allowed3 is False
        assert wait3 is not None

    @pytest.mark.asyncio
    async def test_wait_if_needed(self):
        """wait_if_needed returns at once while budget remains and sleeps once it is spent."""
        limiter = RateLimiter(rate=1, per_seconds=1)

        # Elapsed time is measured with the monotonic clock: the wall clock can
        # jump (NTP, manual adjustment) and would make the bounds below flaky.

        # First call should not wait
        start = time.monotonic()
        await limiter.wait_if_needed()
        elapsed = time.monotonic() - start
        assert elapsed < 0.1

        # Second call should wait
        start = time.monotonic()
        await limiter.wait_if_needed()
        elapsed = time.monotonic() - start
        assert elapsed >= 0.9  # Should wait approximately 1 second
90 |
```
--------------------------------------------------------------------------------
/src/biomcp/utils/metrics.py:
--------------------------------------------------------------------------------
```python
1 | """Metrics and monitoring utilities."""
2 |
3 | import asyncio
4 | import logging
5 | import time
6 | from collections.abc import Callable
7 | from functools import wraps
8 | from typing import Any, TypeVar, cast
9 |
logger = logging.getLogger(__name__)

T = TypeVar("T")


def track_api_call(api_name: str):
    """Build a decorator that logs the outcome and duration of each call.

    Works on both coroutine functions and plain functions. A successful call
    is logged at INFO, a failing call at ERROR, and the exception is always
    re-raised unchanged. Structured fields are attached through ``extra``:
    ``api``, ``duration`` (seconds), ``status`` and, on failure, ``error_type``.

    Args:
        api_name: Name of the API being called

    Returns:
        Decorator function
    """

    def _log_success(started: float) -> None:
        # Shared by the sync and async wrappers so both emit identical records.
        logger.info(
            f"{api_name} call succeeded",
            extra={
                "api": api_name,
                "duration": time.perf_counter() - started,
                "status": "success",
            },
        )

    def _log_failure(started: float, exc: Exception) -> None:
        logger.error(
            f"{api_name} call failed: {exc}",
            extra={
                "api": api_name,
                "duration": time.perf_counter() - started,
                "status": "error",
                "error_type": type(exc).__name__,
            },
        )

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        @wraps(func)
        async def async_wrapper(*args: Any, **kwargs: Any) -> T:
            # perf_counter is monotonic; wall-clock time can jump backwards or
            # forwards and would corrupt the measured duration.
            started = time.perf_counter()
            try:
                result = await func(*args, **kwargs)  # type: ignore[misc]
            except Exception as exc:
                _log_failure(started, exc)
                raise
            _log_success(started)
            return result

        @wraps(func)
        def sync_wrapper(*args: Any, **kwargs: Any) -> T:
            started = time.perf_counter()
            try:
                result = func(*args, **kwargs)
            except Exception as exc:
                _log_failure(started, exc)
                raise
            _log_success(started)
            return result

        # Return appropriate wrapper based on function type
        if asyncio.iscoroutinefunction(func):
            return cast(Callable[..., T], async_wrapper)
        return cast(Callable[..., T], sync_wrapper)

    return decorator
89 |
```
--------------------------------------------------------------------------------
/tests/tdd/trials/test_getter.py:
--------------------------------------------------------------------------------
```python
1 | from biomcp.trials.getter import Module, get_trial, modules
2 |
3 |
async def test_get_protocol(anyio_backend):
    """The protocol module renders as a long document headed by the study URL."""
    url_prefix = "Url: https://clinicaltrials.gov/study/"
    rendered = await get_trial("NCT04280705", Module.PROTOCOL)
    assert rendered[: len(url_prefix)] == url_prefix
    assert 10000 < len(rendered)  # 10370 on 2025-03-23
8 |
9 |
async def test_get_locations(anyio_backend):
    """The locations module output starts with the study URL and protocol heading."""
    expected_head = (
        "Url: https://clinicaltrials.gov/study/NCT04280705\n"
        "\n"
        "# Protocol Section\n"
    )
    rendered = await get_trial("NCT04280705", Module.LOCATIONS)
    assert rendered.startswith(expected_head)
    assert "University of California San Francisco" in rendered
    assert 12000 < len(rendered)  # 12295 on 2025-03-23
19 |
20 |
async def test_get_references(anyio_backend):
    """The references module output contains the protocol and references headings."""
    rendered = await get_trial("NCT04280705", Module.REFERENCES)
    for heading in ("# Protocol Section", "## References Module"):
        assert heading in rendered
    assert rendered != ""
26 |
27 |
async def test_get_outcomes(anyio_backend):
    """The outcomes module output contains an outcomes or results heading."""
    rendered = await get_trial("NCT04280705", Module.OUTCOMES)
    assert "# Protocol Section" in rendered
    assert any(
        heading in rendered
        for heading in ("## Outcomes Module", "## Results Sections")
    )
    assert rendered != ""
35 |
36 |
async def test_invalid_nct_id(anyio_backend):
    """An unknown NCT number yields a not-found message rather than an exception."""
    unknown_id = "NCT99999999"
    rendered = await get_trial(unknown_id)
    assert f"NCT number {unknown_id} not found" in rendered
40 |
41 |
def test_all_modules_exist():
    """Every expected module is registered, and Protocol lists its critical sections."""
    # All four module names must be present in the registry.
    for module_name in ("Protocol", "Locations", "References", "Outcomes"):
        assert module_name in modules

    # The protocol module must include these sections.
    protocol_sections = modules[Module.PROTOCOL]
    for section in ("IdentificationModule", "StatusModule", "DescriptionModule"):
        assert section in protocol_sections
54 |
55 |
async def test_cli_default_module_functionality(anyio_backend):
    """Protocol, which the CLI layer would apply as its default module, renders fully."""
    # Only the explicit Protocol module is requested here; choosing a default
    # is left to the CLI layer.
    protocol_markdown = await get_trial("NCT04280705", Module.PROTOCOL)

    assert 10000 < len(protocol_markdown)
    # Confirms the Protocol module is a valid choice for that default.
    assert "Protocol Section" in protocol_markdown
64 |
65 |
async def test_json_output(anyio_backend):
    """With output_json=True the result is a JSON object text mentioning the study."""
    payload = await get_trial("NCT04280705", Module.PROTOCOL, output_json=True)
    assert payload[:1] == "{"
    for needle in ("URL", "NCT04280705"):
        assert needle in payload
74 |
75 |
async def test_error_handling_json_output(anyio_backend):
    """With output_json=True an unknown NCT number is reported as an error payload."""
    unknown_id = "NCT99999999"
    payload = await get_trial(unknown_id, Module.PROTOCOL, output_json=True)
    assert "error" in payload
    assert unknown_id in payload
83 |
```
--------------------------------------------------------------------------------
/wrangler.toml:
--------------------------------------------------------------------------------
```toml
1 | name = "biomcp-worker"
2 | main = "src/biomcp/workers/worker_entry_stytch.js"
3 | compatibility_date = "2025-04-28"
4 |
5 | [vars]
6 | # Environment variables for the worker
7 | # These can be overridden in several ways:
8 | # 1. In the Cloudflare dashboard under Workers & Pages > your-worker > Settings > Variables
9 | # 2. Using wrangler CLI: wrangler secret put REMOTE_MCP_SERVER_URL
10 | # 3. During local development: wrangler dev --var REMOTE_MCP_SERVER_URL="http://localhost:8000"
11 | # 4. In your CI/CD pipeline using environment variables with the format CF_REMOTE_MCP_SERVER_URL
12 | REMOTE_MCP_SERVER_URL = "http://localhost:8000" # Replace with your MCP server URL in production
13 |
14 | # Stytch OAuth Configuration
15 | # Replace these placeholder values with your actual Stytch credentials
16 | # For development, use test credentials from https://stytch.com/dashboard
17 | # For production, use production credentials and api.stytch.com instead of test.stytch.com
18 | STYTCH_PROJECT_ID = "project-test-xxxxxxxxxxxx" # Replace with your Stytch Project ID
19 | STYTCH_SECRET = "secret-test-xxxxxxxxxxxx" # Replace with your Stytch Secret (use wrangler secret for production)
20 | STYTCH_PUBLIC_TOKEN = "public-token-test-xxxxxxxxxxxx" # Replace with your Stytch Public Token
21 | STYTCH_API_URL = "https://test.stytch.com/v1" # Use https://api.stytch.com/v1 for production
22 | STYTCH_OAUTH_URL = "https://test.stytch.com/v1/public/oauth/google/start" # Update for production
23 |
24 | # Debug mode - set to true for development, false for production
25 | DEBUG = false
26 |
27 | # JWT Secret for signing tokens - use a strong, unique secret in production
28 | # For production, set this as a secret: wrangler secret put JWT_SECRET
29 | JWT_SECRET = "replace-with-a-strong-secret-key"
30 |
31 | # BigQuery variables
32 | # For production, set these as secrets or environment variables:
33 | # wrangler secret put BQ_PROJECT_ID
34 | # wrangler secret put BQ_DATASET
35 | # wrangler secret put BQ_SA_KEY_JSON
36 | BQ_PROJECT_ID = "your-project-id" # Replace with your actual project ID in production
37 | BQ_DATASET = "your_dataset_name" # Replace with your actual dataset in production
38 | BQ_TABLE="worker_logs"
39 |
40 | # Sensitive variables should be stored in the Cloudflare dashboard under Workers & Pages > your-worker > Settings > Secrets
41 | # OR you can declare them using npx wrangler secret put BQ_SA_KEY_JSON
42 | # Potential secrets:
43 | # BQ_SA_KEY_JSON
44 | # STYTCH_SECRET
45 |
46 |
47 | # Note: The ability to allow plaintext connections is now configured in the Cloudflare dashboard
48 | # under Security settings for your Worker
49 |
50 | [build]
51 | command = ""
52 |
53 | [triggers]
54 | crons = []
55 |
56 | [observability.logs]
57 | enabled = true
58 |
59 | # KV namespace for storing OAuth tokens and state
60 | # Create your KV namespace with: wrangler kv:namespace create OAUTH_KV
61 | # Then replace the ID below with your namespace ID
62 | [[kv_namespaces]]
63 | binding = "OAUTH_KV"
64 | id = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" # Replace with your KV namespace ID
65 |
```
--------------------------------------------------------------------------------
/docs/concepts/01-what-is-biomcp.md:
--------------------------------------------------------------------------------
```markdown
1 | # What is BioMCP?
2 |
3 | BioMCP is an open-source implementation of the Model Context Protocol (MCP) designed for biomedical research. It connects AI assistants to specialized biomedical databases, enabling natural language access to complex scientific data.
4 |
5 | [Watch the video: What is BioMCP?](https://www.youtube.com/watch?v=bKxOWrWUUhM)
6 |
7 | ## The Bridge to Biomedical Data
8 |
9 | BioMCP provides AI assistants with direct access to specialized biomedical databases that aren't available through general web search. Built on Anthropic's Model Context Protocol standard, it creates a toolbox that enables natural language queries across multiple scientific data sources.
10 |
11 | ## Connected Data Sources
12 |
13 | - **PubMed/PubTator3**: 30M+ research articles with entity recognition for genes, diseases, drugs, and variants
14 | - **ClinicalTrials.gov**: 400K+ clinical trials searchable by condition, location, phase, and eligibility
15 | - **MyVariant.info**: Comprehensive variant annotations with clinical significance
16 | - **cBioPortal**: Cancer genomics data automatically integrated with searches
17 | - **BioThings APIs**: Real-time gene, drug, and disease information
18 | - **NCI CTS API**: Enhanced cancer trial search with biomarker filtering
19 | - **AlphaGenome**: Variant effect predictions using Google DeepMind's AI
20 |
21 | ## How Does It Transform Research?
22 |
23 | What makes BioMCP particularly powerful is its conversational nature. A
24 | researcher might begin with a simple question about a disease, then naturally
25 | progress to exploring related clinical trials, and finally investigate genetic
26 | variants that affect treatment efficacy—all within a single, flowing
27 | conversation.
28 |
29 | The system remembers context throughout the interaction, allowing for natural
30 | follow-up questions and a research experience that mirrors how scientists
31 | actually work. Instead of requiring researchers to master complex query
32 | languages for each database, BioMCP translates natural language into the
33 | precise syntax each system requires.
34 |
35 | ## Why This Matters
36 |
37 | BioMCP represents a significant advancement in making specialized biomedical
38 | knowledge accessible. For researchers and clinicians, it means spending less
39 | time wrestling with complex database interfaces and more time advancing their
40 | work. For the broader field of AI in healthcare, it demonstrates how
41 | specialized knowledge domains can be made accessible through conversation.
42 |
43 | As both AI assistants (synchronous conversation partners) and AI agents (
44 | autonomous systems working toward goals over time) continue to evolve, tools
45 | like BioMCP will be essential in connecting these systems to the specialized
46 | knowledge they need to deliver meaningful insights in complex domains.
47 |
48 | By open-sourcing BioMCP, we're inviting the community to build upon this
49 | foundation, creating more powerful and accessible tools for biomedical research
50 | and ultimately accelerating the pace of scientific discovery.
51 |
```
--------------------------------------------------------------------------------
/src/biomcp/core.py:
--------------------------------------------------------------------------------
```python
1 | """Core module for BioMCP containing shared resources."""
2 |
3 | from contextlib import asynccontextmanager
4 | from enum import Enum
5 | from typing import Any
6 |
7 | from mcp.server.fastmcp import FastMCP
8 | from mcp.server.fastmcp.utilities.logging import get_logger
9 |
10 | from .logging_filter import setup_logging_filters
11 |
# Module-level logger, created before anything else in this module runs.
logger = get_logger(__name__)

# Install the project's logging filters at import time; per the original note
# they suppress non-critical ASGI errors.
setup_logging_filters()
17 |
18 |
# Lifespan hook passed to the FastMCP app below.
@asynccontextmanager
async def lifespan(mcp):
    """Async context manager bracketing the app's lifetime; currently a no-op.

    Args:
        mcp: The application instance; it is not used here.
    """
    # No startup work is done before yielding.
    yield None
    # No shutdown work is done after the context exits.
27 |
28 |
# The shared FastMCP application instance, wired to the lifespan hook above.
# stateless_http=True is required for proper streamable HTTP support.
mcp_app = FastMCP(
    name="BioMCP - Biomedical Model Context Protocol Server",
    stateless_http=True,
    lifespan=lifespan,
)
36 |
37 |
class StrEnum(str, Enum):
    """String-valued enum whose lookup by value tolerates case and space/underscore differences."""

    def __str__(self):
        # Render as the raw value rather than "ClassName.MEMBER".
        return self.value

    @classmethod
    def _missing_(cls, value):
        """Resolve a value that matched no member exactly.

        String values are compared case-insensitively with spaces treated as
        underscores; the first member that matches in definition order wins.
        Returns None (so Enum raises ValueError) for non-strings and for
        strings that match no member.
        """
        if not isinstance(value, str):
            return None
        wanted = value.lower().replace(" ", "_")
        return next(
            (
                member
                for member in cls
                if member.lower().replace(" ", "_") == wanted
            ),
            None,
        )
53 |
54 |
class PublicationState(StrEnum):
    """Publication state of an article.

    Members are string-valued; lookup by value goes through StrEnum, so it is
    case-insensitive and treats spaces and underscores as equivalent.
    """

    PREPRINT = "preprint"
    PEER_REVIEWED = "peer_reviewed"
    UNKNOWN = "unknown"
61 |
62 |
def ensure_list(value: Any, split_strings: bool = False) -> list[Any]:
    """
    Normalize an arbitrary value into a list.

    Handy for inputs from LLMs, which may supply a comma-separated string
    where a real list was expected.

    Args:
        value: The value to normalize.
        split_strings: When True, a string is split on commas and each piece
            is stripped of surrounding whitespace. When False, a string is
            wrapped in a list as a single element.

    Returns:
        - ``[]`` when value is None
        - the same list object, unchanged, when value is already a list
        - the stripped comma-separated pieces when value is a string and
          split_strings is True (empty pieces are kept)
        - ``[value]`` for everything else, including unsplit strings
    """
    if isinstance(value, list):
        return value
    if value is None:
        return []
    if split_strings and isinstance(value, str):
        # Comma-separated text: one entry per piece, whitespace trimmed.
        return list(map(str.strip, value.split(",")))
    # Any other value becomes a single-element list.
    return [value]
92 |
93 |
# Main module logger: INFO and above.
logger.setLevel("INFO")

# httpx logger: warnings and above only.
httpx_logger = get_logger("httpx")
httpx_logger.setLevel("WARN")
100 |
```
--------------------------------------------------------------------------------
/tests/tdd/variants/test_alphagenome.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for AlphaGenome integration."""
2 |
3 | from unittest.mock import patch
4 |
5 | import pytest
6 |
7 | from biomcp.variants.alphagenome import predict_variant_effects
8 |
9 |
@pytest.mark.asyncio
async def test_predict_variant_effects_no_api_key():
    """With an empty environment, the result explains how to obtain and set an API key."""
    variant = {
        "chromosome": "chr7",
        "position": 140753336,
        "reference": "A",
        "alternate": "T",
    }
    # Clear every environment variable for the duration of the call.
    with patch.dict("os.environ", {}, clear=True):
        result = await predict_variant_effects(**variant)

    for expected in (
        "AlphaGenome API key required",
        "https://deepmind.google.com/science/alphagenome",
        "ALPHAGENOME_API_KEY",
    ):
        assert expected in result
24 |
25 |
@pytest.mark.asyncio
async def test_predict_variant_effects_not_installed():
    """Test that missing AlphaGenome package returns installation instructions or API error."""
    # AlphaGenome may or may not be installed in the test environment, so a
    # dummy API key is supplied and any of the expected failure messages is
    # accepted. patch.dict restores the environment on exit, even if the call
    # raises, and matches how the sibling tests manage os.environ.
    with patch.dict("os.environ", {"ALPHAGENOME_API_KEY": "test-key"}):
        result = await predict_variant_effects(
            chromosome="chr7",
            position=140753336,
            reference="A",
            alternate="T",
            skip_cache=True,  # Skip cache to ensure fresh results
        )

    # The function should either:
    # 1. Handle ImportError if AlphaGenome is not installed
    # 2. Return API error if AlphaGenome is installed but API key is invalid
    # 3. Return a prediction failure for other errors
    assert any(
        marker in result
        for marker in (
            "AlphaGenome not installed",
            "AlphaGenome prediction failed",
            "API key not valid",  # This can happen with invalid test keys
        )
    )

    if "AlphaGenome not installed" in result:
        assert "git clone" in result
        assert "pip install" in result
65 |
66 |
@pytest.mark.asyncio
async def test_predict_variant_effects_basic_parameters():
    """The full parameter set is accepted; only the missing API key is reported."""
    # Exercises the function interface without requiring AlphaGenome.
    all_parameters = {
        "chromosome": "chrX",
        "position": 12345,
        "reference": "G",
        "alternate": "C",
        "interval_size": 500_000,
        "tissue_types": ["UBERON:0002367", "UBERON:0001157"],
    }
    with patch.dict("os.environ", {}, clear=True):
        result = await predict_variant_effects(**all_parameters)

    # An API key error (not an import error) shows the parameters were accepted.
    assert "AlphaGenome API key required" in result
84 |
```
--------------------------------------------------------------------------------
/example_scripts/mcp_integration.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env -S uv --quiet run --script
2 | # /// script
3 | # requires-python = ">=3.11"
4 | # dependencies = [
5 | # "mcp",
6 | # ]
7 | # ///
8 |
9 | # Scripts to reproduce this page:
10 | # https://biomcp.org/mcp_integration/
11 |
12 | import asyncio
13 |
14 | from mcp.client.session import ClientSession
15 | from mcp.client.stdio import StdioServerParameters, stdio_client
16 | from mcp.types import TextContent
17 |
18 |
async def check_server():
    """Smoke-test a BioMCP server through an MCP stdio client session.

    Opens a stdio client configured to start the server with ``uv``, lists the
    server's prompts, resources and tools, calls the ``think`` tool, then
    fetches variant ``rs113488022`` with the ``fetch`` tool and checks the
    returned markdown.

    Raises:
        AssertionError: If the tool count, a tool call result, or the fetched
            content does not match what this script expects.
    """
    # Run with pypi package using `uv` not `uvx`
    server_params = StdioServerParameters(
        command="uv",
        args=["run", "--with", "biomcp-python", "biomcp", "run"],
    )
    #
    # Run with local code
    # server_params = StdioServerParameters(
    #     command="python",
    #     args=["-m", "biomcp", "run"],
    # )

    async with (
        stdio_client(server_params) as (read, write),
        ClientSession(read, write) as session,
    ):
        await session.initialize()

        # list prompts
        prompts = await session.list_prompts()
        print("Available prompts:", prompts)

        # list resources
        resources = await session.list_resources()
        print("Available resources:", resources)

        # list tools
        tool_result = await session.list_tools()
        tools = tool_result.tools
        print("Available tools:", tools)
        assert len(tools) == 13  # 3 core tools + 10 individual tools

        # IMPORTANT: Always use think tool first!
        think_result = await session.call_tool(
            "think",
            {
                "thought": "Planning to analyze variant rs113488022 for BRAF gene...",
                "thoughtNumber": 1,
                "totalThoughts": 2,
                "nextThoughtNeeded": True,
            },
        )
        assert (
            think_result.isError is False
        ), f"Think error: {think_result.content}"

        # Now fetch variant details using unified fetch tool
        tool_name = "fetch"
        tool_args = {"domain": "variant", "id_": "rs113488022"}
        result = await session.call_tool(tool_name, tool_args)

        # --- Assertions ---
        # 1. Check the call was successful (not an error). This is checked
        #    once; an identical second check could never fail.
        assert (
            result.isError is False
        ), f"Tool call resulted in error: {result.content}"

        # 2. Check there is content
        assert result.content is not None
        assert len(result.content) >= 1

        # 3. Check the type of the first content block
        content_block = result.content[0]
        assert isinstance(content_block, TextContent)

        markdown_output = content_block.text
        # print(markdown_output)
        assert isinstance(markdown_output, str)
        assert "rs113488022" in markdown_output
        assert "BRAF" in markdown_output
        assert "Pathogenic" in markdown_output
        print(f"Successfully called tool '{tool_name}' with args {tool_args}")
94 |
# Script entry point: run the check_server coroutine to completion, but only
# when this file is executed directly rather than imported.
if __name__ == "__main__":
    asyncio.run(check_server())
97 |
```
--------------------------------------------------------------------------------
/src/biomcp/variants/cancer_types.py:
--------------------------------------------------------------------------------
```python
1 | """Cancer type configuration for gene-specific studies."""
2 |
# Per-gene cancer type keywords.
# These keywords are used to filter relevant studies from cBioPortal.
# The comment above each entry names the cancers in list order.
GENE_CANCER_KEYWORDS = {
    # melanoma, thyroid, colorectal, lung, brain, hairy cell leukemia
    "BRAF": ["skcm", "thca", "coad", "lung", "glioma", "hairy_cell"],
    # colorectal, pancreatic, lung, stomach, colorectal adenocarcinoma,
    # ampullary carcinoma
    "KRAS": ["coad", "paad", "lung", "stad", "coadread", "ampca"],
    # breast, ovarian, lung, head/neck, lower grade glioma, glioblastoma,
    # bladder, liver
    "TP53": ["brca", "ov", "lung", "hnsc", "lgg", "gbm", "blca", "lihc"],
    # lung, non-small cell lung cancer, glioblastoma, head/neck
    "EGFR": ["lung", "nsclc", "gbm", "hnsc"],
    # breast, head/neck, colorectal, endometrial
    "PIK3CA": ["brca", "hnsc", "coad", "ucec"],
    # prostate, glioblastoma, endometrial, breast
    "PTEN": ["prad", "gbm", "ucec", "brca"],
    # colorectal, coadread, stomach
    "APC": ["coad", "coadread", "stad"],
    # renal cell carcinoma, clear cell RCC, kidney clear cell
    "VHL": ["rcc", "ccrcc", "kirc"],
    # retinoblastoma, small cell lung cancer, bladder
    "RB1": ["rbl", "sclc", "blca"],
    # breast, ovarian, prostate, pancreatic
    "BRCA1": ["brca", "ov", "prad", "paad"],
    # breast, ovarian, prostate, pancreatic
    "BRCA2": ["brca", "ov", "prad", "paad"],
    # lung, non-small cell lung cancer, anaplastic large cell lymphoma,
    # neuroblastoma
    "ALK": ["lung", "nsclc", "alcl", "nbl"],
    # Burkitt lymphoma, diffuse large B-cell lymphoma, multiple myeloma,
    # neuroblastoma
    "MYC": ["burkitt", "dlbcl", "mm", "nbl"],
    # melanoma, skcm, thyroid, acute myeloid leukemia
    "NRAS": ["mel", "skcm", "thca", "aml"],
    # gastrointestinal stromal tumor, melanoma, acute myeloid leukemia
    "KIT": ["gist", "mel", "aml"],
}
101 |
# Fallback keywords for genes that have no entry in the mapping
DEFAULT_CANCER_KEYWORDS = [
    "msk",
    "tcga",
    "metabric",
    "dfci",
    "broad",
]

# Upper bound on the number of studies to query per gene
MAX_STUDIES_PER_GENE = 20

# Upper bound on the number of mutations to process per study
MAX_MUTATIONS_PER_STUDY = 5_000
110 |
111 |
def get_cancer_keywords(gene: str) -> list[str]:
    """Get cancer type keywords for a given gene.

    The lookup is case-insensitive: the symbol is upper-cased before it is
    matched against ``GENE_CANCER_KEYWORDS``.

    Args:
        gene: Gene symbol (e.g., "BRAF")

    Returns:
        A new list of cancer type keywords to search for. Falls back to
        ``DEFAULT_CANCER_KEYWORDS`` when the gene has no entry of its own.
    """
    keywords = GENE_CANCER_KEYWORDS.get(gene.upper(), DEFAULT_CANCER_KEYWORDS)
    # Return a copy so a caller that mutates the result cannot alter the
    # shared module-level configuration lists.
    return list(keywords)
122 |
```