This is page 1 of 19. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── actions │ │ └── setup-python-env │ │ └── action.yml │ ├── dependabot.yml │ └── workflows │ ├── ci.yml │ ├── deploy-docs.yml │ ├── main.yml.disabled │ ├── on-release-main.yml │ └── validate-codecov-config.yml ├── .gitignore ├── .pre-commit-config.yaml ├── BIOMCP_DATA_FLOW.md ├── CHANGELOG.md ├── CNAME ├── codecov.yaml ├── docker-compose.yml ├── Dockerfile ├── docs │ ├── apis │ │ ├── error-codes.md │ │ ├── overview.md │ │ └── python-sdk.md │ ├── assets │ │ ├── biomcp-cursor-locations.png │ │ ├── favicon.ico │ │ ├── icon.png │ │ ├── logo.png │ │ ├── mcp_architecture.txt │ │ └── remote-connection │ │ ├── 00_connectors.png │ │ ├── 01_add_custom_connector.png │ │ ├── 02_connector_enabled.png │ │ ├── 03_connect_to_biomcp.png │ │ ├── 04_select_google_oauth.png │ │ └── 05_success_connect.png │ ├── backend-services-reference │ │ ├── 01-overview.md │ │ ├── 02-biothings-suite.md │ │ ├── 03-cbioportal.md │ │ ├── 04-clinicaltrials-gov.md │ │ ├── 05-nci-cts-api.md │ │ ├── 06-pubtator3.md │ │ └── 07-alphagenome.md │ ├── blog │ │ ├── ai-assisted-clinical-trial-search-analysis.md │ │ ├── images │ │ │ ├── deep-researcher-video.png │ │ │ ├── researcher-announce.png │ │ │ ├── researcher-drop-down.png │ │ │ ├── researcher-prompt.png │ │ │ ├── trial-search-assistant.png │ │ │ └── what_is_biomcp_thumbnail.png │ │ └── researcher-persona-resource.md │ ├── changelog.md │ ├── CNAME │ ├── concepts │ │ ├── 01-what-is-biomcp.md │ │ ├── 02-the-deep-researcher-persona.md │ │ └── 03-sequential-thinking-with-the-think-tool.md │ ├── developer-guides │ │ ├── 01-server-deployment.md │ │ ├── 02-contributing-and-testing.md │ │ ├── 03-third-party-endpoints.md │ │ ├── 04-transport-protocol.md │ │ ├── 05-error-handling.md │ │ ├── 06-http-client-and-caching.md │ │ ├── 07-performance-optimizations.md │ │ └── generate_endpoints.py │ ├── faq-condensed.md │ 
├── FDA_SECURITY.md │ ├── genomoncology.md │ ├── getting-started │ │ ├── 01-quickstart-cli.md │ │ ├── 02-claude-desktop-integration.md │ │ └── 03-authentication-and-api-keys.md │ ├── how-to-guides │ │ ├── 01-find-articles-and-cbioportal-data.md │ │ ├── 02-find-trials-with-nci-and-biothings.md │ │ ├── 03-get-comprehensive-variant-annotations.md │ │ ├── 04-predict-variant-effects-with-alphagenome.md │ │ ├── 05-logging-and-monitoring-with-bigquery.md │ │ └── 06-search-nci-organizations-and-interventions.md │ ├── index.md │ ├── policies.md │ ├── reference │ │ ├── architecture-diagrams.md │ │ ├── quick-architecture.md │ │ ├── quick-reference.md │ │ └── visual-architecture.md │ ├── robots.txt │ ├── stylesheets │ │ ├── announcement.css │ │ └── extra.css │ ├── troubleshooting.md │ ├── tutorials │ │ ├── biothings-prompts.md │ │ ├── claude-code-biomcp-alphagenome.md │ │ ├── nci-prompts.md │ │ ├── openfda-integration.md │ │ ├── openfda-prompts.md │ │ ├── pydantic-ai-integration.md │ │ └── remote-connection.md │ ├── user-guides │ │ ├── 01-command-line-interface.md │ │ ├── 02-mcp-tools-reference.md │ │ └── 03-integrating-with-ides-and-clients.md │ └── workflows │ └── all-workflows.md ├── example_scripts │ ├── mcp_integration.py │ └── python_sdk.py ├── glama.json ├── LICENSE ├── lzyank.toml ├── Makefile ├── mkdocs.yml ├── package-lock.json ├── package.json ├── pyproject.toml ├── README.md ├── scripts │ ├── check_docs_in_mkdocs.py │ ├── check_http_imports.py │ └── generate_endpoints_doc.py ├── smithery.yaml ├── src │ └── biomcp │ ├── __init__.py │ ├── __main__.py │ ├── articles │ │ ├── __init__.py │ │ ├── autocomplete.py │ │ ├── fetch.py │ │ ├── preprints.py │ │ ├── search_optimized.py │ │ ├── search.py │ │ └── unified.py │ ├── biomarkers │ │ ├── __init__.py │ │ └── search.py │ ├── cbioportal_helper.py │ ├── circuit_breaker.py │ ├── cli │ │ ├── __init__.py │ │ ├── articles.py │ │ ├── biomarkers.py │ │ ├── diseases.py │ │ ├── health.py │ │ ├── interventions.py │ │ ├── main.py │ │ 
├── openfda.py │ │ ├── organizations.py │ │ ├── server.py │ │ ├── trials.py │ │ └── variants.py │ ├── connection_pool.py │ ├── constants.py │ ├── core.py │ ├── diseases │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── domain_handlers.py │ ├── drugs │ │ ├── __init__.py │ │ └── getter.py │ ├── exceptions.py │ ├── genes │ │ ├── __init__.py │ │ └── getter.py │ ├── http_client_simple.py │ ├── http_client.py │ ├── individual_tools.py │ ├── integrations │ │ ├── __init__.py │ │ ├── biothings_client.py │ │ └── cts_api.py │ ├── interventions │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── logging_filter.py │ ├── metrics_handler.py │ ├── metrics.py │ ├── openfda │ │ ├── __init__.py │ │ ├── adverse_events_helpers.py │ │ ├── adverse_events.py │ │ ├── cache.py │ │ ├── constants.py │ │ ├── device_events_helpers.py │ │ ├── device_events.py │ │ ├── drug_approvals.py │ │ ├── drug_labels_helpers.py │ │ ├── drug_labels.py │ │ ├── drug_recalls_helpers.py │ │ ├── drug_recalls.py │ │ ├── drug_shortages_detail_helpers.py │ │ ├── drug_shortages_helpers.py │ │ ├── drug_shortages.py │ │ ├── exceptions.py │ │ ├── input_validation.py │ │ ├── rate_limiter.py │ │ ├── utils.py │ │ └── validation.py │ ├── organizations │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── parameter_parser.py │ ├── prefetch.py │ ├── query_parser.py │ ├── query_router.py │ ├── rate_limiter.py │ ├── render.py │ ├── request_batcher.py │ ├── resources │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── instructions.md │ │ └── researcher.md │ ├── retry.py │ ├── router_handlers.py │ ├── router.py │ ├── shared_context.py │ ├── thinking │ │ ├── __init__.py │ │ ├── sequential.py │ │ └── session.py │ ├── thinking_tool.py │ ├── thinking_tracker.py │ ├── trials │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── nci_getter.py │ │ ├── nci_search.py │ │ └── search.py │ ├── utils │ │ ├── __init__.py │ │ ├── cancer_types_api.py │ │ ├── cbio_http_adapter.py │ │ ├── endpoint_registry.py │ │ ├── 
gene_validator.py │ │ ├── metrics.py │ │ ├── mutation_filter.py │ │ ├── query_utils.py │ │ ├── rate_limiter.py │ │ └── request_cache.py │ ├── variants │ │ ├── __init__.py │ │ ├── alphagenome.py │ │ ├── cancer_types.py │ │ ├── cbio_external_client.py │ │ ├── cbioportal_mutations.py │ │ ├── cbioportal_search_helpers.py │ │ ├── cbioportal_search.py │ │ ├── constants.py │ │ ├── external.py │ │ ├── filters.py │ │ ├── getter.py │ │ ├── links.py │ │ └── search.py │ └── workers │ ├── __init__.py │ ├── worker_entry_stytch.js │ ├── worker_entry.js │ └── worker.py ├── tests │ ├── bdd │ │ ├── cli_help │ │ │ ├── help.feature │ │ │ └── test_help.py │ │ ├── conftest.py │ │ ├── features │ │ │ └── alphagenome_integration.feature │ │ ├── fetch_articles │ │ │ ├── fetch.feature │ │ │ └── test_fetch.py │ │ ├── get_trials │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── get_variants │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── search_articles │ │ │ ├── autocomplete.feature │ │ │ ├── search.feature │ │ │ ├── test_autocomplete.py │ │ │ └── test_search.py │ │ ├── search_trials │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ ├── search_variants │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ └── steps │ │ └── test_alphagenome_steps.py │ ├── config │ │ └── test_smithery_config.py │ ├── conftest.py │ ├── data │ │ ├── ct_gov │ │ │ ├── clinical_trials_api_v2.yaml │ │ │ ├── trials_NCT04280705.json │ │ │ └── trials_NCT04280705.txt │ │ ├── myvariant │ │ │ ├── myvariant_api.yaml │ │ │ ├── myvariant_field_descriptions.csv │ │ │ ├── variants_full_braf_v600e.json │ │ │ ├── variants_full_braf_v600e.txt │ │ │ └── variants_part_braf_v600_multiple.json │ │ ├── openfda │ │ │ ├── drugsfda_detail.json │ │ │ ├── drugsfda_search.json │ │ │ ├── enforcement_detail.json │ │ │ └── enforcement_search.json │ │ └── pubtator │ │ ├── pubtator_autocomplete.json │ │ └── pubtator3_paper.txt │ ├── integration │ │ ├── test_openfda_integration.py │ │ ├── test_preprints_integration.py │ │ ├── 
test_simple.py │ │ └── test_variants_integration.py │ ├── tdd │ │ ├── articles │ │ │ ├── test_autocomplete.py │ │ │ ├── test_cbioportal_integration.py │ │ │ ├── test_fetch.py │ │ │ ├── test_preprints.py │ │ │ ├── test_search.py │ │ │ └── test_unified.py │ │ ├── conftest.py │ │ ├── drugs │ │ │ ├── __init__.py │ │ │ └── test_drug_getter.py │ │ ├── openfda │ │ │ ├── __init__.py │ │ │ ├── test_adverse_events.py │ │ │ ├── test_device_events.py │ │ │ ├── test_drug_approvals.py │ │ │ ├── test_drug_labels.py │ │ │ ├── test_drug_recalls.py │ │ │ ├── test_drug_shortages.py │ │ │ └── test_security.py │ │ ├── test_biothings_integration_real.py │ │ ├── test_biothings_integration.py │ │ ├── test_circuit_breaker.py │ │ ├── test_concurrent_requests.py │ │ ├── test_connection_pool.py │ │ ├── test_domain_handlers.py │ │ ├── test_drug_approvals.py │ │ ├── test_drug_recalls.py │ │ ├── test_drug_shortages.py │ │ ├── test_endpoint_documentation.py │ │ ├── test_error_scenarios.py │ │ ├── test_europe_pmc_fetch.py │ │ ├── test_mcp_integration.py │ │ ├── test_mcp_tools.py │ │ ├── test_metrics.py │ │ ├── test_nci_integration.py │ │ ├── test_nci_mcp_tools.py │ │ ├── test_network_policies.py │ │ ├── test_offline_mode.py │ │ ├── test_openfda_unified.py │ │ ├── test_pten_r173_search.py │ │ ├── test_render.py │ │ ├── test_request_batcher.py.disabled │ │ ├── test_retry.py │ │ ├── test_router.py │ │ ├── test_shared_context.py.disabled │ │ ├── test_unified_biothings.py │ │ ├── thinking │ │ │ ├── __init__.py │ │ │ └── test_sequential.py │ │ ├── trials │ │ │ ├── test_backward_compatibility.py │ │ │ ├── test_getter.py │ │ │ └── test_search.py │ │ ├── utils │ │ │ ├── test_gene_validator.py │ │ │ ├── test_mutation_filter.py │ │ │ ├── test_rate_limiter.py │ │ │ └── test_request_cache.py │ │ ├── variants │ │ │ ├── constants.py │ │ │ ├── test_alphagenome_api_key.py │ │ │ ├── test_alphagenome_comprehensive.py │ │ │ ├── test_alphagenome.py │ │ │ ├── test_cbioportal_mutations.py │ │ │ ├── 
test_cbioportal_search.py │ │ │ ├── test_external_integration.py │ │ │ ├── test_external.py │ │ │ ├── test_extract_gene_aa_change.py │ │ │ ├── test_filters.py │ │ │ ├── test_getter.py │ │ │ ├── test_links.py │ │ │ └── test_search.py │ │ └── workers │ │ └── test_worker_sanitization.js │ └── test_pydantic_ai_integration.py ├── THIRD_PARTY_ENDPOINTS.md ├── tox.ini ├── uv.lock └── wrangler.toml ``` # Files -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- ```yaml 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: "v4.4.0" 4 | hooks: 5 | - id: check-case-conflict 6 | - id: check-merge-conflict 7 | - id: check-toml 8 | - id: check-yaml 9 | - id: end-of-file-fixer 10 | - id: trailing-whitespace 11 | 12 | - repo: https://github.com/astral-sh/ruff-pre-commit 13 | rev: "v0.6.3" 14 | hooks: 15 | - id: ruff 16 | args: [--exit-non-zero-on-fix] 17 | - id: ruff-format 18 | 19 | - repo: local 20 | hooks: 21 | - id: update-endpoints-doc 22 | name: Update THIRD_PARTY_ENDPOINTS.md 23 | entry: uv run python scripts/generate_endpoints_doc.py 24 | language: system 25 | pass_filenames: false 26 | files: 'src/biomcp/utils/endpoint_registry\.py$' 27 | - id: check-http-imports 28 | name: Check for direct HTTP library imports 29 | entry: uv run python scripts/check_http_imports.py 30 | language: system 31 | pass_filenames: false 32 | always_run: true 33 | files: '\.py$' 34 | - id: check-docs-in-mkdocs 35 | name: Check documentation files are in mkdocs.yml 36 | entry: uv run python scripts/check_docs_in_mkdocs.py 37 | language: system 38 | pass_filenames: false 39 | files: '^docs/.*\.md$|^mkdocs\.yml$' 40 | 41 | - repo: https://github.com/pre-commit/mirrors-prettier 42 | rev: "v3.0.3" 43 | hooks: 44 | - id: prettier 45 | ``` -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- ``` 1 | docs/source 2 | 3 | # From https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 90 | __pypackages__/ 91 | 92 | # Celery stuff 93 | celerybeat-schedule 94 | celerybeat.pid 95 | 96 | # SageMath parsed files 97 | *.sage.py 98 | 99 | # Environments 100 | .env 101 | .venv 102 | env/ 103 | venv/ 104 | ENV/ 105 | env.bak/ 106 | venv.bak/ 107 | 108 | # Spyder project settings 109 | .spyderproject 110 | .spyproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # mkdocs documentation 116 | /site 117 | 118 | # ruff 119 | .ruff_cache 120 | 121 | # mypy 122 | .mypy_cache/ 123 | .dmypy.json 124 | dmypy.json 125 | 126 | # Pyre type checker 127 | .pyre/ 128 | 129 | # pytype static type analyzer 130 | .pytype/ 131 | 132 | # Cython debug symbols 133 | cython_debug/ 134 | 135 | # Vscode config files 136 | .vscode/ 137 | 138 | # PyCharm 139 | .idea/ 140 | 141 | # LLMs Notes 142 | llms/ 143 | vault/ 144 | 145 | .DS_Store 146 | /node_modules/ 147 | 148 | CLAUDE.md 149 | lzyank.toml 150 | experiment/ 151 | alphagenome 152 | spike/ 153 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown 1 | # BioMCP: Biomedical Model Context Protocol 2 | 3 | BioMCP is an open source (MIT License) toolkit that empowers AI assistants and 4 | agents with specialized biomedical knowledge. Built following the Model Context 5 | Protocol (MCP), it connects AI systems to authoritative biomedical data 6 | sources, enabling them to answer questions about clinical trials, scientific 7 | literature, and genomic variants with precision and depth. 8 | 9 | [Watch the BioMCP video on YouTube](https://www.youtube.com/watch?v=bKxOWrWUUhM) 10 | 11 | ## MCPHub Certification 12 | 13 | BioMCP is certified by [MCPHub](https://mcphub.com/mcp-servers/genomoncology/biomcp). This certification ensures that BioMCP follows best practices for Model Context Protocol implementation and provides reliable biomedical data access. 
14 | 15 | ## Why BioMCP? 16 | 17 | While Large Language Models have broad general knowledge, they often lack 18 | specialized domain-specific information or access to up-to-date resources. 19 | BioMCP bridges this gap for biomedicine by: 20 | 21 | - Providing **structured access** to clinical trials, biomedical literature, 22 | and genomic variants 23 | - Enabling **natural language queries** to specialized databases without 24 | requiring knowledge of their specific syntax 25 | - Supporting **biomedical research** workflows through a consistent interface 26 | - Functioning as an **MCP server** for AI assistants and agents 27 | 28 | ## Biomedical Data Sources 29 | 30 | BioMCP integrates with multiple biomedical data sources: 31 | 32 | ### Literature Sources 33 | 34 | - **PubTator3/PubMed** - Peer-reviewed biomedical literature with entity annotations 35 | - **bioRxiv/medRxiv** - Preprint servers for biology and health sciences 36 | - **Europe PMC** - Open science platform including preprints 37 | 38 | ### Clinical & Genomic Sources 39 | 40 | - **ClinicalTrials.gov** - Clinical trial registry and results database 41 | - **NCI Clinical Trials Search API** - National Cancer Institute's curated cancer trials database 42 | - Advanced search filters (biomarkers, prior therapies, brain metastases) 43 | - Organization and intervention databases 44 | - Disease vocabulary with synonyms 45 | - **BioThings Suite** - Comprehensive biomedical data APIs: 46 | - **MyVariant.info** - Consolidated genetic variant annotation 47 | - **MyGene.info** - Real-time gene annotations and information 48 | - **MyDisease.info** - Disease ontology and synonym information 49 | - **MyChem.info** - Drug/chemical annotations and properties 50 | - **TCGA/GDC** - The Cancer Genome Atlas for cancer variant data 51 | - **1000 Genomes** - Population frequency data via Ensembl 52 | - **cBioPortal** - Cancer genomics portal with mutation occurrence data 53 | 54 | ### Regulatory & Safety Sources 55 | 56 | - 
**OpenFDA** - FDA regulatory and safety data: 57 | - **Drug Adverse Events (FAERS)** - Post-market drug safety reports 58 | - **Drug Labels (SPL)** - Official prescribing information 59 | - **Device Events (MAUDE)** - Medical device adverse events, with genomic device filtering 60 | 61 | ## Available MCP Tools 62 | 63 | BioMCP provides 24 specialized tools for biomedical research: 64 | 65 | ### Core Tools (3) 66 | 67 | #### 1. Think Tool (ALWAYS USE FIRST!) 68 | 69 | **CRITICAL**: The `think` tool MUST be your first step for ANY biomedical research task. 70 | 71 | ```python 72 | # Start analysis with sequential thinking 73 | think( 74 | thought="Breaking down the query about BRAF mutations in melanoma...", 75 | thoughtNumber=1, 76 | totalThoughts=3, 77 | nextThoughtNeeded=True 78 | ) 79 | ``` 80 | 81 | The sequential thinking tool helps: 82 | 83 | - Break down complex biomedical problems systematically 84 | - Plan multi-step research approaches 85 | - Track reasoning progress 86 | - Ensure comprehensive analysis 87 | 88 | #### 2. 
Search Tool 89 | 90 | The search tool supports two modes: 91 | 92 | ##### Unified Query Language (Recommended) 93 | 94 | Use the `query` parameter with structured field syntax for powerful cross-domain searches: 95 | 96 | ```python 97 | # Simple natural language 98 | search(query="BRAF melanoma") 99 | 100 | # Field-specific search 101 | search(query="gene:BRAF AND trials.condition:melanoma") 102 | 103 | # Complex queries 104 | search(query="gene:BRAF AND variants.significance:pathogenic AND articles.date:>2023") 105 | 106 | # Get searchable fields schema 107 | search(get_schema=True) 108 | 109 | # Explain how a query is parsed 110 | search(query="gene:BRAF", explain_query=True) 111 | ``` 112 | 113 | **Supported Fields:** 114 | 115 | - **Cross-domain**: `gene:`, `variant:`, `disease:` 116 | - **Trials**: `trials.condition:`, `trials.phase:`, `trials.status:`, `trials.intervention:` 117 | - **Articles**: `articles.author:`, `articles.journal:`, `articles.date:` 118 | - **Variants**: `variants.significance:`, `variants.rsid:`, `variants.frequency:` 119 | 120 | ##### Domain-Based Search 121 | 122 | Use the `domain` parameter with specific filters: 123 | 124 | ```python 125 | # Search articles (includes automatic cBioPortal integration) 126 | search(domain="article", genes=["BRAF"], diseases=["melanoma"]) 127 | 128 | # Search with mutation-specific cBioPortal data 129 | search(domain="article", genes=["BRAF"], keywords=["V600E"]) 130 | search(domain="article", genes=["SRSF2"], keywords=["F57*"]) # Wildcard patterns 131 | 132 | # Search trials 133 | search(domain="trial", conditions=["lung cancer"], phase="3") 134 | 135 | # Search variants 136 | search(domain="variant", gene="TP53", significance="pathogenic") 137 | ``` 138 | 139 | **Note**: When searching articles with a gene parameter, cBioPortal data is automatically included: 140 | 141 | - Gene-level summaries show mutation frequency across cancer studies 142 | - Mutation-specific searches (e.g., "V600E") show 
study-level occurrence data 143 | - Cancer types are dynamically resolved from cBioPortal API 144 | 145 | #### 3. Fetch Tool 146 | 147 | Retrieve full details for a single article, trial, or variant: 148 | 149 | ```python 150 | # Fetch article details (supports both PMID and DOI) 151 | fetch(domain="article", id="34567890") # PMID 152 | fetch(domain="article", id="10.1101/2024.01.20.23288905") # DOI 153 | 154 | # Fetch trial with all sections 155 | fetch(domain="trial", id="NCT04280705", detail="all") 156 | 157 | # Fetch variant details 158 | fetch(domain="variant", id="rs113488022") 159 | ``` 160 | 161 | **Domain-specific options:** 162 | 163 | - **Articles**: `detail="full"` retrieves full text if available 164 | - **Trials**: `detail` can be "protocol", "locations", "outcomes", "references", or "all" 165 | - **Variants**: Always returns full details 166 | 167 | ### Individual Tools (21) 168 | 169 | For users who prefer direct access to specific functionality, BioMCP also provides 21 individual tools: 170 | 171 | #### Article Tools (2) 172 | 173 | - **article_searcher**: Search PubMed/PubTator3 and preprints 174 | - **article_getter**: Fetch detailed article information (supports PMID and DOI) 175 | 176 | #### Trial Tools (6) 177 | 178 | - **trial_searcher**: Search ClinicalTrials.gov or NCI CTS API (via source parameter) 179 | - **trial_getter**: Fetch all trial details from either source 180 | - **trial_protocol_getter**: Fetch protocol information only (ClinicalTrials.gov) 181 | - **trial_references_getter**: Fetch trial publications (ClinicalTrials.gov) 182 | - **trial_outcomes_getter**: Fetch outcome measures and results (ClinicalTrials.gov) 183 | - **trial_locations_getter**: Fetch site locations and contacts (ClinicalTrials.gov) 184 | 185 | #### Variant Tools (2) 186 | 187 | - **variant_searcher**: Search MyVariant.info database 188 | - **variant_getter**: Fetch comprehensive variant details 189 | 190 | #### NCI-Specific Tools (6) 191 | 192 | - 
**nci_organization_searcher**: Search NCI's organization database 193 | - **nci_organization_getter**: Get organization details by ID 194 | - **nci_intervention_searcher**: Search NCI's intervention database (drugs, devices, procedures) 195 | - **nci_intervention_getter**: Get intervention details by ID 196 | - **nci_biomarker_searcher**: Search biomarkers used in trial eligibility criteria 197 | - **nci_disease_searcher**: Search NCI's controlled vocabulary of cancer conditions 198 | 199 | #### Gene, Disease & Drug Tools (3) 200 | 201 | - **gene_getter**: Get real-time gene information from MyGene.info 202 | - **disease_getter**: Get disease definitions and synonyms from MyDisease.info 203 | - **drug_getter**: Get drug/chemical information from MyChem.info 204 | 205 | **Note**: All individual tools that search by gene automatically include cBioPortal summaries when the `include_cbioportal` parameter is True (default). Trial searches can expand disease conditions with synonyms when `expand_synonyms` is True (default). 206 | 207 | ## Quick Start 208 | 209 | ### For Claude Desktop Users 210 | 211 | 1. **Install `uv`** if you don't have it (recommended): 212 | 213 | ```bash 214 | # MacOS 215 | brew install uv 216 | 217 | # Windows/Linux 218 | pip install uv 219 | ``` 220 | 221 | 2. **Configure Claude Desktop**: 222 | - Open Claude Desktop settings 223 | - Navigate to Developer section 224 | - Click "Edit Config" and add: 225 | ```json 226 | { 227 | "mcpServers": { 228 | "biomcp": { 229 | "command": "uv", 230 | "args": ["run", "--with", "biomcp-python", "biomcp", "run"] 231 | } 232 | } 233 | } 234 | ``` 235 | - Restart Claude Desktop and start chatting about biomedical topics! 
236 | 237 | ### Python Package Installation 238 | 239 | ```bash 240 | # Using pip 241 | pip install biomcp-python 242 | 243 | # Using uv (recommended for faster installation) 244 | uv pip install biomcp-python 245 | 246 | # Run directly without installation 247 | uv run --with biomcp-python biomcp trial search --condition "lung cancer" 248 | ``` 249 | 250 | ## Configuration 251 | 252 | ### Environment Variables 253 | 254 | BioMCP supports optional environment variables for enhanced functionality: 255 | 256 | ```bash 257 | # cBioPortal API authentication (optional) 258 | export CBIO_TOKEN="your-api-token" # For authenticated access 259 | export CBIO_BASE_URL="https://www.cbioportal.org/api" # Custom API endpoint 260 | 261 | # Performance tuning 262 | export BIOMCP_USE_CONNECTION_POOL="true" # Enable HTTP connection pooling (default: true) 263 | export BIOMCP_METRICS_ENABLED="false" # Enable performance metrics (default: false) 264 | ``` 265 | 266 | ## Running BioMCP Server 267 | 268 | BioMCP supports multiple transport protocols to suit different deployment scenarios: 269 | 270 | ### Local Development (STDIO) 271 | 272 | For direct integration with Claude Desktop or local MCP clients: 273 | 274 | ```bash 275 | # Default STDIO mode for local development 276 | biomcp run 277 | 278 | # Or explicitly specify STDIO 279 | biomcp run --mode stdio 280 | ``` 281 | 282 | ### HTTP Server Mode 283 | 284 | BioMCP supports multiple HTTP transport protocols: 285 | 286 | #### Legacy SSE Transport (Worker Mode) 287 | 288 | For backward compatibility with existing SSE clients: 289 | 290 | ```bash 291 | biomcp run --mode worker 292 | # Server available at http://localhost:8000/sse 293 | ``` 294 | 295 | #### Streamable HTTP Transport (Recommended) 296 | 297 | The new MCP-compliant Streamable HTTP transport provides optimal performance and standards compliance: 298 | 299 | ```bash 300 | biomcp run --mode streamable_http 301 | 302 | # Custom host and port 303 | biomcp run --mode 
streamable_http --host 127.0.0.1 --port 8080 304 | ``` 305 | 306 | Features of Streamable HTTP transport: 307 | 308 | - Single `/mcp` endpoint for all operations 309 | - Dynamic response mode (JSON for quick operations, SSE for long-running) 310 | - Session management support (future) 311 | - Full MCP specification compliance (2025-03-26) 312 | - Better scalability for cloud deployments 313 | 314 | ### Deployment Options 315 | 316 | #### Docker 317 | 318 | ```bash 319 | # Build the Docker image locally 320 | docker build -t biomcp:latest . 321 | 322 | # Run the container 323 | docker run -p 8000:8000 biomcp:latest biomcp run --mode streamable_http 324 | ``` 325 | 326 | #### Cloudflare Workers 327 | 328 | The worker mode can be deployed to Cloudflare Workers for global edge deployment. 329 | 330 | Note: Most APIs work without authentication, but tokens may provide higher rate limits. The NCI Clinical Trials Search API is the exception: NCI-specific commands require an NCI API key. 331 | 332 | ## Command Line Interface 333 | 334 | BioMCP provides a comprehensive CLI for direct database interaction: 335 | 336 | ```bash 337 | # Get help 338 | biomcp --help 339 | 340 | # Run the MCP server 341 | biomcp run 342 | 343 | # Article search examples 344 | biomcp article search --gene BRAF --disease Melanoma # Includes preprints by default 345 | biomcp article search --gene BRAF --no-preprints # Exclude preprints 346 | biomcp article get 21717063 --full 347 | 348 | # Clinical trial examples 349 | biomcp trial search --condition "Lung Cancer" --phase PHASE3 350 | biomcp trial search --condition melanoma --source nci --api-key YOUR_KEY # Use NCI API 351 | biomcp trial get NCT04280705 Protocol 352 | biomcp trial get NCT04280705 --source nci --api-key YOUR_KEY # Get from NCI 353 | 354 | # Variant examples with external annotations 355 | biomcp variant search --gene TP53 --significance pathogenic 356 | biomcp variant get rs113488022 # Includes TCGA, 1000 Genomes, and cBioPortal data by default 357 | biomcp variant get rs113488022 --no-external # Core annotations only 358 | 359 | 
# NCI-specific examples (requires NCI API key) 360 | biomcp organization search "MD Anderson" --api-key YOUR_KEY 361 | biomcp organization get ORG123456 --api-key YOUR_KEY 362 | biomcp intervention search pembrolizumab --api-key YOUR_KEY 363 | biomcp intervention search --type Device --api-key YOUR_KEY 364 | biomcp biomarker search "PD-L1" --api-key YOUR_KEY 365 | biomcp disease search melanoma --source nci --api-key YOUR_KEY 366 | ``` 367 | 368 | ## Testing & Verification 369 | 370 | Test your BioMCP setup with the MCP Inspector: 371 | 372 | ```bash 373 | npx @modelcontextprotocol/inspector uv run --with biomcp-python biomcp run 374 | ``` 375 | 376 | This opens a web interface where you can explore and test all available tools. 377 | 378 | ## Enterprise Version: OncoMCP 379 | 380 | OncoMCP extends BioMCP with GenomOncology's enterprise-grade precision oncology 381 | platform (POP), providing: 382 | 383 | - **HIPAA-Compliant Deployment**: Secure on-premise options 384 | - **Real-Time Trial Matching**: Up-to-date status and arm-level matching 385 | - **Healthcare Integration**: Seamless EHR and data warehouse connectivity 386 | - **Curated Knowledge Base**: 15,000+ trials and FDA approvals 387 | - **Sophisticated Patient Matching**: Using integrated clinical and molecular 388 | profiles 389 | - **Advanced NLP**: Structured extraction from unstructured text 390 | - **Comprehensive Biomarker Processing**: Mutation and rule processing 391 | 392 | Learn more: [GenomOncology](https://genomoncology.com/) 393 | 394 | ## MCP Registries 395 | 396 | [BioMCP on Smithery](https://smithery.ai/server/@genomoncology/biomcp) 397 | 398 | <a href="https://glama.ai/mcp/servers/@genomoncology/biomcp"> 399 | <img width="380" height="200" src="https://glama.ai/mcp/servers/@genomoncology/biomcp/badge" /> 400 | </a> 401 | 402 | ## Example Use Cases 403 | 404 | ### Gene Information Retrieval 405 | 406 | ```python 407 | # Get comprehensive gene information 408 | gene_getter(gene_id_or_symbol="TP53") 409 | # 
Returns: Official name, summary, aliases, links to databases 410 | ``` 411 | 412 | ### Disease Synonym Expansion 413 | 414 | ```python 415 | # Get disease information with synonyms 416 | disease_getter(disease_id_or_name="GIST") 417 | # Returns: "gastrointestinal stromal tumor" and other synonyms 418 | 419 | # Search trials with automatic synonym expansion 420 | trial_searcher(conditions=["GIST"], expand_synonyms=True) 421 | # Searches for: GIST OR "gastrointestinal stromal tumor" OR "GI stromal tumor" 422 | ``` 423 | 424 | ### Integrated Biomedical Research 425 | 426 | ```python 427 | # 1. Always start with thinking 428 | think(thought="Analyzing BRAF V600E in melanoma treatment", thoughtNumber=1) 429 | 430 | # 2. Get gene context 431 | gene_getter("BRAF") 432 | 433 | # 3. Search for pathogenic variants 434 | variant_searcher(gene="BRAF", hgvsp="V600E", significance="pathogenic") 435 | 436 | # 4. Find relevant clinical trials with disease expansion 437 | trial_searcher(conditions=["melanoma"], interventions=["BRAF inhibitor"]) 438 | ``` 439 | 440 | ## Documentation 441 | 442 | For comprehensive documentation, visit [https://biomcp.org](https://biomcp.org) 443 | 444 | ### Developer Guides 445 | 446 | - [HTTP Client Guide](./docs/http-client-guide.md) - Using the centralized HTTP client 447 | - [Migration Examples](./docs/migration-examples.md) - Migrating from direct HTTP usage 448 | - [Error Handling Guide](./docs/error-handling.md) - Comprehensive error handling patterns 449 | - [Integration Testing Guide](./docs/integration-testing.md) - Best practices for reliable integration tests 450 | - [Third-Party Endpoints](./THIRD_PARTY_ENDPOINTS.md) - Complete list of external APIs used 451 | - [Testing Guide](./docs/development/testing.md) - Running tests and understanding test categories 452 | 453 | ## Development 454 | 455 | ### Running Tests 456 | 457 | ```bash 458 | # Run all tests (including integration tests) 459 | make test 460 | 461 | # Run only unit tests 
(excluding integration tests) 462 | uv run python -m pytest tests -m "not integration" 463 | 464 | # Run only integration tests 465 | uv run python -m pytest tests -m "integration" 466 | ``` 467 | 468 | **Note**: Integration tests make real API calls and may fail due to network issues or rate limiting. 469 | In CI/CD, integration tests are run separately and allowed to fail without blocking the build. 470 | 471 | ## BioMCP Examples Repo 472 | 473 | Looking to see BioMCP in action? 474 | 475 | Check out the companion repository: 476 | 👉 **[biomcp-examples](https://github.com/genomoncology/biomcp-examples)** 477 | 478 | It contains real prompts, AI-generated research briefs, and evaluation runs across different models. 479 | Use it to explore capabilities, compare outputs, or benchmark your own setup. 480 | 481 | Have a cool example of your own? 482 | **We’d love for you to contribute!** Just fork the repo and submit a PR with your experiment. 483 | 484 | ## License 485 | 486 | This project is licensed under the MIT License. 
487 | ``` -------------------------------------------------------------------------------- /tests/tdd/drugs/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """Tests for drug information tools.""" 2 | ``` -------------------------------------------------------------------------------- /tests/tdd/openfda/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """Test package for OpenFDA integration.""" 2 | ``` -------------------------------------------------------------------------------- /tests/tdd/thinking/__init__.py: -------------------------------------------------------------------------------- ```python 1 | # Test module for sequential thinking functionality 2 | ``` -------------------------------------------------------------------------------- /src/biomcp/thinking/__init__.py: -------------------------------------------------------------------------------- ```python 1 | from . 
import sequential 2 | 3 | __all__ = [ 4 | "sequential", 5 | ] 6 | ``` -------------------------------------------------------------------------------- /src/biomcp/resources/__init__.py: -------------------------------------------------------------------------------- ```python 1 | from .getter import get_instructions 2 | 3 | __all__ = [ 4 | "get_instructions", 5 | ] 6 | ``` -------------------------------------------------------------------------------- /src/biomcp/cli/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """BioMCP Command Line Interface.""" 2 | 3 | from .main import app 4 | 5 | __all__ = ["app"] 6 | ``` -------------------------------------------------------------------------------- /src/biomcp/genes/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """Gene information tools for BioMCP.""" 2 | 3 | from .getter import get_gene 4 | 5 | __all__ = ["get_gene"] 6 | ``` -------------------------------------------------------------------------------- /glama.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "$schema": "https://glama.ai/mcp/schemas/server.json", 3 | "maintainers": ["imaurer", "jyeakley"] 4 | } 5 | ``` -------------------------------------------------------------------------------- /src/biomcp/drugs/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """Drug information tools using MyChem.info.""" 2 | 3 | from .getter import get_drug 4 | 5 | __all__ = ["get_drug"] 6 | ``` -------------------------------------------------------------------------------- /src/biomcp/workers/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """Cloudflare Workers module for BioMCP.""" 2 | 3 | from .worker import create_worker_app 4 | 5 | __all__ = ["create_worker_app"] 6 | ``` 
-------------------------------------------------------------------------------- /src/biomcp/variants/__init__.py: -------------------------------------------------------------------------------- ```python 1 | from . import search 2 | from . import getter 3 | from . import external 4 | 5 | __all__ = [ 6 | "external", 7 | "getter", 8 | "search", 9 | ] 10 | ``` -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "devDependencies": { 3 | "wrangler": "^4.13.2" 4 | }, 5 | "dependencies": { 6 | "hono": "^4.7.8", 7 | "jose": "^6.0.11" 8 | } 9 | } 10 | ``` -------------------------------------------------------------------------------- /codecov.yaml: -------------------------------------------------------------------------------- ```yaml 1 | coverage: 2 | range: 90..100 3 | round: down 4 | precision: 1 5 | status: 6 | project: 7 | default: 8 | target: 95% 9 | threshold: 0.5% 10 | ``` -------------------------------------------------------------------------------- /src/biomcp/utils/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """Utility modules for BioMCP.""" 2 | 3 | from .query_utils import parse_or_query, contains_or_operator 4 | 5 | __all__ = ["contains_or_operator", "parse_or_query"] 6 | ``` -------------------------------------------------------------------------------- /src/biomcp/integrations/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """BioThings API integrations for BioMCP.""" 2 | 3 | from .biothings_client import BioThingsClient, DiseaseInfo, GeneInfo 4 | 5 | __all__ = ["BioThingsClient", "DiseaseInfo", "GeneInfo"] 6 | ``` -------------------------------------------------------------------------------- /src/biomcp/variants/constants.py: 
-------------------------------------------------------------------------------- ```python 1 | """Constants for variant modules.""" 2 | 3 | import os 4 | 5 | # cBioPortal API endpoints 6 | CBIO_BASE_URL = os.getenv("CBIO_BASE_URL", "https://www.cbioportal.org/api") 7 | CBIO_TOKEN = os.getenv("CBIO_TOKEN") 8 | ``` -------------------------------------------------------------------------------- /src/biomcp/articles/__init__.py: -------------------------------------------------------------------------------- ```python 1 | from . import autocomplete 2 | from . import fetch 3 | from . import search 4 | from . import preprints 5 | from . import unified 6 | 7 | 8 | __all__ = [ 9 | "autocomplete", 10 | "fetch", 11 | "preprints", 12 | "search", 13 | "unified", 14 | ] 15 | ``` -------------------------------------------------------------------------------- /lzyank.toml: -------------------------------------------------------------------------------- ```toml 1 | [default] 2 | exclude = [ 3 | "uv.lock", 4 | "lzyank.toml", 5 | ".github", 6 | "*.ini", 7 | ".pre-commit-config.yaml", 8 | "LICENSE", 9 | "codecov.yaml", 10 | "mkdocs.yml", 11 | "tests/data" 12 | ] 13 | 14 | [actions] 15 | include = [".github/"] 16 | ``` -------------------------------------------------------------------------------- /src/biomcp/trials/__init__.py: -------------------------------------------------------------------------------- ```python 1 | from . import getter 2 | from . import nci_getter 3 | from . import nci_search 4 | from . 
import search 5 | from .search import LineOfTherapy 6 | 7 | __all__ = [ 8 | "LineOfTherapy", 9 | "getter", 10 | "nci_getter", 11 | "nci_search", 12 | "search", 13 | ] 14 | ``` -------------------------------------------------------------------------------- /src/biomcp/diseases/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """Disease information tools for BioMCP.""" 2 | 3 | from .getter import get_disease 4 | from .search import search_diseases, get_disease_by_id, search_diseases_with_or 5 | 6 | __all__ = [ 7 | "get_disease", 8 | "get_disease_by_id", 9 | "search_diseases", 10 | "search_diseases_with_or", 11 | ] 12 | ``` -------------------------------------------------------------------------------- /src/biomcp/interventions/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """Interventions module for NCI Clinical Trials API integration.""" 2 | 3 | from .getter import get_intervention 4 | from .search import search_interventions, search_interventions_with_or 5 | 6 | __all__ = [ 7 | "get_intervention", 8 | "search_interventions", 9 | "search_interventions_with_or", 10 | ] 11 | ``` -------------------------------------------------------------------------------- /src/biomcp/organizations/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """Organizations module for NCI Clinical Trials API integration.""" 2 | 3 | from .getter import get_organization 4 | from .search import search_organizations, search_organizations_with_or 5 | 6 | __all__ = [ 7 | "get_organization", 8 | "search_organizations", 9 | "search_organizations_with_or", 10 | ] 11 | ``` -------------------------------------------------------------------------------- /docs/robots.txt: -------------------------------------------------------------------------------- ``` 1 | # Robots.txt for BioMCP Documentation 2 | # https://biomcp.org/ 3 | 
4 | User-agent: * 5 | Allow: / 6 | 7 | # Sitemap location 8 | Sitemap: https://biomcp.org/sitemap.xml 9 | 10 | # Rate limiting for crawlers 11 | Crawl-delay: 1 12 | 13 | # Block access to build artifacts 14 | Disallow: /site/ 15 | Disallow: /.git/ 16 | Disallow: /node_modules/ 17 | ``` -------------------------------------------------------------------------------- /tests/data/pubtator/pubtator_autocomplete.json: -------------------------------------------------------------------------------- ```json 1 | [ 2 | { 3 | "_id": "@GENE_BRAF", 4 | "biotype": "gene", 5 | "name": "BRAF", 6 | "description": "All Species", 7 | "match": "Matched on name <m>BRAF</m>" 8 | }, 9 | { 10 | "_id": "@GENE_BRAFP1", 11 | "biotype": "gene", 12 | "name": "BRAFP1", 13 | "description": "All Species", 14 | "match": "Matched on name <m>BRAFP1</m>" 15 | } 16 | ] 17 | ``` -------------------------------------------------------------------------------- /src/biomcp/biomarkers/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """Biomarkers module for NCI Clinical Trials API integration. 2 | 3 | Note: CTRP documentation indicates biomarker data may have limited public availability. 4 | This module focuses on trial eligibility biomarkers. 
5 | """ 6 | 7 | from .search import search_biomarkers, search_biomarkers_with_or 8 | 9 | __all__ = ["search_biomarkers", "search_biomarkers_with_or"] 10 | ``` -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- ``` 1 | [tox] 2 | skipsdist = true 3 | envlist = py311, py312, py313 4 | 5 | [gh-actions] 6 | python = 7 | 3.11: py311 8 | 3.12: py312 9 | 3.13: py313 10 | 11 | [testenv] 12 | passenv = PYTHON_VERSION 13 | allowlist_externals = uv 14 | commands = 15 | uv sync --python {envpython} 16 | uv run python -m pytest --doctest-modules tests --cov --cov-config=pyproject.toml --cov-report=xml 17 | mypy 18 | ``` -------------------------------------------------------------------------------- /src/biomcp/__main__.py: -------------------------------------------------------------------------------- ```python 1 | import sys 2 | 3 | from dotenv import load_dotenv 4 | 5 | from .cli import app 6 | 7 | # Load environment variables from .env file 8 | load_dotenv() 9 | 10 | 11 | def main(): 12 | try: 13 | app(standalone_mode=True) 14 | except SystemExit as e: 15 | sys.exit(e.code) 16 | 17 | 18 | if __name__ == "__main__": 19 | main() 20 | 21 | # Make main() the callable when importing __main__ 22 | __call__ = main 23 | ``` -------------------------------------------------------------------------------- /.github/workflows/validate-codecov-config.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: validate-codecov-config 2 | 3 | on: 4 | pull_request: 5 | paths: [codecov.yaml] 6 | push: 7 | branches: [main] 8 | 9 | jobs: 10 | validate-codecov-config: 11 | runs-on: ubuntu-22.04 12 | steps: 13 | - uses: actions/checkout@v5 14 | - name: Validate codecov configuration 15 | run: curl -sSL --fail-with-body --data-binary @codecov.yaml https://codecov.io/validate 16 | ``` 
-------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- ```yaml 1 | services: 2 | biomcp-server: 3 | platform: linux/amd64 4 | build: . 5 | image: us.gcr.io/graceful-medley-134315/biomcp-server:${TAG} 6 | container_name: biomcp-server 7 | ports: 8 | - "8000:8000" 9 | environment: 10 | - MCP_MODE=streamable_http # Can be 'stdio', 'worker', 'http', or 'streamable_http' 11 | - ALPHAGENOME_API_KEY=${ALPHAGENOME_API_KEY:-} 12 | restart: unless-stopped 13 | ``` -------------------------------------------------------------------------------- /tests/tdd/variants/constants.py: -------------------------------------------------------------------------------- ```python 1 | """Constants for variant tests.""" 2 | 3 | # API retry settings 4 | API_RETRY_DELAY_SECONDS = 1.0 5 | MAX_RETRY_ATTEMPTS = 2 6 | 7 | # Test data settings 8 | DEFAULT_MAX_STUDIES = 10 # Number of studies to query in integration tests 9 | STRUCTURE_CHECK_LIMIT = ( 10 | 3 # Number of items to check when verifying data structures 11 | ) 12 | 13 | # Timeout settings 14 | INTEGRATION_TEST_TIMEOUT = 30.0 # Maximum time for integration tests 15 | ``` -------------------------------------------------------------------------------- /src/biomcp/resources/getter.py: -------------------------------------------------------------------------------- ```python 1 | from pathlib import Path 2 | 3 | from .. 
import mcp_app 4 | 5 | RESOURCES_ROOT = Path(__file__).parent 6 | 7 | 8 | @mcp_app.resource("biomcp://instructions.md") 9 | def get_instructions() -> str: 10 | return (RESOURCES_ROOT / "instructions.md").read_text(encoding="utf-8") 11 | 12 | 13 | @mcp_app.resource("biomcp://researcher.md") 14 | def get_researcher() -> str: 15 | return (RESOURCES_ROOT / "researcher.md").read_text(encoding="utf-8") 16 | ``` -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- ```yaml 1 | version: 2 2 | updates: 3 | # Maintain dependencies for GitHub Actions 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "weekly" 8 | commit-message: 9 | prefix: "chore" 10 | include: "scope" 11 | 12 | # Maintain dependencies for Python 13 | - package-ecosystem: "pip" 14 | directory: "/" 15 | schedule: 16 | interval: "weekly" 17 | open-pull-requests-limit: 10 18 | commit-message: 19 | prefix: "chore" 20 | include: "scope" 21 | allow: 22 | - dependency-type: "all" 23 | ``` -------------------------------------------------------------------------------- /tests/tdd/conftest.py: -------------------------------------------------------------------------------- ```python 1 | from pathlib import Path 2 | 3 | from pytest import fixture 4 | 5 | from biomcp import http_client 6 | 7 | 8 | @fixture 9 | def anyio_backend(): 10 | return "asyncio" 11 | 12 | 13 | class DummyCache: 14 | def __init__(self): 15 | self.store = {} 16 | 17 | def set(self, key, value, expire=None): 18 | self.store[key] = value 19 | 20 | def get(self, key, default=None): 21 | return self.store.get(key, default) 22 | 23 | @property 24 | def count(self): 25 | return len(self.store) 26 | 27 | def close(self): 28 | self.store.clear() 29 | 30 | 31 | @fixture 32 | def http_cache(): 33 | cache = DummyCache() 34 | http_client._cache = cache 35 | yield cache 36 | cache.close() 37 
| 38 | 39 | @fixture 40 | def data_dir(): 41 | return Path(__file__).parent.parent / "data" 42 | ``` -------------------------------------------------------------------------------- /.github/actions/setup-python-env/action.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: "Setup Python Environment" 2 | description: "Set up Python environment for the given Python version" 3 | 4 | inputs: 5 | python-version: 6 | description: "Python version to use" 7 | required: true 8 | default: "3.12" 9 | uv-version: 10 | description: "uv version to use" 11 | required: true 12 | default: "0.5.20" 13 | 14 | runs: 15 | using: "composite" 16 | steps: 17 | - uses: actions/setup-python@v5 18 | with: 19 | python-version: ${{ inputs.python-version }} 20 | 21 | - name: Install uv 22 | uses: astral-sh/setup-uv@v2 23 | with: 24 | version: ${{ inputs.uv-version }} 25 | enable-cache: "true" 26 | cache-suffix: ${{ matrix.python-version }} 27 | 28 | - name: Install Python dependencies 29 | run: uv sync --frozen 30 | shell: bash 31 | ``` -------------------------------------------------------------------------------- /src/biomcp/__init__.py: -------------------------------------------------------------------------------- ```python 1 | from .core import ensure_list, logger, mcp_app, StrEnum 2 | 3 | from . import constants 4 | from . import http_client 5 | from . import render 6 | from . import articles 7 | from . import trials 8 | from . import variants 9 | from . import resources 10 | from . import thinking 11 | from . import query_parser 12 | from . import query_router 13 | from . import router 14 | from . import thinking_tool 15 | from . import individual_tools 16 | from . 
import cbioportal_helper 17 | 18 | 19 | __all__ = [ 20 | "StrEnum", 21 | "articles", 22 | "cbioportal_helper", 23 | "constants", 24 | "ensure_list", 25 | "http_client", 26 | "individual_tools", 27 | "logger", 28 | "mcp_app", 29 | "query_parser", 30 | "query_router", 31 | "render", 32 | "resources", 33 | "router", 34 | "thinking", 35 | "thinking_tool", 36 | "trials", 37 | "variants", 38 | ] 39 | ``` -------------------------------------------------------------------------------- /docs/developer-guides/generate_endpoints.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python3 2 | """ 3 | Generate third-party endpoints documentation from the endpoint registry. 4 | 5 | This script reads the endpoint registry and generates a markdown file 6 | documenting all third-party API endpoints used by BioMCP. 7 | """ 8 | 9 | import sys 10 | from pathlib import Path 11 | 12 | # Add src to Python path 13 | sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) 14 | 15 | from biomcp.utils.endpoint_registry import EndpointRegistry 16 | 17 | 18 | def main(): 19 | """Generate endpoints documentation.""" 20 | # Initialize registry 21 | registry = EndpointRegistry() 22 | 23 | # Generate markdown report 24 | markdown_content = registry.generate_markdown_report() 25 | 26 | # Write to file 27 | output_path = Path(__file__).parent / "03-third-party-endpoints.md" 28 | output_path.write_text(markdown_content) 29 | 30 | print(f"Generated endpoints documentation: {output_path}") 31 | 32 | 33 | if __name__ == "__main__": 34 | main() 35 | ``` -------------------------------------------------------------------------------- /tests/tdd/articles/test_fetch.py: -------------------------------------------------------------------------------- ```python 1 | import json 2 | 3 | from biomcp.articles.fetch import fetch_articles 4 | 5 | pmids = [39293516, 34397683, 37296959] 6 | 7 | 8 | async def 
test_fetch_full_text(anyio_backend): 9 | results = await fetch_articles(pmids, full=True, output_json=True) 10 | assert isinstance(results, str) 11 | data = json.loads(results) 12 | assert len(data) == 3 13 | for item in data: 14 | assert item["pmid"] in pmids 15 | assert len(item["title"]) > 10 16 | assert len(item["abstract"]) > 100 17 | assert item["full_text"] is not None 18 | 19 | 20 | async def test_fetch_abstracts(anyio_backend): 21 | results = await fetch_articles(pmids, full=False, output_json=True) 22 | assert isinstance(results, str) 23 | data = json.loads(results) 24 | assert len(data) == 3 25 | for item in data: 26 | assert item["pmid"] in pmids 27 | assert len(item["title"]) > 10 28 | assert len(item["abstract"]) > 100 29 | assert "full_text" not in item 30 | ``` -------------------------------------------------------------------------------- /src/biomcp/openfda/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | OpenFDA integration for BioMCP. 3 | 4 | Provides access to FDA drug labels, adverse events, device data, 5 | drug approvals, recalls, and shortage information. 
6 | """ 7 | 8 | from .adverse_events import ( 9 | search_adverse_events, 10 | get_adverse_event, 11 | ) 12 | from .drug_labels import ( 13 | search_drug_labels, 14 | get_drug_label, 15 | ) 16 | from .device_events import ( 17 | search_device_events, 18 | get_device_event, 19 | ) 20 | from .drug_approvals import ( 21 | search_drug_approvals, 22 | get_drug_approval, 23 | ) 24 | from .drug_recalls import ( 25 | search_drug_recalls, 26 | get_drug_recall, 27 | ) 28 | from .drug_shortages import ( 29 | search_drug_shortages, 30 | get_drug_shortage, 31 | ) 32 | 33 | __all__ = [ 34 | "get_adverse_event", 35 | "get_device_event", 36 | "get_drug_approval", 37 | "get_drug_label", 38 | "get_drug_recall", 39 | "get_drug_shortage", 40 | "search_adverse_events", 41 | "search_device_events", 42 | "search_drug_approvals", 43 | "search_drug_labels", 44 | "search_drug_recalls", 45 | "search_drug_shortages", 46 | ] 47 | ``` -------------------------------------------------------------------------------- /.github/workflows/deploy-docs.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: Deploy Documentation 2 | 3 | on: 4 | # Allows you to manually trigger this workflow from the Actions tab 5 | workflow_dispatch: 6 | 7 | # Automatically trigger on pushes to main IF docs changed 8 | push: 9 | branches: 10 | - main 11 | paths: 12 | - "docs/**" 13 | - "mkdocs.yml" 14 | - ".github/workflows/deploy-docs.yml" 15 | 16 | jobs: 17 | deploy: 18 | runs-on: ubuntu-latest 19 | permissions: 20 | contents: write 21 | steps: 22 | - name: Check out code 23 | uses: actions/checkout@v5 24 | with: 25 | fetch-depth: 0 26 | 27 | - name: Set up Python environment 28 | uses: ./.github/actions/setup-python-env 29 | with: 30 | python-version: "3.11" 31 | uv-version: "0.5.20" 32 | 33 | - name: Configure Git User 34 | run: | 35 | git config user.name "github-actions[bot]" 36 | git config user.email "41898282+github-actions[bot]@users.noreply.github.com" 37 | 38 | 
- name: Deploy documentation using MkDocs 39 | run: | 40 | uv run mkdocs gh-deploy --force 41 | ``` -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- ```dockerfile 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile 2 | FROM python:3.11-slim 3 | 4 | # set work directory 5 | WORKDIR /app 6 | 7 | # Install build dependencies and git (needed for AlphaGenome) 8 | RUN apt-get update && apt-get install -y --no-install-recommends gcc build-essential git && rm -rf /var/lib/apt/lists/* 9 | 10 | # Copy requirements (pyproject.toml, etc.) 11 | COPY pyproject.toml . 12 | COPY README.md . 13 | COPY LICENSE . 14 | 15 | # Copy source code 16 | COPY src ./src 17 | COPY tests ./tests 18 | COPY Makefile . 19 | COPY tox.ini . 20 | 21 | # Install the package with worker dependencies 22 | RUN pip install --upgrade pip && pip install .[worker] 23 | 24 | # Clone and install AlphaGenome 25 | RUN git clone https://github.com/google-deepmind/alphagenome.git /tmp/alphagenome && \ 26 | pip install /tmp/alphagenome && \ 27 | rm -rf /tmp/alphagenome 28 | 29 | # Expose port for remote MCP connections 30 | EXPOSE 8000 31 | 32 | # Set default mode to stdio, but allow it to be overridden 33 | ENV MCP_MODE=stdio 34 | 35 | # Run the MCP server with configurable mode 36 | CMD ["sh", "-c", "biomcp run --mode ${MCP_MODE}"] 37 | ``` -------------------------------------------------------------------------------- /src/biomcp/thinking_tracker.py: -------------------------------------------------------------------------------- ```python 1 | """Track thinking tool usage within MCP sessions. 2 | 3 | This module provides a simple mechanism to track whether the think tool 4 | has been used in the current session, encouraging AI clients to follow 5 | best practices. 
6 | """ 7 | 8 | from contextvars import ContextVar 9 | 10 | # Track if thinking has been used in current context 11 | thinking_used: ContextVar[bool] = ContextVar("thinking_used", default=False) 12 | 13 | 14 | def mark_thinking_used() -> None: 15 | """Mark that the thinking tool has been used.""" 16 | thinking_used.set(True) 17 | 18 | 19 | def has_thinking_been_used() -> bool: 20 | """Check if thinking tool has been used in current context.""" 21 | return thinking_used.get() 22 | 23 | 24 | def reset_thinking_tracker() -> None: 25 | """Reset the thinking tracker (for testing).""" 26 | thinking_used.set(False) 27 | 28 | 29 | def get_thinking_reminder() -> str: 30 | """Get a reminder message if thinking hasn't been used.""" 31 | if not has_thinking_been_used(): 32 | return ( 33 | "\n\n⚠️ **REMINDER**: You haven't used the 'think' tool yet! " 34 | "For optimal results, please use 'think' BEFORE searching to plan " 35 | "your research strategy and ensure comprehensive analysis." 36 | ) 37 | return "" 38 | ``` -------------------------------------------------------------------------------- /tests/bdd/cli_help/test_help.py: -------------------------------------------------------------------------------- ```python 1 | import shlex 2 | 3 | from pytest_bdd import given, parsers, scenarios, then 4 | from typer.testing import CliRunner 5 | 6 | from biomcp.cli.main import app 7 | 8 | # Link to the feature file 9 | scenarios("help.feature") 10 | 11 | runner = CliRunner() 12 | 13 | 14 | @given(parsers.parse('I run "{command}"'), target_fixture="cli_result") 15 | def cli_result(command): 16 | """ 17 | Run the given CLI command and return the result. 
18 | """ 19 | # Remove the initial token ("biomcp") if present 20 | args = ( 21 | shlex.split(command)[1:] 22 | if command.startswith("biomcp") 23 | else shlex.split(command) 24 | ) 25 | result = runner.invoke(app, args) 26 | assert result.exit_code == 0, f"CLI command failed: {result.stderr}" 27 | return result 28 | 29 | 30 | @then(parsers.parse('the output should contain "{expected}"')) 31 | def output_should_contain(cli_result, expected): 32 | """ 33 | Verify that the output contains the expected text. 34 | This helper handles both plain text and rich-formatted text outputs. 35 | """ 36 | # Check if the expected text is in the output, ignoring case 37 | assert ( 38 | expected.lower() in cli_result.stdout.lower() 39 | ), f"Expected output to contain '{expected}', but it did not.\nActual output: {cli_result.stdout}" 40 | ``` -------------------------------------------------------------------------------- /tests/tdd/articles/test_autocomplete.py: -------------------------------------------------------------------------------- ```python 1 | from biomcp.articles.autocomplete import Entity, EntityRequest, autocomplete 2 | 3 | 4 | async def test_autocomplete(anyio_backend, http_cache): 5 | # new cache for each call 6 | assert http_cache.count == 0 7 | 8 | # gene (compare using entity_id directly) 9 | request = EntityRequest(concept="gene", query="her2") 10 | entity = await autocomplete(request=request) 11 | assert entity.entity_id == "@GENE_ERBB2" 12 | 13 | # variant 14 | request = EntityRequest(concept="variant", query="BRAF V600E") 15 | assert await autocomplete(request=request) == Entity( 16 | _id="@VARIANT_p.V600E_BRAF_human", 17 | biotype="variant", 18 | name="p.V600E", 19 | ) 20 | 21 | # disease 22 | request = EntityRequest(concept="disease", query="lung adenocarcinoma") 23 | assert await autocomplete(request=request) == Entity( 24 | _id="@DISEASE_Adenocarcinoma_of_Lung", 25 | biotype="disease", 26 | name="Adenocarcinoma of Lung", 27 | match="Multiple matches", 
28 | ) 29 | 30 | assert http_cache.count == 3 31 | 32 | # duplicate request uses the cached response 33 | request = EntityRequest(concept="gene", query="her2") 34 | entity = await autocomplete(request=request) 35 | assert entity.entity_id == "@GENE_ERBB2" 36 | assert http_cache.count == 3 37 | ``` -------------------------------------------------------------------------------- /scripts/generate_endpoints_doc.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python3 2 | """Generate THIRD_PARTY_ENDPOINTS.md documentation.""" 3 | 4 | import shutil 5 | import subprocess 6 | import sys 7 | from pathlib import Path 8 | 9 | # Add src to path 10 | sys.path.insert(0, str(Path(__file__).parent.parent / "src")) 11 | 12 | from biomcp.utils.endpoint_registry import get_registry 13 | 14 | 15 | def main(): 16 | """Generate the endpoints documentation.""" 17 | registry = get_registry() 18 | output_path = Path(__file__).parent.parent / "THIRD_PARTY_ENDPOINTS.md" 19 | 20 | # Generate new content 21 | new_content = registry.generate_markdown_report() 22 | 23 | # Write new content 24 | output_path.write_text(new_content) 25 | 26 | # Run prettier to format the file 27 | npx_path = shutil.which("npx") 28 | if npx_path: 29 | try: 30 | # Safe: npx_path from shutil.which, output_path is controlled 31 | subprocess.run( # noqa: S603 32 | [npx_path, "prettier", "--write", str(output_path)], 33 | check=True, 34 | capture_output=True, 35 | text=True, 36 | ) 37 | except subprocess.CalledProcessError as e: 38 | print(f"Warning: prettier formatting failed: {e.stderr}") 39 | else: 40 | print("Warning: npx not found, skipping prettier formatting") 41 | 42 | print(f"Generated {output_path}") 43 | 44 | 45 | if __name__ == "__main__": 46 | main() 47 | ``` -------------------------------------------------------------------------------- /tests/data/openfda/drugsfda_search.json: 
-------------------------------------------------------------------------------- ```json 1 | { 2 | "meta": { 3 | "results": { 4 | "skip": 0, 5 | "limit": 10, 6 | "total": 25 7 | } 8 | }, 9 | "results": [ 10 | { 11 | "application_number": "BLA125514", 12 | "sponsor_name": "MERCK SHARP DOHME", 13 | "openfda": { 14 | "application_number": ["BLA125514"], 15 | "brand_name": ["KEYTRUDA"], 16 | "generic_name": ["PEMBROLIZUMAB"], 17 | "manufacturer_name": ["Merck Sharp & Dohme Corp."], 18 | "substance_name": ["PEMBROLIZUMAB"] 19 | }, 20 | "products": [ 21 | { 22 | "product_number": "001", 23 | "reference_drug": "Yes", 24 | "brand_name": "KEYTRUDA", 25 | "active_ingredients": [ 26 | { 27 | "name": "PEMBROLIZUMAB", 28 | "strength": "100MG/4ML" 29 | } 30 | ], 31 | "reference_standard": "Yes", 32 | "dosage_form": "INJECTION, SOLUTION", 33 | "route": "INTRAVENOUS", 34 | "marketing_status": "Prescription" 35 | } 36 | ], 37 | "submissions": [ 38 | { 39 | "submission_type": "BLA", 40 | "submission_number": "125514", 41 | "submission_status": "AP", 42 | "submission_status_date": "20140904", 43 | "submission_class_code": "BLA", 44 | "submission_class_code_description": "Biologic License Application" 45 | } 46 | ] 47 | } 48 | ] 49 | } 50 | ``` -------------------------------------------------------------------------------- /tests/tdd/variants/test_filters.py: -------------------------------------------------------------------------------- ```python 1 | """Tests for the filters module.""" 2 | 3 | import json 4 | import os 5 | from typing import Any 6 | 7 | import pytest 8 | 9 | from biomcp.variants.filters import filter_variants 10 | 11 | 12 | @pytest.fixture 13 | def braf_v600e_variants() -> list[dict[str, Any]]: 14 | """Load BRAF V600E test data.""" 15 | test_data_path = os.path.join( 16 | os.path.dirname(__file__), 17 | "../../data/myvariant/variants_full_braf_v600e.json", 18 | ) 19 | with open(test_data_path) as f: 20 | data = json.load(f) 21 | return data.get("hits", []) 22 | 23 
| 24 | def test_filter_variants_civic_contributors(braf_v600e_variants): 25 | """Test filtering out civic.contributors path.""" 26 | # Verify that civic.contributors exists in original data 27 | variant = braf_v600e_variants[0] 28 | assert "civic" in variant 29 | assert "contributors" in variant["civic"] 30 | assert variant["civic"]["contributors"] is not None 31 | 32 | # Filter out civic.contributors 33 | filtered = filter_variants(braf_v600e_variants) 34 | 35 | # Verify civic.contributors is removed but civic section remains 36 | filtered_variant = filtered[0] 37 | assert "civic" in filtered_variant 38 | assert "contributors" not in filtered_variant["civic"] 39 | 40 | # Verify other civic data is preserved 41 | assert "id" in filtered_variant["civic"] 42 | assert filtered_variant["civic"]["id"] == variant["civic"]["id"] 43 | ``` -------------------------------------------------------------------------------- /tests/bdd/search_articles/test_autocomplete.py: -------------------------------------------------------------------------------- ```python 1 | import asyncio 2 | 3 | from pytest_bdd import given, parsers, scenarios, then, when 4 | 5 | from biomcp.articles.autocomplete import ( 6 | Concept, 7 | Entity, 8 | EntityRequest, 9 | autocomplete, 10 | ) 11 | 12 | scenarios("autocomplete.feature") 13 | 14 | 15 | @given( 16 | parsers.parse( 17 | 'I have a valid concept "{concept}" and a valid query "{query}"', 18 | ), 19 | target_fixture="entity_request", 20 | ) 21 | def entity_request(concept: Concept, query: str): 22 | return EntityRequest(concept=concept, query=query) 23 | 24 | 25 | @given( 26 | parsers.parse( 27 | 'I have a valid concept "{concept}" and an invalid query "{query}"', 28 | ), 29 | target_fixture="entity_request", 30 | ) 31 | def invalid_query_request(concept: Concept, query: str): 32 | return EntityRequest(concept=concept, query=query) 33 | 34 | 35 | @when( 36 | "I call the Pubtator Autocomplete API", 37 | target_fixture="entity", 38 | ) 39 | def 
entity(entity_request) -> Entity | None: 40 | return asyncio.run(autocomplete(request=entity_request)) 41 | 42 | 43 | @then(parsers.parse('the response entity_id should be "{expected_id}"')) 44 | def check_entity_id(entity, expected_id): 45 | assert entity.entity_id == expected_id 46 | 47 | 48 | @then(parsers.parse('the response concept should be "{concept}"')) 49 | def check_concept(entity, concept): 50 | assert entity.concept == concept 51 | 52 | 53 | @then("the response should be empty") 54 | def check_empty_response(entity): 55 | assert entity is None 56 | ``` -------------------------------------------------------------------------------- /src/biomcp/utils/gene_validator.py: -------------------------------------------------------------------------------- ```python 1 | """Gene symbol validation utilities.""" 2 | 3 | import re 4 | 5 | # Common gene symbol patterns 6 | GENE_SYMBOL_PATTERN = re.compile(r"^[A-Z][A-Z0-9-]*(\.[0-9]+)?$") 7 | 8 | # Known problematic or invalid gene symbols 9 | INVALID_GENES = { 10 | "INVALID", 11 | "UNKNOWN", 12 | "NULL", 13 | "NONE", 14 | "TEST", 15 | "INVALID_GENE_XYZ", 16 | } 17 | 18 | 19 | def is_valid_gene_symbol(gene: str | None) -> bool: 20 | """Validate if a string is a valid gene symbol. 
21 | 22 | Args: 23 | gene: The gene symbol to validate 24 | 25 | Returns: 26 | True if the gene symbol appears valid, False otherwise 27 | 28 | Notes: 29 | - Gene symbols should start with a letter 30 | - Can contain letters, numbers, and hyphens 31 | - May have a version suffix (e.g., .1, .2) 32 | - Should be uppercase 33 | - Should not be in the invalid genes list 34 | """ 35 | if not gene: 36 | return False 37 | 38 | gene = gene.strip() 39 | 40 | # Check length constraints 41 | if len(gene) < 2 or len(gene) > 20: 42 | return False 43 | 44 | # Check against known invalid genes 45 | if gene.upper() in INVALID_GENES: 46 | return False 47 | 48 | # Check pattern 49 | return bool(GENE_SYMBOL_PATTERN.match(gene)) 50 | 51 | 52 | def sanitize_gene_symbol(gene: str) -> str: 53 | """Sanitize a gene symbol for API calls. 54 | 55 | Args: 56 | gene: The gene symbol to sanitize 57 | 58 | Returns: 59 | Sanitized gene symbol in uppercase with whitespace stripped 60 | """ 61 | return gene.strip().upper() 62 | ``` -------------------------------------------------------------------------------- /tests/bdd/search_articles/test_search.py: -------------------------------------------------------------------------------- ```python 1 | """Test steps for search_pubmed feature.""" 2 | 3 | from __future__ import annotations 4 | 5 | import asyncio 6 | import json 7 | 8 | from pytest_bdd import given, parsers, scenarios, then, when 9 | 10 | from biomcp.articles.search import ( 11 | PubmedRequest, 12 | search_articles, 13 | ) 14 | 15 | scenarios("search.feature") 16 | 17 | 18 | @given( 19 | parsers.parse('I build a query for "{gene}" "{disease}" "{variant}"'), 20 | target_fixture="query", 21 | ) 22 | def query(gene, disease, variant) -> PubmedRequest: 23 | return PubmedRequest( 24 | genes=[gene], 25 | diseases=[disease], 26 | variants=[variant], 27 | ) 28 | 29 | 30 | @when("I perform a search with that query", target_fixture="result") 31 | def result(query) -> list[dict]: 32 | text = 
asyncio.run(search_articles(query, output_json=True)) 33 | return json.loads(text) 34 | 35 | 36 | @then(parsers.parse('the response should contain the article "{pmid:d}"')) 37 | def step_impl(result: list[dict], pmid: int): 38 | pm_ids = [article["pmid"] for article in result] 39 | assert pmid in pm_ids, f"pmid {pmid} not found in {pm_ids}" 40 | 41 | 42 | @then( 43 | parsers.parse('the article "{pmid:d}" abstract should contain "{phrase}"'), 44 | ) 45 | def step_check_abstract(result: list[dict], pmid: int, phrase: str): 46 | for r in result: 47 | if r["pmid"] == pmid and r.get("abstract"): 48 | assert ( 49 | phrase in r["abstract"] 50 | ), f"Phrase '{phrase}' not found in article {pmid}'s abstract" 51 | return 52 | raise AssertionError(f"Article {pmid} not found or has no abstract") 53 | ``` -------------------------------------------------------------------------------- /src/biomcp/workers/worker.py: -------------------------------------------------------------------------------- ```python 1 | """Worker implementation for BioMCP.""" 2 | 3 | from fastapi import FastAPI, Response 4 | from fastapi.middleware.cors import CORSMiddleware 5 | from starlette.responses import JSONResponse 6 | from starlette.routing import Route 7 | 8 | from .. 
import mcp_app 9 | 10 | app = FastAPI(title="BioMCP Worker", version="0.1.10") 11 | 12 | # Add CORS middleware 13 | app.add_middleware( 14 | CORSMiddleware, 15 | allow_origins=["*"], 16 | allow_credentials=True, 17 | allow_methods=["*"], 18 | allow_headers=["*"], 19 | ) 20 | 21 | streamable_app = mcp_app.streamable_http_app() 22 | 23 | 24 | # Add health endpoint to the streamable app before mounting 25 | async def health_check(request): 26 | return JSONResponse({"status": "healthy"}) 27 | 28 | 29 | health_route = Route("/health", health_check, methods=["GET"]) 30 | streamable_app.routes.append(health_route) 31 | 32 | app.mount("/", streamable_app) 33 | 34 | 35 | # Health endpoint is now added directly to the streamable_app above 36 | 37 | 38 | # Add OPTIONS endpoint for CORS preflight 39 | @app.options("/{path:path}") 40 | async def options_handler(path: str): 41 | """Handle CORS preflight requests.""" 42 | return Response( 43 | content="", 44 | status_code=204, 45 | headers={ 46 | "Access-Control-Allow-Origin": "*", 47 | "Access-Control-Allow-Methods": "GET, POST, OPTIONS", 48 | "Access-Control-Allow-Headers": "*", 49 | "Access-Control-Max-Age": "86400", # 24 hours 50 | }, 51 | ) 52 | 53 | 54 | # Create a stub for create_worker_app to satisfy imports 55 | def create_worker_app() -> FastAPI: 56 | """Stub for create_worker_app to satisfy import in __init__.py.""" 57 | return app 58 | ``` -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- ```python 1 | """Pytest configuration and fixtures.""" 2 | 3 | import os 4 | from unittest.mock import AsyncMock, patch 5 | 6 | import pytest 7 | 8 | # Check if we should skip integration tests 9 | SKIP_INTEGRATION = os.environ.get("SKIP_INTEGRATION_TESTS", "").lower() in ( 10 | "true", 11 | "1", 12 | "yes", 13 | ) 14 | 15 | 16 | def pytest_configure(config): 17 | """Configure pytest with custom 
markers.""" 18 | config.addinivalue_line( 19 | "markers", 20 | "integration: marks tests as integration tests (deselect with '-m \"not integration\"')", 21 | ) 22 | 23 | 24 | def pytest_collection_modifyitems(config, items): 25 | """Modify test collection to handle integration tests.""" 26 | if SKIP_INTEGRATION: 27 | skip_integration = pytest.mark.skip( 28 | reason="Integration tests disabled via SKIP_INTEGRATION_TESTS env var" 29 | ) 30 | for item in items: 31 | if "integration" in item.keywords: 32 | item.add_marker(skip_integration) 33 | 34 | 35 | @pytest.fixture 36 | def mock_cbioportal_api(): 37 | """Mock cBioPortal API responses for testing.""" 38 | with patch( 39 | "biomcp.variants.cbioportal_search.CBioPortalSearchClient.get_gene_search_summary" 40 | ) as mock: 41 | # Return a mock summary 42 | mock.return_value = AsyncMock( 43 | gene="BRAF", 44 | total_mutations=1000, 45 | total_samples_tested=2000, 46 | mutation_frequency=50.0, 47 | hotspots=[ 48 | AsyncMock(amino_acid_change="V600E", count=800), 49 | AsyncMock(amino_acid_change="V600K", count=100), 50 | ], 51 | cancer_distribution=["Melanoma", "Colorectal Cancer"], 52 | study_count=10, 53 | ) 54 | yield mock 55 | ``` -------------------------------------------------------------------------------- /tests/bdd/get_variants/test_get.py: -------------------------------------------------------------------------------- ```python 1 | import json 2 | import shlex 3 | 4 | from pytest_bdd import given, parsers, scenarios, then 5 | from typer.testing import CliRunner 6 | 7 | from biomcp.cli.main import app 8 | 9 | # Link to the feature file 10 | scenarios("get.feature") 11 | 12 | runner = CliRunner() 13 | 14 | 15 | @given(parsers.parse('I run "{command}"'), target_fixture="cli_result") 16 | def cli_result(command): 17 | """ 18 | Run the given CLI command and return the parsed JSON output. 19 | The command is expected to include the '--json' flag. 
20 | """ 21 | args = shlex.split(command)[1:] # remove the leading "biomcp" token 22 | result = runner.invoke(app, args) 23 | assert result.exit_code == 0, f"CLI command failed: {result.stderr}" 24 | return json.loads(result.stdout) 25 | 26 | 27 | def get_field_value_from_variant(variant, field_path): 28 | """ 29 | Retrieve a value from a variant dictionary using a simple dot-notation path. 30 | (This version does not support array indexing.) 31 | """ 32 | parts = field_path.split(".") 33 | value = variant 34 | for part in parts: 35 | value = value.get(part) 36 | if value is None: 37 | break 38 | return value 39 | 40 | 41 | @then( 42 | parsers.parse( 43 | 'at least one variant should have field "{field}" equal to "{expected}"' 44 | ) 45 | ) 46 | def variant_field_should_equal(cli_result, field, expected): 47 | """ 48 | Verify that at least one variant in the returned list has the specified field equal to the expected value. 49 | """ 50 | # cli_result is already a list of variant dicts. 51 | matching = [ 52 | v 53 | for v in cli_result 54 | if str(get_field_value_from_variant(v, field)) == expected 55 | ] 56 | assert ( 57 | matching 58 | ), f"No variant found with field '{field}' equal to '{expected}'" 59 | ``` -------------------------------------------------------------------------------- /tests/bdd/get_trials/test_get.py: -------------------------------------------------------------------------------- ```python 1 | import json 2 | import shlex 3 | 4 | from pytest_bdd import given, parsers, scenarios, then 5 | from typer.testing import CliRunner 6 | 7 | from biomcp.cli.main import app 8 | 9 | # Link to the feature file 10 | scenarios("get.feature") 11 | 12 | runner = CliRunner() 13 | 14 | 15 | @given(parsers.parse('I run "{command}"'), target_fixture="cli_result") 16 | def cli_result(command): 17 | """ 18 | Run the given CLI command and return the parsed JSON output. 19 | The command is expected to include the '--json' flag. 
20 | """ 21 | # Remove the initial token ("biomcp") if present. 22 | args = shlex.split(command)[1:] 23 | result = runner.invoke(app, args) 24 | assert result.exit_code == 0, f"CLI command failed: {result.stderr}" 25 | return json.loads(result.stdout) 26 | 27 | 28 | def get_field_value(data, field_path): 29 | """ 30 | Access a nested dictionary value using a dot-notation path. 31 | Supports array notation like "locations[0]". 32 | """ 33 | parts = field_path.split(".") 34 | value = data 35 | for part in parts: 36 | if "[" in part and part.endswith("]"): 37 | # e.g. "locations[0]" 38 | base, index_str = part[:-1].split("[") 39 | index = int(index_str) 40 | value = value[base][index] 41 | else: 42 | value = value[part] 43 | return value 44 | 45 | 46 | @then(parsers.parse('the field "{field}" should equal "{expected}"')) 47 | def field_should_equal(cli_result, field, expected): 48 | """ 49 | Verify that the value at the specified dot-notation field equals the expected value. 50 | """ 51 | actual = get_field_value(cli_result, field) 52 | # Compare as strings for simplicity. 
53 | assert ( 54 | str(actual) == expected 55 | ), f"Expected field '{field}' to equal '{expected}', but got '{actual}'" 56 | ``` -------------------------------------------------------------------------------- /tests/bdd/conftest.py: -------------------------------------------------------------------------------- ```python 1 | import pytest 2 | 3 | 4 | def _recursive_extract(current_value, key_path, path_index): 5 | """Recursively extract values based on the key path.""" 6 | if path_index >= len(key_path): 7 | if isinstance(current_value, list): 8 | yield from current_value 9 | else: 10 | yield current_value 11 | 12 | else: 13 | k = key_path[path_index] 14 | if isinstance(current_value, dict): 15 | next_value = current_value.get(k) 16 | if next_value is not None: 17 | yield from _recursive_extract( 18 | next_value, 19 | key_path, 20 | path_index + 1, 21 | ) 22 | 23 | elif isinstance(current_value, list): 24 | for item in current_value: 25 | if isinstance(item, dict): 26 | next_value = item.get(k) 27 | if next_value is not None: 28 | yield from _recursive_extract( 29 | next_value, 30 | key_path, 31 | path_index + 1, 32 | ) 33 | 34 | 35 | def iter_value(field_map: dict, data: dict | list, key: str): 36 | """Iterates through a nested structure, yielding all values encountered.""" 37 | if isinstance(data, dict): 38 | # Handle new format with cBioPortal summary 39 | hits = data["variants"] if "variants" in data else data.get("hits", []) 40 | else: 41 | hits = data 42 | key_path = field_map.get(key, [key]) 43 | 44 | # num = variant number for tracking each individual variant 45 | for num, hit in enumerate(hits, 1): 46 | for value in _recursive_extract(hit, key_path, 0): 47 | yield num, value 48 | 49 | 50 | @pytest.fixture(scope="module") 51 | def it() -> callable: 52 | return iter_value 53 | ``` -------------------------------------------------------------------------------- /tests/data/openfda/enforcement_detail.json: 
-------------------------------------------------------------------------------- ```json 1 | { 2 | "meta": { 3 | "results": { 4 | "skip": 0, 5 | "limit": 1, 6 | "total": 1 7 | } 8 | }, 9 | "results": [ 10 | { 11 | "country": "United States", 12 | "city": "Princeton", 13 | "reason_for_recall": "Presence of N-Nitrosodimethylamine (NDMA) impurity above the acceptable daily intake limit", 14 | "address_1": "One Merck Drive", 15 | "address_2": "Building 5", 16 | "product_quantity": "5,432 bottles", 17 | "code_info": "Lot numbers: AB1234 (Exp 12/2024), CD5678 (Exp 01/2025), EF9012 (Exp 02/2025)", 18 | "center_classification_date": "20230615", 19 | "distribution_pattern": "Nationwide distribution to wholesalers and retail pharmacies in all 50 states", 20 | "state": "NJ", 21 | "product_description": "Valsartan Tablets USP, 160 mg, 90 count bottles, NDC 0378-5160-90", 22 | "report_date": "20230622", 23 | "classification": "Class II", 24 | "openfda": { 25 | "application_number": ["ANDA090802"], 26 | "brand_name": ["VALSARTAN"], 27 | "generic_name": ["VALSARTAN"], 28 | "manufacturer_name": ["Mylan Pharmaceuticals Inc."], 29 | "product_ndc": ["0378-5160"], 30 | "package_ndc": ["0378-5160-90"], 31 | "unii": ["80M03YXJ7I"], 32 | "spl_set_id": ["4b5c5f6d-7e8f-9g0h-1i2j-3k4l5m6n7o8p"] 33 | }, 34 | "more_code_info": "Manufacturing dates: January 2023 - March 2023", 35 | "recalling_firm": "Mylan Pharmaceuticals Inc.", 36 | "recall_number": "D-0001-2023", 37 | "initial_firm_notification": "Letter", 38 | "product_type": "Drugs", 39 | "event_id": "91234", 40 | "termination_date": "", 41 | "recall_initiation_date": "20230610", 42 | "postal_code": "08540-0004", 43 | "voluntary_mandated": "Voluntary: Firm Initiated", 44 | "status": "Ongoing" 45 | } 46 | ] 47 | } 48 | ``` -------------------------------------------------------------------------------- /tests/data/openfda/enforcement_search.json: -------------------------------------------------------------------------------- ```json 1 | { 
2 | "meta": { 3 | "results": { 4 | "skip": 0, 5 | "limit": 10, 6 | "total": 45 7 | } 8 | }, 9 | "results": [ 10 | { 11 | "country": "United States", 12 | "city": "Princeton", 13 | "reason_for_recall": "Presence of N-Nitrosodimethylamine (NDMA) impurity", 14 | "address_1": "One Merck Drive", 15 | "address_2": "", 16 | "product_quantity": "5,432 bottles", 17 | "code_info": "Lot numbers: AB1234, CD5678, EF9012", 18 | "center_classification_date": "20230615", 19 | "distribution_pattern": "Nationwide", 20 | "state": "NJ", 21 | "product_description": "Valsartan Tablets USP, 160 mg, 90 count bottles", 22 | "report_date": "20230622", 23 | "classification": "Class II", 24 | "openfda": { 25 | "application_number": ["ANDA090802"], 26 | "brand_name": ["VALSARTAN"], 27 | "generic_name": ["VALSARTAN"], 28 | "manufacturer_name": ["Mylan Pharmaceuticals Inc."] 29 | }, 30 | "recalling_firm": "Mylan Pharmaceuticals Inc.", 31 | "recall_number": "D-0001-2023", 32 | "initial_firm_notification": "Letter", 33 | "product_type": "Drugs", 34 | "event_id": "91234", 35 | "recall_initiation_date": "20230610", 36 | "postal_code": "08540", 37 | "voluntary_mandated": "Voluntary: Firm Initiated", 38 | "status": "Ongoing" 39 | }, 40 | { 41 | "country": "United States", 42 | "city": "New York", 43 | "reason_for_recall": "Contamination with foreign substance", 44 | "product_quantity": "10,000 units", 45 | "classification": "Class I", 46 | "product_description": "Metformin Hydrochloride Extended-Release Tablets, 500 mg", 47 | "report_date": "20230515", 48 | "recalling_firm": "Generic Pharma Corp", 49 | "recall_number": "D-0002-2023", 50 | "recall_initiation_date": "20230510", 51 | "status": "Completed" 52 | } 53 | ] 54 | } 55 | ``` -------------------------------------------------------------------------------- /src/biomcp/logging_filter.py: -------------------------------------------------------------------------------- ```python 1 | """Logging filter to suppress non-critical ASGI errors.""" 2 | 3 | 
import logging 4 | 5 | 6 | class ASGIErrorFilter(logging.Filter): 7 | """Filter out non-critical ASGI/Starlette middleware errors.""" 8 | 9 | def filter(self, record: logging.LogRecord) -> bool: 10 | """Return False to suppress the log record, True to allow it.""" 11 | 12 | # Check if this is an ASGI error we want to suppress 13 | if record.levelname == "ERROR": 14 | message = str(record.getMessage()) 15 | 16 | # Suppress known non-critical ASGI errors 17 | if "Exception in ASGI application" in message: 18 | return False 19 | if "AssertionError" in message and "http.response.body" in message: 20 | return False 21 | if ( 22 | "unhandled errors in a TaskGroup" in message 23 | and hasattr(record, "exc_info") 24 | and record.exc_info 25 | ): 26 | exc_type, exc_value, _ = record.exc_info 27 | if exc_type and "AssertionError" in str(exc_type): 28 | return False 29 | 30 | # Allow all other logs 31 | return True 32 | 33 | 34 | def setup_logging_filters(): 35 | """Set up logging filters to suppress non-critical errors.""" 36 | 37 | # Add filter to uvicorn error logger 38 | uvicorn_logger = logging.getLogger("uvicorn.error") 39 | uvicorn_logger.addFilter(ASGIErrorFilter()) 40 | 41 | # Add filter to uvicorn access logger 42 | uvicorn_access_logger = logging.getLogger("uvicorn.access") 43 | uvicorn_access_logger.addFilter(ASGIErrorFilter()) 44 | 45 | # Add filter to starlette logger 46 | starlette_logger = logging.getLogger("starlette") 47 | starlette_logger.addFilter(ASGIErrorFilter()) 48 | 49 | # Add filter to fastapi logger 50 | fastapi_logger = logging.getLogger("fastapi") 51 | fastapi_logger.addFilter(ASGIErrorFilter()) 52 | ``` -------------------------------------------------------------------------------- /src/biomcp/openfda/drug_shortages_detail_helpers.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Helper functions for formatting drug shortage details. 
3 | """ 4 | 5 | from typing import Any 6 | 7 | 8 | def format_shortage_status(shortage: dict[str, Any]) -> list[str]: 9 | """Format status information for shortage detail.""" 10 | output = [] 11 | 12 | status = shortage.get("status") or "Unknown" # a present-but-null status must not reach .lower() 13 | status_emoji = "🔴" if "current" in status.lower() else "🟢" 14 | output.append(f"{status_emoji} **Status**: {status}") 15 | 16 | return output 17 | 18 | 19 | def format_shortage_names(shortage: dict[str, Any]) -> list[str]: 20 | """Format drug names for shortage detail.""" 21 | output = [] 22 | 23 | if generic := shortage.get("generic_name"): 24 | output.append(f"**Generic Name**: {generic}") 25 | 26 | brands = shortage.get("brand_names") 27 | if brands and brands[0]: 28 | output.append(f"**Brand Names**: {', '.join(brands)}") 29 | 30 | return output 31 | 32 | 33 | def format_shortage_timeline(shortage: dict[str, Any]) -> list[str]: 34 | """Format timeline information for shortage detail.""" 35 | output = ["### Timeline"] 36 | 37 | if start_date := shortage.get("shortage_start_date"): 38 | output.append(f"**Shortage Started**: {start_date}") 39 | 40 | if resolution_date := shortage.get("resolution_date"): 41 | output.append(f"**Resolved**: {resolution_date}") 42 | elif estimated := shortage.get("estimated_resolution"): 43 | output.append(f"**Estimated Resolution**: {estimated}") 44 | else: 45 | output.append("**Estimated Resolution**: Unknown") 46 | 47 | return output 48 | 49 | 50 | def format_shortage_details_section(shortage: dict[str, Any]) -> list[str]: 51 | """Format details section for shortage detail.""" 52 | output = ["### Details"] 53 | 54 | if reason := shortage.get("reason"): 55 | output.append(f"**Reason for Shortage**:\n{reason}") 56 | 57 | if notes := shortage.get("notes"): 58 | from .utils import clean_text 59 | 60 | output.append(f"\n**Additional Notes**:\n{clean_text(notes)}") 61 | 62 | return output 63 | ``` -------------------------------------------------------------------------------- 
/src/biomcp/openfda/exceptions.py: -------------------------------------------------------------------------------- ```python 1 | """Custom exceptions for OpenFDA integration.""" 2 | 3 | 4 | class OpenFDAError(Exception): 5 | """Base exception for OpenFDA-related errors.""" 6 | 7 | pass 8 | 9 | 10 | class OpenFDARateLimitError(OpenFDAError): 11 | """Raised when FDA API rate limit is exceeded.""" 12 | 13 | def __init__(self, message: str = "FDA API rate limit exceeded"): 14 | super().__init__(message) 15 | self.message = message 16 | 17 | 18 | class OpenFDAValidationError(OpenFDAError): 19 | """Raised when FDA response validation fails.""" 20 | 21 | def __init__(self, message: str = "Invalid FDA API response"): 22 | super().__init__(message) 23 | self.message = message 24 | 25 | 26 | class OpenFDAConnectionError(OpenFDAError): 27 | """Raised when connection to FDA API fails.""" 28 | 29 | def __init__(self, message: str = "Failed to connect to FDA API"): 30 | super().__init__(message) 31 | self.message = message 32 | 33 | 34 | class OpenFDANotFoundError(OpenFDAError): 35 | """Raised when requested resource is not found.""" 36 | 37 | def __init__(self, resource_type: str, resource_id: str): 38 | message = f"{resource_type} not found: {resource_id}" 39 | super().__init__(message) 40 | self.resource_type = resource_type 41 | self.resource_id = resource_id 42 | self.message = message 43 | 44 | 45 | class OpenFDATimeoutError(OpenFDAError): 46 | """Raised when FDA API request times out.""" 47 | 48 | def __init__(self, message: str = "FDA API request timeout"): 49 | super().__init__(message) 50 | self.message = message 51 | 52 | 53 | class OpenFDAInvalidParameterError(OpenFDAError): 54 | """Raised when invalid parameters are provided.""" 55 | 56 | def __init__(self, parameter: str, value: str, reason: str): 57 | message = ( 58 | f"Invalid parameter '{parameter}' with value '{value}': {reason}" 59 | ) 60 | super().__init__(message) 61 | self.parameter = parameter 62 | 
self.value = value 63 | self.reason = reason 64 | self.message = message 65 | ``` -------------------------------------------------------------------------------- /tests/bdd/fetch_articles/test_fetch.py: -------------------------------------------------------------------------------- ```python 1 | import json 2 | import shlex 3 | 4 | from pytest_bdd import given, parsers, scenarios, then 5 | from typer.testing import CliRunner 6 | 7 | from biomcp.cli.main import app 8 | 9 | scenarios("fetch.feature") 10 | 11 | runner = CliRunner() 12 | 13 | 14 | @given(parsers.parse('I run "{command}"'), target_fixture="cli_result") 15 | def cli_result(command): 16 | """Run the given CLI command and return the parsed JSON output.""" 17 | args = shlex.split(command)[1:] 18 | result = runner.invoke(app, args) 19 | return json.loads(result.stdout) 20 | 21 | 22 | @then("the JSON output should be a non-empty list") 23 | def check_non_empty_list(cli_result): 24 | """Check that the JSON output is a list with at least one article.""" 25 | assert isinstance(cli_result, list), "Expected JSON output to be a list" 26 | assert len(cli_result) > 0, "Expected at least one article in the output" 27 | 28 | 29 | @then("the first article's abstract should be populated") 30 | def check_abstract_populated(cli_result): 31 | """Check that the first article has a non-empty abstract.""" 32 | article = cli_result[0] 33 | abstract = article.get("abstract") 34 | assert abstract is not None, "Abstract field is missing" 35 | assert abstract.strip() != "", "Abstract field is empty" 36 | 37 | 38 | @then("the application should return an error") 39 | def step_impl(cli_result): 40 | assert cli_result == [ 41 | {"error": 'Error 400: {"detail":"Could not retrieve publications"}'} 42 | ] 43 | 44 | 45 | @then("the first article should have a DOI field") 46 | def check_doi_field(cli_result): 47 | """Check that the first article has a DOI field.""" 48 | article = cli_result[0] 49 | doi = article.get("doi") 50 | assert 
doi is not None, "DOI field is missing" 51 | assert doi.startswith("10."), f"Invalid DOI format: {doi}" 52 | 53 | 54 | @then("the source should be Europe PMC") 55 | def check_europe_pmc_source(cli_result): 56 | """Check that the article source is Europe PMC.""" 57 | article = cli_result[0] 58 | source = article.get("source") 59 | assert ( 60 | source == "Europe PMC" 61 | ), f"Expected source 'Europe PMC', got '{source}'" 62 | ``` -------------------------------------------------------------------------------- /src/biomcp/metrics_handler.py: -------------------------------------------------------------------------------- ```python 1 | """MCP handler for metrics collection.""" 2 | 3 | from typing import Annotated 4 | 5 | from biomcp.core import mcp_app 6 | from biomcp.metrics import get_all_metrics, get_metric_summary 7 | 8 | 9 | @mcp_app.tool() 10 | async def get_performance_metrics( 11 | metric_name: Annotated[ 12 | str | None, 13 | "Specific metric name to retrieve, or None for all metrics", 14 | ] = None, 15 | ) -> str: 16 | """Get performance metrics for BioMCP operations. 
17 | 18 | Returns performance statistics including: 19 | - Request counts and success rates 20 | - Response time percentiles (p50, p95, p99) 21 | - Error rates and types 22 | - Domain-specific performance breakdown 23 | 24 | Parameters: 25 | metric_name: Optional specific metric to retrieve 26 | 27 | Returns: 28 | Formatted metrics report 29 | """ 30 | if metric_name: 31 | summary = await get_metric_summary(metric_name) 32 | if not summary: 33 | return f"No metrics found for '{metric_name}'" 34 | 35 | return _format_summary(summary) 36 | else: 37 | all_summaries = await get_all_metrics() 38 | if not all_summaries: 39 | return "No metrics collected yet" 40 | 41 | lines = ["# BioMCP Performance Metrics\n"] 42 | for name in sorted(all_summaries.keys()): 43 | summary = all_summaries[name] 44 | lines.append(f"## {name}") 45 | lines.append(_format_summary(summary)) 46 | lines.append("") 47 | 48 | return "\n".join(lines) 49 | 50 | 51 | def _format_summary(summary) -> str: 52 | """Format a metric summary for display.""" 53 | lines = [ 54 | f"- Total requests: {summary.count}", 55 | f"- Success rate: {(1 - summary.error_rate) * 100:.1f}%", 56 | f"- Errors: {summary.error_count}", 57 | "", 58 | "### Response Times", 59 | f"- Average: {summary.avg_duration * 1000:.1f}ms", 60 | f"- Min: {summary.min_duration * 1000:.1f}ms", 61 | f"- Max: {summary.max_duration * 1000:.1f}ms", 62 | f"- P50: {summary.p50_duration * 1000:.1f}ms", 63 | f"- P95: {summary.p95_duration * 1000:.1f}ms", 64 | f"- P99: {summary.p99_duration * 1000:.1f}ms", 65 | ] 66 | 67 | return "\n".join(lines) 68 | ``` -------------------------------------------------------------------------------- /scripts/check_docs_in_mkdocs.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python3 2 | """Check that all markdown files in docs/ are referenced in mkdocs.yml.""" 3 | 4 | import sys 5 | from pathlib import Path 6 | 7 | import yaml # DEP004 8 | 9 | 10 | def 
main(): 11 | """Check documentation files are in mkdocs.yml.""" 12 | docs_dir = Path(__file__).parent.parent / "docs" 13 | mkdocs_path = Path(__file__).parent.parent / "mkdocs.yml" 14 | 15 | # Load mkdocs.yml 16 | with open(mkdocs_path, encoding="utf-8") as f: 17 | mkdocs_config = yaml.safe_load(f) 18 | 19 | # Extract all referenced files from nav 20 | referenced_files = set() 21 | 22 | def extract_files(nav_item, prefix=""): 23 | """Recursively extract file paths from nav structure.""" 24 | if isinstance(nav_item, dict): 25 | for _key, value in nav_item.items(): 26 | extract_files(value, prefix) 27 | elif isinstance(nav_item, list): 28 | for item in nav_item: 29 | extract_files(item, prefix) 30 | elif isinstance(nav_item, str) and nav_item.endswith(".md"): 31 | referenced_files.add(nav_item) 32 | 33 | extract_files(mkdocs_config.get("nav", [])) 34 | 35 | # Find all markdown files in docs/ 36 | all_md_files = set() 37 | for md_file in docs_dir.rglob("*.md"): 38 | # Get relative path from docs/ with forward slashes, matching mkdocs nav entries on every OS 39 | rel_path = md_file.relative_to(docs_dir) 40 | all_md_files.add(rel_path.as_posix()) 41 | 42 | # Find unreferenced files 43 | unreferenced = all_md_files - referenced_files 44 | 45 | # Exclude some files that shouldn't be in nav 46 | exclude_patterns = { 47 | "CNAME", # GitHub pages config 48 | "README.md", # If exists 49 | } 50 | 51 | unreferenced = { 52 | f 53 | for f in unreferenced 54 | if not any(pattern in f for pattern in exclude_patterns) 55 | } 56 | 57 | if unreferenced: 58 | print( 59 | "The following documentation files are not referenced in mkdocs.yml:" 60 | ) 61 | for file in sorted(unreferenced): 62 | print(f" - {file}") 63 | print("\nPlease add them to the appropriate section in mkdocs.yml") 64 | return 1 65 | else: 66 | print("All documentation files are referenced in mkdocs.yml ✓") 67 | return 0 68 | 69 | 70 | if __name__ == "__main__": 71 | sys.exit(main()) 72 | ``` -------------------------------------------------------------------------------- 
/src/biomcp/cbioportal_helper.py: -------------------------------------------------------------------------------- ```python 1 | """Helper module for cBioPortal integration across tools. 2 | 3 | This module centralizes cBioPortal summary generation logic to avoid duplication. 4 | """ 5 | 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | async def get_cbioportal_summary_for_genes( 12 | genes: list[str] | None, request_params: dict | None = None 13 | ) -> str | None: 14 | """Get cBioPortal summary for given genes. 15 | 16 | Args: 17 | genes: List of gene symbols to get summaries for 18 | request_params: Optional additional parameters for the request 19 | 20 | Returns: 21 | Formatted cBioPortal summary or None if unavailable 22 | """ 23 | if not genes: 24 | return None 25 | 26 | try: 27 | from biomcp.articles.search import PubmedRequest 28 | from biomcp.articles.unified import _get_cbioportal_summary 29 | 30 | # Create a request object for cBioPortal summary 31 | request = PubmedRequest(genes=genes) 32 | 33 | # Add any additional parameters if provided 34 | if request_params: 35 | for key, value in request_params.items(): 36 | if hasattr(request, key): 37 | setattr(request, key, value) 38 | 39 | cbioportal_summary = await _get_cbioportal_summary(request) 40 | return cbioportal_summary 41 | 42 | except Exception as e: 43 | logger.warning(f"Failed to get cBioPortal summary: {e}") 44 | return None 45 | 46 | 47 | async def get_variant_cbioportal_summary(gene: str | None) -> str | None: 48 | """Get cBioPortal summary for variant searches. 
49 | 50 | Args: 51 | gene: Gene symbol to get summary for 52 | 53 | Returns: 54 | Formatted cBioPortal summary or None if unavailable 55 | """ 56 | if not gene: 57 | return None 58 | 59 | try: 60 | from biomcp.variants.cbioportal_search import ( 61 | CBioPortalSearchClient, 62 | format_cbioportal_search_summary, 63 | ) 64 | 65 | client = CBioPortalSearchClient() 66 | summary = await client.get_gene_search_summary(gene) 67 | if summary: 68 | return format_cbioportal_search_summary(summary) 69 | return None 70 | 71 | except Exception as e: 72 | logger.warning( 73 | f"Failed to get cBioPortal summary for variant search: {e}" 74 | ) 75 | return None 76 | ``` -------------------------------------------------------------------------------- /src/biomcp/utils/rate_limiter.py: -------------------------------------------------------------------------------- ```python 1 | """Simple rate limiting utilities for API calls.""" 2 | 3 | import asyncio 4 | import time 5 | from collections import defaultdict 6 | 7 | 8 | class RateLimiter: 9 | """Simple token bucket rate limiter.""" 10 | 11 | def __init__(self, rate: int = 10, per_seconds: int = 1): 12 | """Initialize rate limiter. 13 | 14 | Args: 15 | rate: Number of allowed requests 16 | per_seconds: Time window in seconds 17 | """ 18 | self.rate = rate 19 | self.per_seconds = per_seconds 20 | self.allowance: dict[str, float] = defaultdict(lambda: float(rate)) 21 | self.last_check: dict[str, float] = defaultdict(float) 22 | self._lock = asyncio.Lock() 23 | 24 | async def check_rate_limit( 25 | self, key: str = "default" 26 | ) -> tuple[bool, float | None]: 27 | """Check if request is allowed under rate limit. 
28 | 29 | Args: 30 | key: Identifier for rate limit bucket 31 | 32 | Returns: 33 | Tuple of (allowed, wait_time_if_not_allowed) 34 | """ 35 | async with self._lock: 36 | current = time.time() 37 | time_passed = current - self.last_check[key] 38 | self.last_check[key] = current 39 | 40 | # Replenish tokens 41 | self.allowance[key] += time_passed * (self.rate / self.per_seconds) 42 | 43 | # Cap at maximum rate 44 | if self.allowance[key] > self.rate: 45 | self.allowance[key] = float(self.rate) 46 | 47 | # Check if request allowed 48 | if self.allowance[key] >= 1.0: 49 | self.allowance[key] -= 1.0 50 | return True, None 51 | else: 52 | # Calculate wait time 53 | wait_time = (1.0 - self.allowance[key]) * ( 54 | self.per_seconds / self.rate 55 | ) 56 | return False, wait_time 57 | 58 | async def wait_if_needed(self, key: str = "default") -> None: 59 | """Wait if rate limited before allowing request.""" 60 | allowed, wait_time = await self.check_rate_limit(key) 61 | if not allowed and wait_time: 62 | await asyncio.sleep(wait_time) 63 | 64 | 65 | # Global rate limiter for cBioPortal API 66 | # Conservative: 5 requests per second 67 | cbioportal_limiter = RateLimiter(rate=5, per_seconds=1) 68 | ``` -------------------------------------------------------------------------------- /src/biomcp/articles/autocomplete.py: -------------------------------------------------------------------------------- ```python 1 | """Find entities for a given concept using the PUBTATOR API. 2 | 3 | Example URL: 4 | https://www.ncbi.nlm.nih.gov/research/pubtator3-api/entity/autocomplete/?query=BRAF 5 | """ 6 | 7 | from typing import Literal 8 | 9 | from pydantic import BaseModel, Field, RootModel 10 | 11 | from .. 
# Entity categories accepted by the PubTator3 autocomplete endpoint.
Concept = Literal["variant", "chemical", "disease", "gene"]


# NOTE: the pydantic models below are documented with comments rather than
# class docstrings, because pydantic copies a model docstring into the
# generated JSON schema description.


# Query parameters for one autocomplete lookup.
class EntityRequest(BaseModel):
    concept: Concept | None = None
    query: str
    limit: int = Field(default=1, ge=1, le=100)


# One entity suggestion; populated from the API's "_id" / "biotype" aliases.
class Entity(BaseModel):
    entity_id: str = Field(
        alias="_id",
        examples=["@GENE_BRAF"],
        description="Text-based entity following @<biotype>_<n> format.",
    )
    concept: Concept = Field(
        ...,
        alias="biotype",
        description="Entity label or concept type.",
    )
    name: str = Field(
        ...,
        description="Preferred term of entity concept.",
        examples=[
            "BRAF",
            "Adenocarcinoma of Lung",
            "Osimertinib",
            "EGFR L858R",
        ],
    )
    match: str | None = Field(
        default=None,
        description="Reason for the entity match.",
        examples=["Multiple matches", "Matched on name <m>NAME</m>"],
    )

    def __eq__(self, other: object) -> bool:
        """Compare entities by ``entity_id`` only.

        Returns ``NotImplemented`` for non-Entity operands so that
        comparisons such as ``entity == None`` evaluate to ``False``
        instead of raising ``AttributeError``.
        """
        if not isinstance(other, Entity):
            return NotImplemented
        return self.entity_id == other.entity_id


# Root model wrapping the list of entities returned by the endpoint.
class EntityList(RootModel):
    root: list[Entity]

    @property
    def first(self) -> Entity | None:
        """Return the first entity, or None when the list is empty."""
        return self.root[0] if self.root else None


PUBTATOR3_AUTOCOMPLETE = f"{PUBTATOR3_BASE_URL}/entity/autocomplete/"


async def autocomplete(request: EntityRequest) -> Entity | None:
    """Given a request of biotype and query, returns the best matching Entity.
    If API call fails or returns 0 results, then None is returned.

    Example Request:
        {
            "concept": "gene",
            "query": "BRAF"
        }
    Response:
        {
            "entity_id": "@GENE_BRAF",
            "biotype": "gene",
            "name": "BRAF",
            "match": "Matched on name <m>BRAF</m>"
        }
    """
    # The second element of the returned tuple is discarded here; a falsy
    # response is mapped to None below.
    response, _ = await http_client.request_api(
        url=PUBTATOR3_AUTOCOMPLETE,
        request=request,
        response_model_type=EntityList,
        domain="pubmed",
    )
    return response.first if response else None
**Caching Strategy**: 37 | 38 | - Check cache for existing results 39 | - If cache miss, fetch from external API 40 | - Store results with appropriate TTL 41 | - Return formatted results to user 42 | 43 | 3. **Response Formatting**: 44 | - Raw API data is normalized 45 | - Domain-specific enrichment applied 46 | - Results formatted for consumption 47 | 48 | ## Architecture References 49 | 50 | - [Detailed Architecture Diagrams](architecture-diagrams.md) 51 | - [Quick Architecture Reference](quick-architecture.md) 52 | 53 | ## Key Architecture Patterns 54 | 55 | ### Domain Separation 56 | 57 | Each data source has its own module with dedicated: 58 | 59 | - Search functions 60 | - Result parsers 61 | - Error handlers 62 | - Cache strategies 63 | 64 | ### Unified Interface 65 | 66 | All domains expose consistent methods: 67 | 68 | - `search()`: Query for multiple results 69 | - `fetch()`: Get detailed record by ID 70 | - Common parameter names across domains 71 | 72 | ### Smart Caching 73 | 74 | - API responses cached 15-30 minutes 75 | - Cache keys include query parameters 76 | - Automatic cache invalidation on errors 77 | - Per-domain cache configuration 78 | 79 | ### Error Resilience 80 | 81 | - Graceful degradation when APIs unavailable 82 | - Specific error messages for troubleshooting 83 | - Automatic retries with exponential backoff 84 | - Fallback to cached data when possible 85 | ``` -------------------------------------------------------------------------------- /docs/faq-condensed.md: -------------------------------------------------------------------------------- ```markdown 1 | # FAQ - Quick Answers 2 | 3 | ## Getting Started 4 | 5 | **Q: What is BioMCP?** 6 | A: A unified interface to biomedical databases (PubMed, ClinicalTrials.gov, MyVariant, etc.) for researchers and AI assistants. 7 | 8 | **Q: Do I need API keys?** 9 | A: No for basic use. Yes for: NCI trials (cancer-specific), AlphaGenome (variant predictions), enhanced cBioPortal features. 
10 | 11 | **Q: How do I install it?** 12 | A: `uv tool install biomcp` (recommended) or `pip install biomcp-python` 13 | 14 | ## Common Issues 15 | 16 | **Q: "Command not found" after installation** 17 | A: Restart terminal, or use full path: `~/.local/bin/biomcp` 18 | 19 | **Q: No results for gene search** 20 | A: Use official symbols (ERBB2 not HER2). Check at [genenames.org](https://www.genenames.org) 21 | 22 | **Q: Location search not working** 23 | A: Must provide coordinates: `--latitude 42.3601 --longitude -71.0589` 24 | 25 | **Q: Why does the AI use 'think' first?** 26 | A: Required for systematic analysis. Improves search quality and prevents missed connections. 27 | 28 | ## Search Tips 29 | 30 | **Q: How to search variant notations?** 31 | A: Use OR syntax: `--keyword "V600E|p.V600E|c.1799T>A"` 32 | 33 | **Q: Include/exclude preprints?** 34 | A: Included by default. Use `--no-preprints` to exclude. 35 | 36 | **Q: Search multiple databases?** 37 | A: Use unified search: `search(query="gene:BRAF AND disease:melanoma")` 38 | 39 | ## Data Questions 40 | 41 | **Q: How current is the data?** 42 | A: Daily updates for PubMed/trials, weekly for BioThings, varies for cBioPortal. 43 | 44 | **Q: ClinicalTrials.gov vs NCI?** 45 | A: CT.gov = comprehensive, NCI = cancer-focused with biomarker filters (needs API key). 46 | 47 | **Q: What's MSI/TMB/VAF?** 48 | A: MSI = Microsatellite Instability, TMB = Tumor Mutational Burden, VAF = Variant Allele Frequency 49 | 50 | ## Technical 51 | 52 | **Q: Rate limits?** 53 | A: ~3 req/sec without keys, higher with keys. NCI = 1000/day with key. 
def _get_nested_value(data: dict[str, Any], path: str) -> Any:
    """Get a nested value from a dictionary using dot notation path.

    Args:
        data: Dictionary to read from
        path: Dot-separated key path, e.g. ``"civic.contributors"``

    Returns:
        The value stored at ``path``, or None if any segment is missing or
        an intermediate value is not a dictionary.
    """
    current: Any = data
    # Walk every segment, including the last one; stopping one key early
    # would return the parent container instead of the requested value.
    for key in path.split("."):
        if not isinstance(current, dict) or key not in current:
            return None
        current = current[key]
    return current


def _delete_nested_path(data: dict[str, Any], path: str) -> None:
    """Delete a nested path from a dictionary using dot notation.

    The dictionary is modified in place. Missing paths are ignored.
    """
    *parents, leaf = path.split(".")
    current: Any = data
    for key in parents:
        if not isinstance(current, dict) or key not in current:
            return
        current = current[key]

    if isinstance(current, dict):
        current.pop(leaf, None)


def _deep_copy_value(value: Any) -> Any:
    """Recursively copy dicts and lists; other values are returned as-is."""
    if isinstance(value, dict):
        return _deep_copy_dict(value)
    if isinstance(value, list):
        return [_deep_copy_value(item) for item in value]
    return value


def _deep_copy_dict(data: dict[str, Any]) -> dict[str, Any]:
    """Create a deep copy of a dictionary, handling nested dicts and lists.

    Lists nested inside other lists are copied as well, so no mutable
    container is shared between the input and the result.
    """
    return {key: _deep_copy_value(value) for key, value in data.items()}


def filter_variants(variants: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """
    Filter out specified paths from variant data.

    Args:
        variants: List of variant dictionaries from MyVariant.info API

    Returns:
        List of variant dictionaries with specified paths removed
    """
    filtered_variants = []
    for variant in variants:
        # Work on a deep copy so the caller's data is never modified.
        filtered_variant = _deep_copy_dict(variant)

        # PATH_FILTERS is defined below; it is resolved at call time.
        for path in PATH_FILTERS:
            _delete_nested_path(filtered_variant, path)

        filtered_variants.append(filtered_variant)

    return filtered_variants


# Dot-notation paths stripped from every variant record.
PATH_FILTERS = [
    "civic.contributors",
    "civic.molecularProfiles",
    "dbsnp.gene.rnas",
    "dbnsfp.clinvar",  # duplicate of root-level clinvar
    "civic.lastAcceptedRevisionEvent",
    "civic.lastSubmittedRevisionEvent",
    "civic.creationActivity",
]
"s/^version = \".*\"/version = \"$RELEASE_VERSION\"/" pyproject.toml 22 | env: 23 | RELEASE_VERSION: ${{ steps.vars.outputs.tag }} 24 | if: ${{ github.event_name == 'release' }} 25 | 26 | - name: Upload updated pyproject.toml 27 | uses: actions/upload-artifact@v4 28 | with: 29 | name: pyproject-toml 30 | path: pyproject.toml 31 | 32 | test: 33 | runs-on: ubuntu-latest 34 | needs: [set-version] 35 | steps: 36 | - name: Check out 37 | uses: actions/checkout@v5 38 | 39 | - name: Download updated pyproject.toml 40 | uses: actions/download-artifact@v5 41 | with: 42 | name: pyproject-toml 43 | 44 | - name: Set up Python 45 | uses: actions/setup-python@v6 46 | with: 47 | python-version: "3.12" 48 | 49 | - name: Install uv 50 | uses: astral-sh/setup-uv@v6 51 | with: 52 | version: "0.4.29" 53 | 54 | - name: Install dependencies 55 | run: uv sync --group dev 56 | 57 | - name: Run tests 58 | run: uv run python -m pytest tests --cov --cov-config=pyproject.toml --cov-report=xml 59 | 60 | publish: 61 | runs-on: ubuntu-latest 62 | needs: [set-version, test] 63 | permissions: 64 | id-token: write 65 | environment: release 66 | steps: 67 | - name: Check out 68 | uses: actions/checkout@v5 69 | 70 | - name: Set up the environment 71 | uses: ./.github/actions/setup-python-env 72 | 73 | - name: Download updated pyproject.toml 74 | uses: actions/download-artifact@v5 75 | with: 76 | name: pyproject-toml 77 | 78 | - name: Build package 79 | run: uvx --from build pyproject-build --installer uv 80 | 81 | - name: Check package 82 | run: uvx twine check dist/* 83 | 84 | - name: Publish package 85 | uses: pypa/gh-action-pypi-publish@release/v1 86 | with: 87 | verbose: true 88 | 89 | deploy-docs: 90 | needs: publish 91 | runs-on: ubuntu-latest 92 | steps: 93 | - name: Check out 94 | uses: actions/checkout@v5 95 | 96 | - name: Set up the environment 97 | uses: ./.github/actions/setup-python-env 98 | 99 | - name: Deploy documentation 100 | run: uv run mkdocs gh-deploy --force 101 | ``` 
-------------------------------------------------------------------------------- /tests/data/openfda/drugsfda_detail.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "meta": { 3 | "results": { 4 | "skip": 0, 5 | "limit": 1, 6 | "total": 1 7 | } 8 | }, 9 | "results": [ 10 | { 11 | "application_number": "BLA125514", 12 | "sponsor_name": "MERCK SHARP DOHME", 13 | "openfda": { 14 | "application_number": ["BLA125514"], 15 | "brand_name": ["KEYTRUDA"], 16 | "generic_name": ["PEMBROLIZUMAB"], 17 | "manufacturer_name": ["Merck Sharp & Dohme Corp."], 18 | "substance_name": ["PEMBROLIZUMAB"], 19 | "product_ndc": ["0006-3026-02", "0006-3029-02"], 20 | "spl_set_id": ["c0e2de11-29e0-48a1-92f0-d9cb4dd56b15"], 21 | "unii": ["DPT0O3T46P"] 22 | }, 23 | "products": [ 24 | { 25 | "product_number": "001", 26 | "reference_drug": "Yes", 27 | "brand_name": "KEYTRUDA", 28 | "active_ingredients": [ 29 | { 30 | "name": "PEMBROLIZUMAB", 31 | "strength": "100MG/4ML" 32 | } 33 | ], 34 | "reference_standard": "Yes", 35 | "dosage_form": "INJECTION, SOLUTION", 36 | "route": "INTRAVENOUS", 37 | "marketing_status": "Prescription" 38 | }, 39 | { 40 | "product_number": "002", 41 | "reference_drug": "Yes", 42 | "brand_name": "KEYTRUDA", 43 | "active_ingredients": [ 44 | { 45 | "name": "PEMBROLIZUMAB", 46 | "strength": "50MG/VIAL" 47 | } 48 | ], 49 | "reference_standard": "Yes", 50 | "dosage_form": "INJECTION, POWDER, LYOPHILIZED, FOR SOLUTION", 51 | "route": "INTRAVENOUS", 52 | "marketing_status": "Prescription" 53 | } 54 | ], 55 | "submissions": [ 56 | { 57 | "submission_type": "BLA", 58 | "submission_number": "125514", 59 | "submission_status": "AP", 60 | "submission_status_date": "20140904", 61 | "review_priority": "P", 62 | "submission_class_code": "BLA", 63 | "submission_class_code_description": "Biologic License Application", 64 | "application_docs": [ 65 | { 66 | "id": "52674", 67 | "url": 
"https://www.accessdata.fda.gov/drugsatfda_docs/label/2014/125514lbl.pdf", 68 | "date": "20140905", 69 | "type": "Label" 70 | } 71 | ] 72 | }, 73 | { 74 | "submission_type": "SUPPL", 75 | "submission_number": "109", 76 | "submission_status": "AP", 77 | "submission_status_date": "20230316", 78 | "submission_class_code": "SUPPL", 79 | "submission_class_code_description": "Supplement" 80 | } 81 | ] 82 | } 83 | ] 84 | } 85 | ``` -------------------------------------------------------------------------------- /src/biomcp/exceptions.py: -------------------------------------------------------------------------------- ```python 1 | """Custom exceptions for BioMCP.""" 2 | 3 | from typing import Any 4 | 5 | 6 | class BioMCPError(Exception): 7 | """Base exception for all BioMCP errors.""" 8 | 9 | def __init__(self, message: str, details: dict[str, Any] | None = None): 10 | super().__init__(message) 11 | self.message = message 12 | self.details = details or {} 13 | 14 | 15 | class BioMCPSearchError(BioMCPError): 16 | """Base exception for search-related errors.""" 17 | 18 | pass 19 | 20 | 21 | class InvalidDomainError(BioMCPSearchError): 22 | """Raised when an invalid domain is specified.""" 23 | 24 | def __init__(self, domain: str, valid_domains: list[str]): 25 | message = f"Unknown domain: {domain}. Valid domains are: {', '.join(valid_domains)}" 26 | super().__init__( 27 | message, {"domain": domain, "valid_domains": valid_domains} 28 | ) 29 | 30 | 31 | class InvalidParameterError(BioMCPSearchError): 32 | """Raised when invalid parameters are provided.""" 33 | 34 | def __init__(self, parameter: str, value: Any, expected: str): 35 | message = f"Invalid value for parameter '{parameter}': {value}. 
Expected: {expected}" 36 | super().__init__( 37 | message, 38 | {"parameter": parameter, "value": value, "expected": expected}, 39 | ) 40 | 41 | 42 | class SearchExecutionError(BioMCPSearchError): 43 | """Raised when a search fails to execute.""" 44 | 45 | def __init__(self, domain: str, error: Exception): 46 | message = f"Failed to execute search for domain '{domain}': {error!s}" 47 | super().__init__( 48 | message, {"domain": domain, "original_error": str(error)} 49 | ) 50 | 51 | 52 | class ResultParsingError(BioMCPSearchError): 53 | """Raised when results cannot be parsed.""" 54 | 55 | def __init__(self, domain: str, error: Exception): 56 | message = f"Failed to parse results for domain '{domain}': {error!s}" 57 | super().__init__( 58 | message, {"domain": domain, "original_error": str(error)} 59 | ) 60 | 61 | 62 | class QueryParsingError(BioMCPError): 63 | """Raised when a query cannot be parsed.""" 64 | 65 | def __init__(self, query: str, error: Exception): 66 | message = f"Failed to parse query '{query}': {error!s}" 67 | super().__init__( 68 | message, {"query": query, "original_error": str(error)} 69 | ) 70 | 71 | 72 | class ThinkingError(BioMCPError): 73 | """Raised when sequential thinking encounters an error.""" 74 | 75 | def __init__(self, thought_number: int, error: str): 76 | message = f"Error in thought {thought_number}: {error}" 77 | super().__init__( 78 | message, {"thought_number": thought_number, "error": error} 79 | ) 80 | ``` -------------------------------------------------------------------------------- /docs/stylesheets/announcement.css: -------------------------------------------------------------------------------- ```css 1 | /* Announcement Banner Styles */ 2 | .announcement-banner { 3 | background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); 4 | border-radius: 12px; 5 | padding: 2rem; 6 | margin: 2rem 0; 7 | box-shadow: 0 10px 30px rgba(0, 0, 0, 0.15); 8 | position: relative; 9 | overflow: hidden; 10 | } 11 | 12 | 
.announcement-banner::before { 13 | content: ""; 14 | position: absolute; 15 | top: -50%; 16 | right: -50%; 17 | width: 200%; 18 | height: 200%; 19 | background: radial-gradient( 20 | circle, 21 | rgba(255, 255, 255, 0.1) 0%, 22 | transparent 70% 23 | ); 24 | animation: shimmer 3s infinite; 25 | } 26 | 27 | @keyframes shimmer { 28 | 0% { 29 | transform: rotate(0deg); 30 | } 31 | 100% { 32 | transform: rotate(360deg); 33 | } 34 | } 35 | 36 | .announcement-content { 37 | position: relative; 38 | z-index: 1; 39 | } 40 | 41 | .announcement-banner h2 { 42 | color: white !important; 43 | margin-top: 0 !important; 44 | font-size: 1.8rem; 45 | display: flex; 46 | align-items: center; 47 | gap: 0.5rem; 48 | } 49 | 50 | .announcement-banner .badge-new { 51 | background: #ff6b6b; 52 | color: white; 53 | padding: 0.2rem 0.6rem; 54 | border-radius: 20px; 55 | font-size: 0.8rem; 56 | font-weight: bold; 57 | animation: pulse 2s infinite; 58 | } 59 | 60 | @keyframes pulse { 61 | 0%, 62 | 100% { 63 | transform: scale(1); 64 | } 65 | 50% { 66 | transform: scale(1.05); 67 | } 68 | } 69 | 70 | .announcement-banner p { 71 | color: rgba(255, 255, 255, 0.95) !important; 72 | font-size: 1.1rem; 73 | margin: 1rem 0; 74 | } 75 | 76 | .announcement-banner .announcement-features { 77 | display: grid; 78 | grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); 79 | gap: 1rem; 80 | margin: 1.5rem 0; 81 | } 82 | 83 | .announcement-banner .feature-item { 84 | background: rgba(255, 255, 255, 0.1); 85 | padding: 0.8rem; 86 | border-radius: 8px; 87 | backdrop-filter: blur(10px); 88 | border: 1px solid rgba(255, 255, 255, 0.2); 89 | } 90 | 91 | .announcement-banner .feature-item strong { 92 | color: white; 93 | display: block; 94 | margin-bottom: 0.3rem; 95 | } 96 | 97 | .announcement-banner .feature-item span { 98 | color: rgba(255, 255, 255, 0.85); 99 | font-size: 0.9rem; 100 | } 101 | 102 | .announcement-banner .cta-button { 103 | display: inline-block; 104 | background: white; 105 | color: 
async def test_preprints():
    """Test that preprint search works.

    Returns:
        True if the Europe PMC search returned at least one result,
        False otherwise.
    """
    print("Testing Europe PMC preprint search...")
    client = EuropePMCClient()

    # Search for a common term
    results = await client.search("cancer")

    if results:
        print(f"✓ Found {len(results)} preprints")
        print(f" First: {results[0].title[:60]}...")
        return True
    else:
        print("✗ No results found")
        return False


async def test_variants_without_mastermind():
    """Test variant aggregator without Mastermind API key.

    Returns:
        True once the aggregator call has completed; an exception raised by
        the aggregator propagates to the caller.
    """
    print("\nTesting variant aggregator without Mastermind key...")

    # Create aggregator
    aggregator = ExternalVariantAggregator()

    # Test with a variant - even if individual sources fail,
    # the aggregator should handle it gracefully
    result = await aggregator.get_enhanced_annotations(
        "BRAF V600E", include_tcga=True, include_1000g=True
    )

    print("✓ Aggregator completed without errors")
    print(f" Variant ID: {result.variant_id}")
    print(f" TCGA data: {'Found' if result.tcga else 'Not found'}")
    print(
        f" 1000G data: {'Found' if result.thousand_genomes else 'Not found'}"
    )
    print(
        f" Errors: {result.error_sources if result.error_sources else 'None'}"
    )

    # Key test: reaching this point means the aggregator completed without
    # raising. The former `if True: ... else: ...` made the failure branch
    # unreachable, so the success path is now stated directly.
    print("✓ Mastermind correctly skipped without API key")
    return True


async def main():
    """Run all tests.

    Returns:
        Process exit code: 0 if both checks passed, 1 otherwise.
    """
    print("=" * 60)
    print("Testing BioMCP features without external API keys")
    print("=" * 60)

    # Test preprints
    preprint_ok = await test_preprints()

    # Test variants
    variant_ok = await test_variants_without_mastermind()

    print("\n" + "=" * 60)
    print("Summary:")
    print(f" Preprint search: {'✓ PASS' if preprint_ok else '✗ FAIL'}")
    print(f" Variant aggregator: {'✓ PASS' if variant_ok else '✗ FAIL'}")
    print("=" * 60)

    if preprint_ok and variant_ok:
        print("\n✓ All features work without external API keys!")
        return 0
    else:
        print("\n✗ Some features failed")
        return 1


if __name__ == "__main__":
    exit_code = asyncio.run(main())
    exit(exit_code)
def test_inject_links_braf_variants(braf_variants):
    """Verify the URLs that inject_links adds to variant records."""
    linked = inject_links(braf_variants)

    # First fixture record: dbSNP / ClinVar / COSMIC links, no CIViC link.
    variant0 = linked[0]
    dbsnp, clinvar, cosmic = (
        variant0["dbsnp"],
        variant0["clinvar"],
        variant0["cosmic"],
    )
    assert (
        dbsnp["url"] == f"https://www.ncbi.nlm.nih.gov/snp/{dbsnp['rsid']}"
    )
    assert (
        clinvar["url"]
        == f"https://www.ncbi.nlm.nih.gov/clinvar/variation/{clinvar['variant_id']}/"
    )
    assert (
        cosmic["url"]
        == f"https://cancer.sanger.ac.uk/cosmic/mutation/overview?id={cosmic['cosmic_id']}"
    )
    assert "civic" not in variant0 or "url" not in variant0["civic"]

    top_level_urls = variant0["url"]
    assert (
        top_level_urls["ensembl"]
        == f"https://ensembl.org/Homo_sapiens/Variation/Explore?v={dbsnp['rsid']}"
    )
    assert top_level_urls["ucsc_genome_browser"].startswith(
        "https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position=chr7:"
    )
    assert (
        top_level_urls["hgnc"]
        == "https://www.genenames.org/data/gene-symbol-report/#!/symbol/BRAF"
    )

    # Second fixture record: carries a CIViC entry, so it gets a CIViC link.
    civic = linked[1]["civic"]
    assert civic["url"] == f"https://civicdb.org/variants/{civic['id']}/summary"

    # An empty input yields an empty output.
    assert inject_links([]) == []

    # Insertion (record without a REF allele).
    insertion = {
        "chrom": "7",
        "vcf": {"position": "123", "alt": "A"},
        "dbnsfp": {"genename": "GENE1"},
    }
    insertion_linked = inject_links([insertion])[0]
    assert (
        insertion_linked["url"]["ucsc_genome_browser"]
        == "https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position=chr7:123-124"
    )

    # Deletion (record without an ALT allele).
    deletion = {
        "chrom": "7",
        "vcf": {"position": "123", "ref": "AAA"},
        "dbnsfp": {"genename": "GENE1"},
    }
    deletion_linked = inject_links([deletion])[0]
    assert (
        deletion_linked["url"]["ucsc_genome_browser"]
        == "https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position=chr7:123-126"
    )
This system: 14 | 15 | - Processes and harmonizes data from 40+ scientific and clinical sources 16 | - Updates weekly to incorporate the newest research findings 17 | - Utilizes advanced NLP to extract meaningful insights from unstructured text 18 | - Maintains a comprehensive database of 25,000+ variant-drug associations 19 | 20 | ## **Real-World Impact** 21 | 22 | Our technology currently powers precision medicine programs at: 23 | 24 | - 120+ hospitals and cancer centers 25 | - 15 academic medical centers 26 | - 8 commercial reference laboratories 27 | - 10+ pharmaceutical research programs 28 | 29 | Processing over 100,000 genomic profiles monthly, our solutions have helped match thousands of patients to targeted therapies and clinical trials, significantly improving outcomes. 30 | 31 | ## **Join Us In The Next Phase of Transforming Healthcare** 32 | 33 | By contributing to the BioMCP ecosystem, we're inviting developers to collaborate with us in creating the next generation of precision medicine tools. Whether you're looking to build applications that leverage genomic data, create integrations with existing healthcare systems, or explore novel approaches to biomarker analysis, GenomOncology provides the technological foundation to bring your ideas to life. 34 | 35 | ## **Get Started** 36 | 37 | Ready to explore what's possible with GenomOncology and BioMCP? 38 | 39 | - Clone our repositories on GitHub 40 | - Register for API access 41 | - Join our developer community 42 | 43 | Together, we can accelerate precision medicine through open collaboration and innovation. 
# Registered as the "search" subcommand of biomarker_app. The docstring below
# is left untouched because it doubles as the command's help text.
@biomarker_app.command("search")
def search_biomarkers_cli(
    name: Annotated[
        str | None,
        typer.Argument(
            help="Biomarker name to search for (e.g., 'PD-L1', 'EGFR mutation')"
        ),
    ] = None,
    biomarker_type: Annotated[
        str | None,
        typer.Option(
            "--type",
            help="Type of biomarker ('reference_gene' or 'branch')",
        ),
    ] = None,
    page_size: Annotated[
        int,
        typer.Option(
            "--page-size",
            help="Number of results per page",
            min=1,
            max=100,
        ),
    ] = 20,
    page: Annotated[
        int,
        typer.Option(
            "--page",
            help="Page number",
            min=1,
        ),
    ] = 1,
    api_key: Annotated[
        str | None,
        typer.Option(
            "--api-key",
            help="NCI API key (overrides NCI_API_KEY env var)",
            envvar="NCI_API_KEY",
        ),
    ] = None,
) -> None:
    """
    Search for biomarkers used in clinical trial eligibility criteria.

    Note: Biomarker data availability may be limited in CTRP. Results focus on
    biomarkers referenced in trial eligibility criteria. For detailed variant
    annotations, use 'biomcp variant search' with MyVariant.info.

    Examples:
        # Search by biomarker name
        biomcp biomarker search "PD-L1"

        # Search by type
        biomcp biomarker search --type reference_gene

        # Search for specific biomarker
        biomcp biomarker search "EGFR mutation"
    """
    try:
        # search_biomarkers is driven to completion with asyncio.run because
        # this CLI entry point is synchronous.
        results = asyncio.run(
            search_biomarkers(
                name=name,
                biomarker_type=biomarker_type,
                page_size=page_size,
                page=page,
                api_key=api_key,
            )
        )

        output = format_biomarker_results(results)
        typer.echo(output)

    except CTSAPIError as e:
        # A missing-key failure is recognised by its message text. In that
        # case the setup instructions are printed (to stdout) instead of the
        # raw error; any other CTS API error goes to stderr.
        if "API key required" in str(e):
            typer.echo(get_api_key_instructions())
        else:
            typer.echo(f"Error: {e}", err=True)
        # Both branches exit with status 1, chained to the original error.
        raise typer.Exit(1) from e
    except Exception as e:
        # Top-level CLI boundary: report anything unexpected and exit non-zero.
        typer.echo(f"Unexpected error: {e}", err=True)
        raise typer.Exit(1) from e
3 | """ 4 | 5 | # OpenFDA API Base 6 | OPENFDA_BASE_URL = "https://api.fda.gov" 7 | 8 | # Drug endpoints 9 | OPENFDA_DRUG_EVENTS_URL = f"{OPENFDA_BASE_URL}/drug/event.json" 10 | OPENFDA_DRUG_LABELS_URL = f"{OPENFDA_BASE_URL}/drug/label.json" 11 | OPENFDA_DRUG_ENFORCEMENT_URL = f"{OPENFDA_BASE_URL}/drug/enforcement.json" 12 | OPENFDA_DRUGSFDA_URL = f"{OPENFDA_BASE_URL}/drug/drugsfda.json" 13 | 14 | # Device endpoints 15 | OPENFDA_DEVICE_EVENTS_URL = f"{OPENFDA_BASE_URL}/device/event.json" 16 | OPENFDA_DEVICE_CLASSIFICATION_URL = ( 17 | f"{OPENFDA_BASE_URL}/device/classification.json" 18 | ) 19 | OPENFDA_DEVICE_RECALL_URL = f"{OPENFDA_BASE_URL}/device/recall.json" 20 | 21 | # API limits 22 | OPENFDA_DEFAULT_LIMIT = 25 23 | OPENFDA_MAX_LIMIT = 100 24 | OPENFDA_RATE_LIMIT_NO_KEY = 40 # requests per minute without key 25 | OPENFDA_RATE_LIMIT_WITH_KEY = 240 # requests per minute with key 26 | 27 | # Genomic device filters - product codes for genomic/diagnostic devices 28 | GENOMIC_DEVICE_PRODUCT_CODES = [ 29 | "OOI", # Next Generation Sequencing Oncology Panel Test System 30 | "PQP", # Nucleic Acid Based In Vitro Diagnostic Devices 31 | "OYD", # Gene Mutation Detection System 32 | "NYE", # DNA Sequencer 33 | "OEO", # Hereditary or Somatic Variant Detection System 34 | "QIN", # Tumor Profiling Test 35 | "QDI", # Companion Diagnostic 36 | "PTA", # Cancer Predisposition Risk Assessment System 37 | ] 38 | 39 | # Common adverse event search fields 40 | ADVERSE_EVENT_FIELDS = [ 41 | "patient.drug.medicinalproduct", 42 | "patient.drug.openfda.brand_name", 43 | "patient.drug.openfda.generic_name", 44 | "patient.drug.drugindication", 45 | "patient.reaction.reactionmeddrapt", 46 | ] 47 | 48 | # Label search fields 49 | LABEL_FIELDS = [ 50 | "openfda.brand_name", 51 | "openfda.generic_name", 52 | "indications_and_usage", 53 | "boxed_warning", 54 | "warnings_and_precautions", 55 | "adverse_reactions", 56 | "drug_interactions", 57 | ] 58 | 59 | # Device event search fields 60 | 
DEVICE_FIELDS = [ 61 | "device.brand_name", 62 | "device.generic_name", 63 | "device.manufacturer_d_name", 64 | "device.openfda.device_name", 65 | "device.openfda.medical_specialty_description", 66 | ] 67 | 68 | # Disclaimer text 69 | OPENFDA_DISCLAIMER = ( 70 | "⚠️ **FDA Data Notice**: Information from openFDA API. " 71 | "Not for clinical decision-making. Adverse events don't prove causation. " 72 | "Data may be incomplete or delayed. Consult healthcare professionals and " 73 | "official FDA sources at fda.gov for medical decisions." 74 | ) 75 | 76 | OPENFDA_SHORTAGE_DISCLAIMER = ( 77 | "🚨 **Critical Warning**: Drug shortage information is time-sensitive. " 78 | "Always verify current availability with FDA Drug Shortages Database at " 79 | "https://www.accessdata.fda.gov/scripts/drugshortages/ before making " 80 | "supply chain or treatment decisions." 81 | ) 82 | ``` -------------------------------------------------------------------------------- /tests/tdd/utils/test_rate_limiter.py: -------------------------------------------------------------------------------- ```python 1 | """Tests for rate limiting utilities.""" 2 | 3 | import asyncio 4 | import time 5 | 6 | import pytest 7 | 8 | from biomcp.utils.rate_limiter import RateLimiter 9 | 10 | 11 | class TestRateLimiter: 12 | """Test rate limiting functionality.""" 13 | 14 | @pytest.mark.asyncio 15 | async def test_basic_rate_limiting(self): 16 | """Test basic rate limiting behavior.""" 17 | # Create limiter with 2 requests per second 18 | limiter = RateLimiter(rate=2, per_seconds=1) 19 | 20 | # First two requests should be allowed 21 | allowed1, wait1 = await limiter.check_rate_limit() 22 | assert allowed1 is True 23 | assert wait1 is None 24 | 25 | allowed2, wait2 = await limiter.check_rate_limit() 26 | assert allowed2 is True 27 | assert wait2 is None 28 | 29 | # Third request should be denied with wait time 30 | allowed3, wait3 = await limiter.check_rate_limit() 31 | assert allowed3 is False 32 | assert 
wait3 is not None 33 | assert wait3 > 0 34 | 35 | @pytest.mark.asyncio 36 | async def test_rate_limit_replenishment(self): 37 | """Test that tokens replenish over time.""" 38 | # Create limiter with 1 request per second 39 | limiter = RateLimiter(rate=1, per_seconds=1) 40 | 41 | # Use the token 42 | allowed1, _ = await limiter.check_rate_limit() 43 | assert allowed1 is True 44 | 45 | # Should be denied immediately 46 | allowed2, wait2 = await limiter.check_rate_limit() 47 | assert allowed2 is False 48 | 49 | # Wait for replenishment 50 | await asyncio.sleep(1.1) 51 | 52 | # Should be allowed now 53 | allowed3, _ = await limiter.check_rate_limit() 54 | assert allowed3 is True 55 | 56 | @pytest.mark.asyncio 57 | async def test_multiple_keys(self): 58 | """Test rate limiting with different keys.""" 59 | limiter = RateLimiter(rate=1, per_seconds=1) 60 | 61 | # Use token for key1 62 | allowed1, _ = await limiter.check_rate_limit("key1") 63 | assert allowed1 is True 64 | 65 | # key2 should still have tokens 66 | allowed2, _ = await limiter.check_rate_limit("key2") 67 | assert allowed2 is True 68 | 69 | # key1 should be limited 70 | allowed3, wait3 = await limiter.check_rate_limit("key1") 71 | assert allowed3 is False 72 | assert wait3 is not None 73 | 74 | @pytest.mark.asyncio 75 | async def test_wait_if_needed(self): 76 | """Test the wait_if_needed helper.""" 77 | limiter = RateLimiter(rate=1, per_seconds=1) 78 | 79 | # First call should not wait 80 | start = time.time() 81 | await limiter.wait_if_needed() 82 | elapsed = time.time() - start 83 | assert elapsed < 0.1 84 | 85 | # Second call should wait 86 | start = time.time() 87 | await limiter.wait_if_needed() 88 | elapsed = time.time() - start 89 | assert elapsed >= 0.9 # Should wait approximately 1 second 90 | ``` -------------------------------------------------------------------------------- /src/biomcp/utils/metrics.py: -------------------------------------------------------------------------------- ```python 1 | 
"""Metrics and monitoring utilities."""

import inspect
import logging
import time
from collections.abc import Callable
from functools import wraps
from typing import Any, TypeVar, cast

logger = logging.getLogger(__name__)

T = TypeVar("T")


def _log_success(api_name: str, started: float) -> None:
    """Emit the INFO record for a tracked call that returned normally.

    Args:
        api_name: Name of the API being called
        started: ``time.perf_counter()`` reading taken before the call
    """
    logger.info(
        f"{api_name} call succeeded",
        extra={
            "api": api_name,
            "duration": time.perf_counter() - started,
            "status": "success",
        },
        # Attribute the record to the wrapper, not to this helper.
        stacklevel=2,
    )


def _log_failure(api_name: str, started: float, error: Exception) -> None:
    """Emit the ERROR record for a tracked call that raised ``error``.

    Args:
        api_name: Name of the API being called
        started: ``time.perf_counter()`` reading taken before the call
        error: The exception raised by the wrapped callable
    """
    logger.error(
        f"{api_name} call failed: {error}",
        extra={
            "api": api_name,
            "duration": time.perf_counter() - started,
            "status": "error",
            "error_type": type(error).__name__,
        },
        # Attribute the record to the wrapper, not to this helper.
        stacklevel=2,
    )


def track_api_call(api_name: str):
    """Track API call metrics.

    The decorated callable is timed with a monotonic clock and one log
    record is emitted per call: INFO on success, ERROR on failure. Both
    records carry ``api``, ``duration`` (seconds) and ``status`` in their
    ``extra`` fields; failures additionally carry ``error_type``. The
    original exception is always re-raised unchanged.

    Args:
        api_name: Name of the API being called

    Returns:
        Decorator function that works on both sync and async callables
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        # Build only the wrapper that matches the callable's kind.
        if inspect.iscoroutinefunction(func):

            @wraps(func)
            async def async_wrapper(*args: Any, **kwargs: Any) -> T:
                started = time.perf_counter()
                try:
                    result = await func(*args, **kwargs)  # type: ignore[misc]
                except Exception as e:
                    _log_failure(api_name, started, e)
                    raise
                _log_success(api_name, started)
                return result

            return cast(Callable[..., T], async_wrapper)

        @wraps(func)
        def sync_wrapper(*args: Any, **kwargs: Any) -> T:
            started = time.perf_counter()
            try:
                result = func(*args, **kwargs)
            except Exception as e:
                _log_failure(api_name, started, e)
                raise
            _log_success(api_name, started)
            return result

        return cast(Callable[..., T], sync_wrapper)

    return decorator
Verify protocol module contains critical sections 50 | protocol_sections = modules[Module.PROTOCOL] 51 | assert "IdentificationModule" in protocol_sections 52 | assert "StatusModule" in protocol_sections 53 | assert "DescriptionModule" in protocol_sections 54 | 55 | 56 | async def test_cli_default_module_functionality(anyio_backend): 57 | # Test directly with both explicit Protocol and None (which should use Protocol) 58 | markdown_with_protocol = await get_trial("NCT04280705", Module.PROTOCOL) 59 | assert len(markdown_with_protocol) > 10000 60 | 61 | # In a real CLI context, the default would be set at the CLI level 62 | # This test ensures the Protocol module is valid for that purpose 63 | assert "Protocol Section" in markdown_with_protocol 64 | 65 | 66 | async def test_json_output(anyio_backend): 67 | # Test JSON output format 68 | json_output = await get_trial( 69 | "NCT04280705", Module.PROTOCOL, output_json=True 70 | ) 71 | assert json_output.startswith("{") 72 | assert "URL" in json_output 73 | assert "NCT04280705" in json_output 74 | 75 | 76 | async def test_error_handling_json_output(anyio_backend): 77 | # Test error handling with JSON output 78 | json_output = await get_trial( 79 | "NCT99999999", Module.PROTOCOL, output_json=True 80 | ) 81 | assert "error" in json_output 82 | assert "NCT99999999" in json_output 83 | ``` -------------------------------------------------------------------------------- /wrangler.toml: -------------------------------------------------------------------------------- ```toml 1 | name = "biomcp-worker" 2 | main = "src/biomcp/workers/worker_entry_stytch.js" 3 | compatibility_date = "2025-04-28" 4 | 5 | [vars] 6 | # Environment variables for the worker 7 | # These can be overridden in several ways: 8 | # 1. In the Cloudflare dashboard under Workers & Pages > your-worker > Settings > Variables 9 | # 2. Using wrangler CLI: wrangler secret put REMOTE_MCP_SERVER_URL 10 | # 3. 
During local development: wrangler dev --var REMOTE_MCP_SERVER_URL="http://localhost:8000" 11 | # 4. In your CI/CD pipeline using environment variables with the format CF_REMOTE_MCP_SERVER_URL 12 | REMOTE_MCP_SERVER_URL = "http://localhost:8000" # Replace with your MCP server URL in production 13 | 14 | # Stytch OAuth Configuration 15 | # Replace these placeholder values with your actual Stytch credentials 16 | # For development, use test credentials from https://stytch.com/dashboard 17 | # For production, use production credentials and api.stytch.com instead of test.stytch.com 18 | STYTCH_PROJECT_ID = "project-test-xxxxxxxxxxxx" # Replace with your Stytch Project ID 19 | STYTCH_SECRET = "secret-test-xxxxxxxxxxxx" # Replace with your Stytch Secret (use wrangler secret for production) 20 | STYTCH_PUBLIC_TOKEN = "public-token-test-xxxxxxxxxxxx" # Replace with your Stytch Public Token 21 | STYTCH_API_URL = "https://test.stytch.com/v1" # Use https://api.stytch.com/v1 for production 22 | STYTCH_OAUTH_URL = "https://test.stytch.com/v1/public/oauth/google/start" # Update for production 23 | 24 | # Debug mode - set to true for development, false for production 25 | DEBUG = false 26 | 27 | # JWT Secret for signing tokens - use a strong, unique secret in production 28 | # For production, set this as a secret: wrangler secret put JWT_SECRET 29 | JWT_SECRET = "replace-with-a-strong-secret-key" 30 | 31 | # BigQuery variables 32 | # For production, set these as secrets or environment variables: 33 | # wrangler secret put BQ_PROJECT_ID 34 | # wrangler secret put BQ_DATASET 35 | # wrangler secret put BQ_SA_KEY_JSON 36 | BQ_PROJECT_ID = "your-project-id" # Replace with your actual project ID in production 37 | BQ_DATASET = "your_dataset_name" # Replace with your actual dataset in production 38 | BQ_TABLE="worker_logs" 39 | 40 | # Sensitive variables should be stored in the Cloudflare dashboard under Workers & Pages > your-worker > Settings > Secrets 41 | # OR you can declare them 
using npx wrangler secret put BQ_SA_KEY_JSON 42 | # Potential secrets: 43 | # BQ_SA_KEY_JSON 44 | # STYTCH_SECRET 45 | 46 | 47 | # Note: The ability to allow plaintext connections is now configured in the Cloudflare dashboard 48 | # under Security settings for your Worker 49 | 50 | [build] 51 | command = "" 52 | 53 | [triggers] 54 | crons = [] 55 | 56 | [observability.logs] 57 | enabled = true 58 | 59 | # KV namespace for storing OAuth tokens and state 60 | # Create your KV namespace with: wrangler kv:namespace create OAUTH_KV 61 | # Then replace the ID below with your namespace ID 62 | [[kv_namespaces]] 63 | binding = "OAUTH_KV" 64 | id = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" # Replace with your KV namespace ID 65 | ``` -------------------------------------------------------------------------------- /docs/concepts/01-what-is-biomcp.md: -------------------------------------------------------------------------------- ```markdown 1 | # What is BioMCP? 2 | 3 | BioMCP is an open-source implementation of the Model Context Protocol (MCP) designed for biomedical research. It connects AI assistants to specialized biomedical databases, enabling natural language access to complex scientific data. 4 | 5 | [Watch the BioMCP video on YouTube](https://www.youtube.com/watch?v=bKxOWrWUUhM) 6 | 7 | ## The Bridge to Biomedical Data 8 | 9 | BioMCP provides AI assistants with direct access to specialized biomedical databases that aren't available through general web search. Built on Anthropic's Model Context Protocol standard, it creates a toolbox that enables natural language queries across multiple scientific data sources.
10 | 11 | ## Connected Data Sources 12 | 13 | - **PubMed/PubTator3**: 30M+ research articles with entity recognition for genes, diseases, drugs, and variants 14 | - **ClinicalTrials.gov**: 400K+ clinical trials searchable by condition, location, phase, and eligibility 15 | - **MyVariant.info**: Comprehensive variant annotations with clinical significance 16 | - **cBioPortal**: Cancer genomics data automatically integrated with searches 17 | - **BioThings APIs**: Real-time gene, drug, and disease information 18 | - **NCI CTS API**: Enhanced cancer trial search with biomarker filtering 19 | - **AlphaGenome**: Variant effect predictions using Google DeepMind's AI 20 | 21 | ## How Does It Transform Research? 22 | 23 | What makes BioMCP particularly powerful is its conversational nature. A 24 | researcher might begin with a simple question about a disease, then naturally 25 | progress to exploring related clinical trials, and finally investigate genetic 26 | variants that affect treatment efficacy—all within a single, flowing 27 | conversation. 28 | 29 | The system remembers context throughout the interaction, allowing for natural 30 | follow-up questions and a research experience that mirrors how scientists 31 | actually work. Instead of requiring researchers to master complex query 32 | languages for each database, BioMCP translates natural language into the 33 | precise syntax each system requires. 34 | 35 | ## Why This Matters 36 | 37 | BioMCP represents a significant advancement in making specialized biomedical 38 | knowledge accessible. For researchers and clinicians, it means spending less 39 | time wrestling with complex database interfaces and more time advancing their 40 | work. For the broader field of AI in healthcare, it demonstrates how 41 | specialized knowledge domains can be made accessible through conversation. 
42 | 43 | As both AI assistants (synchronous conversation partners) and AI agents ( 44 | autonomous systems working toward goals over time) continue to evolve, tools 45 | like BioMCP will be essential in connecting these systems to the specialized 46 | knowledge they need to deliver meaningful insights in complex domains. 47 | 48 | By open-sourcing BioMCP, we're inviting the community to build upon this 49 | foundation, creating more powerful and accessible tools for biomedical research 50 | and ultimately accelerating the pace of scientific discovery. 51 | ``` -------------------------------------------------------------------------------- /tests/tdd/variants/test_alphagenome.py: -------------------------------------------------------------------------------- ```python 1 | """Tests for AlphaGenome integration.""" 2 | 3 | from unittest.mock import patch 4 | 5 | import pytest 6 | 7 | from biomcp.variants.alphagenome import predict_variant_effects 8 | 9 | 10 | @pytest.mark.asyncio 11 | async def test_predict_variant_effects_no_api_key(): 12 | """Test that missing API key returns helpful error message.""" 13 | with patch.dict("os.environ", {}, clear=True): 14 | result = await predict_variant_effects( 15 | chromosome="chr7", 16 | position=140753336, 17 | reference="A", 18 | alternate="T", 19 | ) 20 | 21 | assert "AlphaGenome API key required" in result 22 | assert "https://deepmind.google.com/science/alphagenome" in result 23 | assert "ALPHAGENOME_API_KEY" in result 24 | 25 | 26 | @pytest.mark.asyncio 27 | async def test_predict_variant_effects_not_installed(): 28 | """Test that missing AlphaGenome package returns installation instructions or API error.""" 29 | # Since AlphaGenome might be installed in test environments, we need to test both cases 30 | # We'll set a dummy API key and check what error we get 31 | import os 32 | 33 | original_key = os.environ.get("ALPHAGENOME_API_KEY") 34 | try: 35 | os.environ["ALPHAGENOME_API_KEY"] = "test-key" 36 | 37 | result = await 
predict_variant_effects( 38 | chromosome="chr7", 39 | position=140753336, 40 | reference="A", 41 | alternate="T", 42 | skip_cache=True, # Skip cache to ensure fresh results 43 | ) 44 | 45 | # The function should either: 46 | # 1. Handle ImportError if AlphaGenome is not installed 47 | # 2. Return API error if AlphaGenome is installed but API key is invalid 48 | # 3. Return a prediction failure for other errors 49 | assert any([ 50 | "AlphaGenome not installed" in result, 51 | "AlphaGenome prediction failed" in result, 52 | "API key not valid" 53 | in result, # This can happen with invalid test keys 54 | ]) 55 | 56 | if "AlphaGenome not installed" in result: 57 | assert "git clone" in result 58 | assert "pip install" in result 59 | finally: 60 | # Restore original key 61 | if original_key is None: 62 | os.environ.pop("ALPHAGENOME_API_KEY", None) 63 | else: 64 | os.environ["ALPHAGENOME_API_KEY"] = original_key 65 | 66 | 67 | @pytest.mark.asyncio 68 | async def test_predict_variant_effects_basic_parameters(): 69 | """Test that function accepts the expected parameters.""" 70 | # This tests the function interface without requiring AlphaGenome 71 | with patch.dict("os.environ", {}, clear=True): 72 | # Test with all parameters 73 | result = await predict_variant_effects( 74 | chromosome="chrX", 75 | position=12345, 76 | reference="G", 77 | alternate="C", 78 | interval_size=500_000, 79 | tissue_types=["UBERON:0002367", "UBERON:0001157"], 80 | ) 81 | 82 | # Should get API key error (not import error), proving parameters were accepted 83 | assert "AlphaGenome API key required" in result 84 | ``` -------------------------------------------------------------------------------- /example_scripts/mcp_integration.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env -S uv --quiet run --script 2 | # /// script 3 | # requires-python = ">=3.11" 4 | # dependencies = [ 5 | # "mcp", 6 | # ] 7 | # /// 8 | 9 | # Scripts to 
reproduce this page: 10 | # https://biomcp.org/mcp_integration/ 11 | 12 | import asyncio 13 | 14 | from mcp.client.session import ClientSession 15 | from mcp.client.stdio import StdioServerParameters, stdio_client 16 | from mcp.types import TextContent 17 | 18 | 19 | async def check_server(): 20 | # Run with pypi package using `uv` not `uvx` 21 | server_params = StdioServerParameters( 22 | command="uv", 23 | args=["run", "--with", "biomcp-python", "biomcp", "run"], 24 | ) 25 | # 26 | # Run with local code 27 | # server_params = StdioServerParameters( 28 | # command="python", 29 | # args=["-m", "biomcp", "run"], 30 | # ) 31 | 32 | async with ( 33 | stdio_client(server_params) as (read, write), 34 | ClientSession(read, write) as session, 35 | ): 36 | await session.initialize() 37 | 38 | # list prompts 39 | prompts = await session.list_prompts() 40 | print("Available prompts:", prompts) 41 | 42 | # list resources 43 | resources = await session.list_resources() 44 | print("Available resources:", resources) 45 | 46 | # list tools 47 | tool_result = await session.list_tools() 48 | tools = tool_result.tools 49 | print("Available tools:", tools) 50 | assert len(tools) == 13 # 3 core tools + 10 individual tools 51 | 52 | # IMPORTANT: Always use think tool first! 53 | think_result = await session.call_tool( 54 | "think", 55 | { 56 | "thought": "Planning to analyze variant rs113488022 for BRAF gene...", 57 | "thoughtNumber": 1, 58 | "totalThoughts": 2, 59 | "nextThoughtNeeded": True, 60 | }, 61 | ) 62 | assert ( 63 | think_result.isError is False 64 | ), f"Think error: {think_result.content}" 65 | 66 | # Now fetch variant details using unified fetch tool 67 | tool_name = "fetch" 68 | tool_args = {"domain": "variant", "id_": "rs113488022"} 69 | result = await session.call_tool(tool_name, tool_args) 70 | assert result.isError is False, f"Error: {result.content}" 71 | 72 | # --- Assertions --- 73 | # 1. 
"""Cancer type configuration for gene-specific studies."""

# Gene to cancer type keyword mapping
# These keywords are used to filter relevant studies from cBioPortal
GENE_CANCER_KEYWORDS: dict[str, list[str]] = {
    "BRAF": [
        "skcm",  # melanoma
        "thca",  # thyroid
        "coad",  # colorectal
        "lung",
        "glioma",  # brain
        "hairy_cell",  # hairy cell leukemia
    ],
    "KRAS": [
        "coad",  # colorectal
        "paad",  # pancreatic
        "lung",
        "stad",  # stomach
        "coadread",  # colorectal adenocarcinoma
        "ampca",  # ampullary carcinoma
    ],
    "TP53": [
        "brca",  # breast
        "ov",  # ovarian
        "lung",
        "hnsc",  # head/neck
        "lgg",  # lower grade glioma
        "gbm",  # glioblastoma
        "blca",  # bladder
        "lihc",  # liver
    ],
    "EGFR": [
        "lung",
        "nsclc",  # non-small cell lung cancer
        "gbm",  # glioblastoma
        "hnsc",  # head/neck
    ],
    "PIK3CA": [
        "brca",  # breast
        "hnsc",  # head/neck
        "coad",  # colorectal
        "ucec",  # endometrial
    ],
    "PTEN": [
        "prad",  # prostate
        "gbm",  # glioblastoma
        "ucec",  # endometrial
        "brca",  # breast
    ],
    "APC": [
        "coad",  # colorectal
        "coadread",
        "stad",  # stomach
    ],
    "VHL": [
        "rcc",  # renal cell carcinoma
        "ccrcc",  # clear cell RCC
        "kirc",  # kidney clear cell
    ],
    "RB1": [
        "rbl",  # retinoblastoma
        "sclc",  # small cell lung cancer
        "blca",  # bladder
    ],
    "BRCA1": [
        "brca",  # breast
        "ov",  # ovarian
        "prad",  # prostate
        "paad",  # pancreatic
    ],
    "BRCA2": [
        "brca",  # breast
        "ov",  # ovarian
        "prad",  # prostate
        "paad",  # pancreatic
    ],
    "ALK": [
        "lung",
        "nsclc",  # non-small cell lung cancer
        "alcl",  # anaplastic large cell lymphoma
        "nbl",  # neuroblastoma
    ],
    "MYC": [
        "burkitt",  # Burkitt lymphoma
        "dlbcl",  # diffuse large B-cell lymphoma
        "mm",  # multiple myeloma
        "nbl",  # neuroblastoma
    ],
    "NRAS": [
        "mel",  # melanoma
        "skcm",
        "thca",  # thyroid
        "aml",  # acute myeloid leukemia
    ],
    "KIT": [
        "gist",  # gastrointestinal stromal tumor
        "mel",  # melanoma
        "aml",  # acute myeloid leukemia
    ],
}

# Default keywords for genes not in the mapping
DEFAULT_CANCER_KEYWORDS: list[str] = ["msk", "tcga", "metabric", "dfci", "broad"]

# Maximum number of studies to query per gene
MAX_STUDIES_PER_GENE = 20

# Maximum mutations to process per study
MAX_MUTATIONS_PER_STUDY = 5000


def get_cancer_keywords(gene: str) -> list[str]:
    """Get cancer type keywords for a given gene.

    The lookup ignores case and surrounding whitespace. A gene that is not
    in ``GENE_CANCER_KEYWORDS`` (including an empty or missing symbol) gets
    ``DEFAULT_CANCER_KEYWORDS``.

    Args:
        gene: Gene symbol (e.g., "BRAF")

    Returns:
        List of cancer type keywords to search for. The list is a fresh
        copy, so callers may modify it without affecting later lookups.
    """
    symbol = (gene or "").strip().upper()
    # Copy: handing out the module-level list would let one caller's
    # mutation change the configuration seen by every other caller.
    return list(GENE_CANCER_KEYWORDS.get(symbol, DEFAULT_CANCER_KEYWORDS))
| health_app, 60 | name="health", 61 | no_args_is_help=True, 62 | ) 63 | 64 | app.add_typer( 65 | organization_app, 66 | name="organization", 67 | no_args_is_help=True, 68 | ) 69 | 70 | app.add_typer( 71 | intervention_app, 72 | name="intervention", 73 | no_args_is_help=True, 74 | ) 75 | 76 | app.add_typer( 77 | biomarker_app, 78 | name="biomarker", 79 | no_args_is_help=True, 80 | ) 81 | 82 | app.add_typer( 83 | disease_app, 84 | name="disease", 85 | no_args_is_help=True, 86 | ) 87 | 88 | app.add_typer( 89 | openfda_app, 90 | name="openfda", 91 | no_args_is_help=True, 92 | ) 93 | 94 | 95 | # --- Add --version Option using Annotation --- 96 | # We add this directly to the app's callback invocation signature via annotation 97 | # Note: This relies on Typer magic linking Annotated options in the callback signature 98 | # This approach is cleaner than adding it to every subcommand. 99 | @app.callback() 100 | def main_callback( 101 | version: Annotated[ 102 | bool | None, # Allows the option to not be present 103 | typer.Option( 104 | "--version", # The flag name 105 | callback=version_callback, # Function to call when flag is used 106 | is_eager=True, # Process this option before any commands 107 | help="Show the application's version and exit.", 108 | ), 109 | ] = None, # Default value 110 | ): 111 | """ 112 | BioMCP main application callback. Handles global options like --version. 113 | """ 114 | # The actual logic is in version_callback due to is_eager=True 115 | pass 116 | 117 | 118 | # --- Add Explicit 'version' Command --- 119 | @app.command() 120 | def version(): 121 | """ 122 | Display the installed biomcp version. 
123 | """ 124 | typer.echo(f"biomcp version: {__version__}") 125 | 126 | 127 | # Directly expose run_server as the 'run' command with all its options 128 | app.command("run")(run_server) 129 | 130 | 131 | if __name__ == "__main__": 132 | app() 133 | ``` -------------------------------------------------------------------------------- /src/biomcp/openfda/drug_shortages_helpers.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Helper functions for drug shortage search to reduce complexity. 3 | """ 4 | 5 | from datetime import datetime 6 | from typing import Any 7 | 8 | 9 | def matches_drug_filter(shortage: dict[str, Any], drug: str | None) -> bool: 10 | """Check if shortage matches drug name filter.""" 11 | if not drug: 12 | return True 13 | 14 | drug_lower = drug.lower() 15 | generic = shortage.get("generic_name", "").lower() 16 | brands = [b.lower() for b in shortage.get("brand_names", [])] 17 | 18 | return drug_lower in generic or any(drug_lower in b for b in brands) 19 | 20 | 21 | def matches_status_filter( 22 | shortage: dict[str, Any], status: str | None 23 | ) -> bool: 24 | """Check if shortage matches status filter.""" 25 | if not status: 26 | return True 27 | 28 | status_lower = status.lower() 29 | shortage_status = shortage.get("status", "").lower() 30 | 31 | if status_lower == "current": 32 | return "current" in shortage_status 33 | elif status_lower == "resolved": 34 | return "resolved" in shortage_status 35 | 36 | return False 37 | 38 | 39 | def matches_category_filter( 40 | shortage: dict[str, Any], therapeutic_category: str | None 41 | ) -> bool: 42 | """Check if shortage matches therapeutic category filter.""" 43 | if not therapeutic_category: 44 | return True 45 | 46 | cat_lower = therapeutic_category.lower() 47 | shortage_cat = shortage.get("therapeutic_category", "").lower() 48 | 49 | return cat_lower in shortage_cat 50 | 51 | 52 | def filter_shortages( 53 | shortages: list[dict[str, Any]], 54 
| drug: str | None, 55 | status: str | None, 56 | therapeutic_category: str | None, 57 | ) -> list[dict[str, Any]]: 58 | """Filter shortage list based on criteria.""" 59 | filtered = [] 60 | 61 | for shortage in shortages: 62 | if not matches_drug_filter(shortage, drug): 63 | continue 64 | if not matches_status_filter(shortage, status): 65 | continue 66 | if not matches_category_filter(shortage, therapeutic_category): 67 | continue 68 | 69 | filtered.append(shortage) 70 | 71 | return filtered 72 | 73 | 74 | def format_shortage_search_header( 75 | drug: str | None, 76 | status: str | None, 77 | therapeutic_category: str | None, 78 | last_updated: str | None, 79 | ) -> list[str]: 80 | """Format header for shortage search results.""" 81 | output = [] 82 | 83 | # Add last updated time 84 | if last_updated: 85 | try: 86 | updated_dt = datetime.fromisoformat(last_updated) 87 | output.append( 88 | f"*Last Updated: {updated_dt.strftime('%Y-%m-%d %H:%M')}*\n" 89 | ) 90 | except (ValueError, TypeError): 91 | pass 92 | 93 | if drug: 94 | output.append(f"**Drug**: {drug}") 95 | if status: 96 | output.append(f"**Status Filter**: {status}") 97 | if therapeutic_category: 98 | output.append(f"**Category**: {therapeutic_category}") 99 | 100 | return output 101 | 102 | 103 | def format_cache_timestamp(data: dict[str, Any]) -> str | None: 104 | """Format cache timestamp from data.""" 105 | last_updated = data.get("last_updated") or data.get("_fetched_at") 106 | if not last_updated: 107 | return None 108 | 109 | try: 110 | updated_dt = datetime.fromisoformat(last_updated) 111 | return f"*Data Updated: {updated_dt.strftime('%Y-%m-%d %H:%M')}*\n" 112 | except (ValueError, TypeError): 113 | return None 114 | ```