genomoncology/biomcp # codebase.md

This is page 1 of 19. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .github
│   ├── actions
│   │   └── setup-python-env
│   │       └── action.yml
│   ├── dependabot.yml
│   └── workflows
│       ├── ci.yml
│       ├── deploy-docs.yml
│       ├── main.yml.disabled
│       ├── on-release-main.yml
│       └── validate-codecov-config.yml
├── .gitignore
├── .pre-commit-config.yaml
├── BIOMCP_DATA_FLOW.md
├── CHANGELOG.md
├── CNAME
├── codecov.yaml
├── docker-compose.yml
├── Dockerfile
├── docs
│   ├── apis
│   │   ├── error-codes.md
│   │   ├── overview.md
│   │   └── python-sdk.md
│   ├── assets
│   │   ├── biomcp-cursor-locations.png
│   │   ├── favicon.ico
│   │   ├── icon.png
│   │   ├── logo.png
│   │   ├── mcp_architecture.txt
│   │   └── remote-connection
│   │       ├── 00_connectors.png
│   │       ├── 01_add_custom_connector.png
│   │       ├── 02_connector_enabled.png
│   │       ├── 03_connect_to_biomcp.png
│   │       ├── 04_select_google_oauth.png
│   │       └── 05_success_connect.png
│   ├── backend-services-reference
│   │   ├── 01-overview.md
│   │   ├── 02-biothings-suite.md
│   │   ├── 03-cbioportal.md
│   │   ├── 04-clinicaltrials-gov.md
│   │   ├── 05-nci-cts-api.md
│   │   ├── 06-pubtator3.md
│   │   └── 07-alphagenome.md
│   ├── blog
│   │   ├── ai-assisted-clinical-trial-search-analysis.md
│   │   ├── images
│   │   │   ├── deep-researcher-video.png
│   │   │   ├── researcher-announce.png
│   │   │   ├── researcher-drop-down.png
│   │   │   ├── researcher-prompt.png
│   │   │   ├── trial-search-assistant.png
│   │   │   └── what_is_biomcp_thumbnail.png
│   │   └── researcher-persona-resource.md
│   ├── changelog.md
│   ├── CNAME
│   ├── concepts
│   │   ├── 01-what-is-biomcp.md
│   │   ├── 02-the-deep-researcher-persona.md
│   │   └── 03-sequential-thinking-with-the-think-tool.md
│   ├── developer-guides
│   │   ├── 01-server-deployment.md
│   │   ├── 02-contributing-and-testing.md
│   │   ├── 03-third-party-endpoints.md
│   │   ├── 04-transport-protocol.md
│   │   ├── 05-error-handling.md
│   │   ├── 06-http-client-and-caching.md
│   │   ├── 07-performance-optimizations.md
│   │   └── generate_endpoints.py
│   ├── faq-condensed.md
│   ├── FDA_SECURITY.md
│   ├── genomoncology.md
│   ├── getting-started
│   │   ├── 01-quickstart-cli.md
│   │   ├── 02-claude-desktop-integration.md
│   │   └── 03-authentication-and-api-keys.md
│   ├── how-to-guides
│   │   ├── 01-find-articles-and-cbioportal-data.md
│   │   ├── 02-find-trials-with-nci-and-biothings.md
│   │   ├── 03-get-comprehensive-variant-annotations.md
│   │   ├── 04-predict-variant-effects-with-alphagenome.md
│   │   ├── 05-logging-and-monitoring-with-bigquery.md
│   │   └── 06-search-nci-organizations-and-interventions.md
│   ├── index.md
│   ├── policies.md
│   ├── reference
│   │   ├── architecture-diagrams.md
│   │   ├── quick-architecture.md
│   │   ├── quick-reference.md
│   │   └── visual-architecture.md
│   ├── robots.txt
│   ├── stylesheets
│   │   ├── announcement.css
│   │   └── extra.css
│   ├── troubleshooting.md
│   ├── tutorials
│   │   ├── biothings-prompts.md
│   │   ├── claude-code-biomcp-alphagenome.md
│   │   ├── nci-prompts.md
│   │   ├── openfda-integration.md
│   │   ├── openfda-prompts.md
│   │   ├── pydantic-ai-integration.md
│   │   └── remote-connection.md
│   ├── user-guides
│   │   ├── 01-command-line-interface.md
│   │   ├── 02-mcp-tools-reference.md
│   │   └── 03-integrating-with-ides-and-clients.md
│   └── workflows
│       └── all-workflows.md
├── example_scripts
│   ├── mcp_integration.py
│   └── python_sdk.py
├── glama.json
├── LICENSE
├── lzyank.toml
├── Makefile
├── mkdocs.yml
├── package-lock.json
├── package.json
├── pyproject.toml
├── README.md
├── scripts
│   ├── check_docs_in_mkdocs.py
│   ├── check_http_imports.py
│   └── generate_endpoints_doc.py
├── smithery.yaml
├── src
│   └── biomcp
│       ├── __init__.py
│       ├── __main__.py
│       ├── articles
│       │   ├── __init__.py
│       │   ├── autocomplete.py
│       │   ├── fetch.py
│       │   ├── preprints.py
│       │   ├── search_optimized.py
│       │   ├── search.py
│       │   └── unified.py
│       ├── biomarkers
│       │   ├── __init__.py
│       │   └── search.py
│       ├── cbioportal_helper.py
│       ├── circuit_breaker.py
│       ├── cli
│       │   ├── __init__.py
│       │   ├── articles.py
│       │   ├── biomarkers.py
│       │   ├── diseases.py
│       │   ├── health.py
│       │   ├── interventions.py
│       │   ├── main.py
│       │   ├── openfda.py
│       │   ├── organizations.py
│       │   ├── server.py
│       │   ├── trials.py
│       │   └── variants.py
│       ├── connection_pool.py
│       ├── constants.py
│       ├── core.py
│       ├── diseases
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── domain_handlers.py
│       ├── drugs
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── exceptions.py
│       ├── genes
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── http_client_simple.py
│       ├── http_client.py
│       ├── individual_tools.py
│       ├── integrations
│       │   ├── __init__.py
│       │   ├── biothings_client.py
│       │   └── cts_api.py
│       ├── interventions
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── logging_filter.py
│       ├── metrics_handler.py
│       ├── metrics.py
│       ├── openfda
│       │   ├── __init__.py
│       │   ├── adverse_events_helpers.py
│       │   ├── adverse_events.py
│       │   ├── cache.py
│       │   ├── constants.py
│       │   ├── device_events_helpers.py
│       │   ├── device_events.py
│       │   ├── drug_approvals.py
│       │   ├── drug_labels_helpers.py
│       │   ├── drug_labels.py
│       │   ├── drug_recalls_helpers.py
│       │   ├── drug_recalls.py
│       │   ├── drug_shortages_detail_helpers.py
│       │   ├── drug_shortages_helpers.py
│       │   ├── drug_shortages.py
│       │   ├── exceptions.py
│       │   ├── input_validation.py
│       │   ├── rate_limiter.py
│       │   ├── utils.py
│       │   └── validation.py
│       ├── organizations
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── parameter_parser.py
│       ├── prefetch.py
│       ├── query_parser.py
│       ├── query_router.py
│       ├── rate_limiter.py
│       ├── render.py
│       ├── request_batcher.py
│       ├── resources
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── instructions.md
│       │   └── researcher.md
│       ├── retry.py
│       ├── router_handlers.py
│       ├── router.py
│       ├── shared_context.py
│       ├── thinking
│       │   ├── __init__.py
│       │   ├── sequential.py
│       │   └── session.py
│       ├── thinking_tool.py
│       ├── thinking_tracker.py
│       ├── trials
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── nci_getter.py
│       │   ├── nci_search.py
│       │   └── search.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── cancer_types_api.py
│       │   ├── cbio_http_adapter.py
│       │   ├── endpoint_registry.py
│       │   ├── gene_validator.py
│       │   ├── metrics.py
│       │   ├── mutation_filter.py
│       │   ├── query_utils.py
│       │   ├── rate_limiter.py
│       │   └── request_cache.py
│       ├── variants
│       │   ├── __init__.py
│       │   ├── alphagenome.py
│       │   ├── cancer_types.py
│       │   ├── cbio_external_client.py
│       │   ├── cbioportal_mutations.py
│       │   ├── cbioportal_search_helpers.py
│       │   ├── cbioportal_search.py
│       │   ├── constants.py
│       │   ├── external.py
│       │   ├── filters.py
│       │   ├── getter.py
│       │   ├── links.py
│       │   └── search.py
│       └── workers
│           ├── __init__.py
│           ├── worker_entry_stytch.js
│           ├── worker_entry.js
│           └── worker.py
├── tests
│   ├── bdd
│   │   ├── cli_help
│   │   │   ├── help.feature
│   │   │   └── test_help.py
│   │   ├── conftest.py
│   │   ├── features
│   │   │   └── alphagenome_integration.feature
│   │   ├── fetch_articles
│   │   │   ├── fetch.feature
│   │   │   └── test_fetch.py
│   │   ├── get_trials
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── get_variants
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── search_articles
│   │   │   ├── autocomplete.feature
│   │   │   ├── search.feature
│   │   │   ├── test_autocomplete.py
│   │   │   └── test_search.py
│   │   ├── search_trials
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   ├── search_variants
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   └── steps
│   │       └── test_alphagenome_steps.py
│   ├── config
│   │   └── test_smithery_config.py
│   ├── conftest.py
│   ├── data
│   │   ├── ct_gov
│   │   │   ├── clinical_trials_api_v2.yaml
│   │   │   ├── trials_NCT04280705.json
│   │   │   └── trials_NCT04280705.txt
│   │   ├── myvariant
│   │   │   ├── myvariant_api.yaml
│   │   │   ├── myvariant_field_descriptions.csv
│   │   │   ├── variants_full_braf_v600e.json
│   │   │   ├── variants_full_braf_v600e.txt
│   │   │   └── variants_part_braf_v600_multiple.json
│   │   ├── openfda
│   │   │   ├── drugsfda_detail.json
│   │   │   ├── drugsfda_search.json
│   │   │   ├── enforcement_detail.json
│   │   │   └── enforcement_search.json
│   │   └── pubtator
│   │       ├── pubtator_autocomplete.json
│   │       └── pubtator3_paper.txt
│   ├── integration
│   │   ├── test_openfda_integration.py
│   │   ├── test_preprints_integration.py
│   │   ├── test_simple.py
│   │   └── test_variants_integration.py
│   ├── tdd
│   │   ├── articles
│   │   │   ├── test_autocomplete.py
│   │   │   ├── test_cbioportal_integration.py
│   │   │   ├── test_fetch.py
│   │   │   ├── test_preprints.py
│   │   │   ├── test_search.py
│   │   │   └── test_unified.py
│   │   ├── conftest.py
│   │   ├── drugs
│   │   │   ├── __init__.py
│   │   │   └── test_drug_getter.py
│   │   ├── openfda
│   │   │   ├── __init__.py
│   │   │   ├── test_adverse_events.py
│   │   │   ├── test_device_events.py
│   │   │   ├── test_drug_approvals.py
│   │   │   ├── test_drug_labels.py
│   │   │   ├── test_drug_recalls.py
│   │   │   ├── test_drug_shortages.py
│   │   │   └── test_security.py
│   │   ├── test_biothings_integration_real.py
│   │   ├── test_biothings_integration.py
│   │   ├── test_circuit_breaker.py
│   │   ├── test_concurrent_requests.py
│   │   ├── test_connection_pool.py
│   │   ├── test_domain_handlers.py
│   │   ├── test_drug_approvals.py
│   │   ├── test_drug_recalls.py
│   │   ├── test_drug_shortages.py
│   │   ├── test_endpoint_documentation.py
│   │   ├── test_error_scenarios.py
│   │   ├── test_europe_pmc_fetch.py
│   │   ├── test_mcp_integration.py
│   │   ├── test_mcp_tools.py
│   │   ├── test_metrics.py
│   │   ├── test_nci_integration.py
│   │   ├── test_nci_mcp_tools.py
│   │   ├── test_network_policies.py
│   │   ├── test_offline_mode.py
│   │   ├── test_openfda_unified.py
│   │   ├── test_pten_r173_search.py
│   │   ├── test_render.py
│   │   ├── test_request_batcher.py.disabled
│   │   ├── test_retry.py
│   │   ├── test_router.py
│   │   ├── test_shared_context.py.disabled
│   │   ├── test_unified_biothings.py
│   │   ├── thinking
│   │   │   ├── __init__.py
│   │   │   └── test_sequential.py
│   │   ├── trials
│   │   │   ├── test_backward_compatibility.py
│   │   │   ├── test_getter.py
│   │   │   └── test_search.py
│   │   ├── utils
│   │   │   ├── test_gene_validator.py
│   │   │   ├── test_mutation_filter.py
│   │   │   ├── test_rate_limiter.py
│   │   │   └── test_request_cache.py
│   │   ├── variants
│   │   │   ├── constants.py
│   │   │   ├── test_alphagenome_api_key.py
│   │   │   ├── test_alphagenome_comprehensive.py
│   │   │   ├── test_alphagenome.py
│   │   │   ├── test_cbioportal_mutations.py
│   │   │   ├── test_cbioportal_search.py
│   │   │   ├── test_external_integration.py
│   │   │   ├── test_external.py
│   │   │   ├── test_extract_gene_aa_change.py
│   │   │   ├── test_filters.py
│   │   │   ├── test_getter.py
│   │   │   ├── test_links.py
│   │   │   └── test_search.py
│   │   └── workers
│   │       └── test_worker_sanitization.js
│   └── test_pydantic_ai_integration.py
├── THIRD_PARTY_ENDPOINTS.md
├── tox.ini
├── uv.lock
└── wrangler.toml
```

# Files

--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------

```yaml
 1 | repos:
 2 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: "v4.4.0"
 4 |     hooks:
 5 |       - id: check-case-conflict
 6 |       - id: check-merge-conflict
 7 |       - id: check-toml
 8 |       - id: check-yaml
 9 |       - id: end-of-file-fixer
10 |       - id: trailing-whitespace
11 | 
12 |   - repo: https://github.com/astral-sh/ruff-pre-commit
13 |     rev: "v0.6.3"
14 |     hooks:
15 |       - id: ruff
16 |         args: [--exit-non-zero-on-fix]
17 |       - id: ruff-format
18 | 
19 |   - repo: local
20 |     hooks:
21 |       - id: update-endpoints-doc
22 |         name: Update THIRD_PARTY_ENDPOINTS.md
23 |         entry: uv run python scripts/generate_endpoints_doc.py
24 |         language: system
25 |         pass_filenames: false
26 |         files: 'src/biomcp/utils/endpoint_registry\.py$'
27 |       - id: check-http-imports
28 |         name: Check for direct HTTP library imports
29 |         entry: uv run python scripts/check_http_imports.py
30 |         language: system
31 |         pass_filenames: false
32 |         always_run: true
33 |         files: '\.py$'
34 |       - id: check-docs-in-mkdocs
35 |         name: Check documentation files are in mkdocs.yml
36 |         entry: uv run python scripts/check_docs_in_mkdocs.py
37 |         language: system
38 |         pass_filenames: false
39 |         files: '^docs/.*\.md$|^mkdocs\.yml$'
40 | 
41 |   - repo: https://github.com/pre-commit/mirrors-prettier
42 |     rev: "v3.0.3"
43 |     hooks:
44 |       - id: prettier
45 | 
```

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
  1 | docs/source
  2 | 
  3 | # From https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore
  4 | 
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | share/python-wheels/
 28 | *.egg-info/
 29 | .installed.cfg
 30 | *.egg
 31 | MANIFEST
 32 | 
 33 | # PyInstaller
 34 | #  Usually these files are written by a python script from a template
 35 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 36 | *.manifest
 37 | *.spec
 38 | 
 39 | # Installer logs
 40 | pip-log.txt
 41 | pip-delete-this-directory.txt
 42 | 
 43 | # Unit test / coverage reports
 44 | htmlcov/
 45 | .tox/
 46 | .nox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *.cover
 53 | *.py,cover
 54 | .hypothesis/
 55 | .pytest_cache/
 56 | cover/
 57 | 
 58 | # Translations
 59 | *.mo
 60 | *.pot
 61 | 
 62 | # Django stuff:
 63 | *.log
 64 | local_settings.py
 65 | db.sqlite3
 66 | db.sqlite3-journal
 67 | 
 68 | # Flask stuff:
 69 | instance/
 70 | .webassets-cache
 71 | 
 72 | # Scrapy stuff:
 73 | .scrapy
 74 | 
 75 | # Sphinx documentation
 76 | docs/_build/
 77 | 
 78 | # PyBuilder
 79 | .pybuilder/
 80 | target/
 81 | 
 82 | # Jupyter Notebook
 83 | .ipynb_checkpoints
 84 | 
 85 | # IPython
 86 | profile_default/
 87 | ipython_config.py
 88 | 
 89 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 90 | __pypackages__/
 91 | 
 92 | # Celery stuff
 93 | celerybeat-schedule
 94 | celerybeat.pid
 95 | 
 96 | # SageMath parsed files
 97 | *.sage.py
 98 | 
 99 | # Environments
100 | .env
101 | .venv
102 | env/
103 | venv/
104 | ENV/
105 | env.bak/
106 | venv.bak/
107 | 
108 | # Spyder project settings
109 | .spyderproject
110 | .spyproject
111 | 
112 | # Rope project settings
113 | .ropeproject
114 | 
115 | # mkdocs documentation
116 | /site
117 | 
118 | # ruff
119 | .ruff_cache
120 | 
121 | # mypy
122 | .mypy_cache/
123 | .dmypy.json
124 | dmypy.json
125 | 
126 | # Pyre type checker
127 | .pyre/
128 | 
129 | # pytype static type analyzer
130 | .pytype/
131 | 
132 | # Cython debug symbols
133 | cython_debug/
134 | 
135 | # Vscode config files
136 | .vscode/
137 | 
138 | # PyCharm
139 | .idea/
140 | 
141 | # LLMs Notes
142 | llms/
143 | vault/
144 | 
145 | .DS_Store
146 | /node_modules/
147 | 
148 | CLAUDE.md
149 | lzyank.toml
150 | experiment/
151 | alphagenome
152 | spike/
153 | 
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
  1 | # BioMCP: Biomedical Model Context Protocol
  2 | 
  3 | BioMCP is an open source (MIT License) toolkit that empowers AI assistants and
  4 | agents with specialized biomedical knowledge. Built following the Model Context
  5 | Protocol (MCP), it connects AI systems to authoritative biomedical data
  6 | sources, enabling them to answer questions about clinical trials, scientific
  7 | literature, and genomic variants with precision and depth.
  8 | 
  9 | [![▶️ Watch the video](./docs/blog/images/what_is_biomcp_thumbnail.png)](https://www.youtube.com/watch?v=bKxOWrWUUhM)
 10 | 
 11 | ## MCPHub Certification
 12 | 
 13 | BioMCP is certified by [MCPHub](https://mcphub.com/mcp-servers/genomoncology/biomcp). This certification ensures that BioMCP follows best practices for Model Context Protocol implementation and provides reliable biomedical data access.
 14 | 
 15 | ## Why BioMCP?
 16 | 
 17 | While Large Language Models have broad general knowledge, they often lack
 18 | specialized domain-specific information or access to up-to-date resources.
 19 | BioMCP bridges this gap for biomedicine by:
 20 | 
 21 | - Providing **structured access** to clinical trials, biomedical literature,
 22 |   and genomic variants
 23 | - Enabling **natural language queries** to specialized databases without
 24 |   requiring knowledge of their specific syntax
 25 | - Supporting **biomedical research** workflows through a consistent interface
 26 | - Functioning as an **MCP server** for AI assistants and agents
 27 | 
 28 | ## Biomedical Data Sources
 29 | 
 30 | BioMCP integrates with multiple biomedical data sources:
 31 | 
 32 | ### Literature Sources
 33 | 
 34 | - **PubTator3/PubMed** - Peer-reviewed biomedical literature with entity annotations
 35 | - **bioRxiv/medRxiv** - Preprint servers for biology and health sciences
 36 | - **Europe PMC** - Open science platform including preprints
 37 | 
 38 | ### Clinical & Genomic Sources
 39 | 
 40 | - **ClinicalTrials.gov** - Clinical trial registry and results database
 41 | - **NCI Clinical Trials Search API** - National Cancer Institute's curated cancer trials database
 42 |   - Advanced search filters (biomarkers, prior therapies, brain metastases)
 43 |   - Organization and intervention databases
 44 |   - Disease vocabulary with synonyms
 45 | - **BioThings Suite** - Comprehensive biomedical data APIs:
 46 |   - **MyVariant.info** - Consolidated genetic variant annotation
 47 |   - **MyGene.info** - Real-time gene annotations and information
 48 |   - **MyDisease.info** - Disease ontology and synonym information
 49 |   - **MyChem.info** - Drug/chemical annotations and properties
 50 | - **TCGA/GDC** - The Cancer Genome Atlas for cancer variant data
 51 | - **1000 Genomes** - Population frequency data via Ensembl
 52 | - **cBioPortal** - Cancer genomics portal with mutation occurrence data
 53 | 
 54 | ### Regulatory & Safety Sources
 55 | 
 56 | - **OpenFDA** - FDA regulatory and safety data:
 57 |   - **Drug Adverse Events (FAERS)** - Post-market drug safety reports
 58 |   - **Drug Labels (SPL)** - Official prescribing information
 59 |   - **Device Events (MAUDE)** - Medical device adverse events, with genomic device filtering
 60 | 
 61 | ## Available MCP Tools
 62 | 
 63 | BioMCP provides 24 specialized tools for biomedical research:
 64 | 
 65 | ### Core Tools (3)
 66 | 
 67 | #### 1. Think Tool (ALWAYS USE FIRST!)
 68 | 
 69 | **CRITICAL**: The `think` tool MUST be your first step for ANY biomedical research task.
 70 | 
 71 | ```python
 72 | # Start analysis with sequential thinking
 73 | think(
 74 |     thought="Breaking down the query about BRAF mutations in melanoma...",
 75 |     thoughtNumber=1,
 76 |     totalThoughts=3,
 77 |     nextThoughtNeeded=True
 78 | )
 79 | ```
 80 | 
 81 | The sequential thinking tool helps:
 82 | 
 83 | - Break down complex biomedical problems systematically
 84 | - Plan multi-step research approaches
 85 | - Track reasoning progress
 86 | - Ensure comprehensive analysis
 87 | 
 88 | #### 2. Search Tool
 89 | 
 90 | The search tool supports two modes:
 91 | 
 92 | ##### Unified Query Language (Recommended)
 93 | 
 94 | Use the `query` parameter with structured field syntax for powerful cross-domain searches:
 95 | 
 96 | ```python
 97 | # Simple natural language
 98 | search(query="BRAF melanoma")
 99 | 
100 | # Field-specific search
101 | search(query="gene:BRAF AND trials.condition:melanoma")
102 | 
103 | # Complex queries
104 | search(query="gene:BRAF AND variants.significance:pathogenic AND articles.date:>2023")
105 | 
106 | # Get searchable fields schema
107 | search(get_schema=True)
108 | 
109 | # Explain how a query is parsed
110 | search(query="gene:BRAF", explain_query=True)
111 | ```
112 | 
113 | **Supported Fields:**
114 | 
115 | - **Cross-domain**: `gene:`, `variant:`, `disease:`
116 | - **Trials**: `trials.condition:`, `trials.phase:`, `trials.status:`, `trials.intervention:`
117 | - **Articles**: `articles.author:`, `articles.journal:`, `articles.date:`
118 | - **Variants**: `variants.significance:`, `variants.rsid:`, `variants.frequency:`
119 | 
120 | ##### Domain-Based Search
121 | 
122 | Use the `domain` parameter with specific filters:
123 | 
124 | ```python
125 | # Search articles (includes automatic cBioPortal integration)
126 | search(domain="article", genes=["BRAF"], diseases=["melanoma"])
127 | 
128 | # Search with mutation-specific cBioPortal data
129 | search(domain="article", genes=["BRAF"], keywords=["V600E"])
130 | search(domain="article", genes=["SRSF2"], keywords=["F57*"])  # Wildcard patterns
131 | 
132 | # Search trials
133 | search(domain="trial", conditions=["lung cancer"], phase="3")
134 | 
135 | # Search variants
136 | search(domain="variant", gene="TP53", significance="pathogenic")
137 | ```
138 | 
139 | **Note**: When searching articles with a gene parameter, cBioPortal data is automatically included:
140 | 
141 | - Gene-level summaries show mutation frequency across cancer studies
142 | - Mutation-specific searches (e.g., "V600E") show study-level occurrence data
143 | - Cancer types are dynamically resolved from cBioPortal API
144 | 
145 | #### 3. Fetch Tool
146 | 
147 | Retrieve full details for a single article, trial, or variant:
148 | 
149 | ```python
150 | # Fetch article details (supports both PMID and DOI)
151 | fetch(domain="article", id="34567890")  # PMID
152 | fetch(domain="article", id="10.1101/2024.01.20.23288905")  # DOI
153 | 
154 | # Fetch trial with all sections
155 | fetch(domain="trial", id="NCT04280705", detail="all")
156 | 
157 | # Fetch variant details
158 | fetch(domain="variant", id="rs113488022")
159 | ```
160 | 
161 | **Domain-specific options:**
162 | 
163 | - **Articles**: `detail="full"` retrieves full text if available
164 | - **Trials**: `detail` can be "protocol", "locations", "outcomes", "references", or "all"
165 | - **Variants**: Always returns full details
166 | 
167 | ### Individual Tools (21)
168 | 
169 | For users who prefer direct access to specific functionality, BioMCP also provides 21 individual tools:
170 | 
171 | #### Article Tools (2)
172 | 
173 | - **article_searcher**: Search PubMed/PubTator3 and preprints
174 | - **article_getter**: Fetch detailed article information (supports PMID and DOI)
175 | 
176 | #### Trial Tools (6)
177 | 
178 | - **trial_searcher**: Search ClinicalTrials.gov or NCI CTS API (via source parameter)
179 | - **trial_getter**: Fetch all trial details from either source
180 | - **trial_protocol_getter**: Fetch protocol information only (ClinicalTrials.gov)
181 | - **trial_references_getter**: Fetch trial publications (ClinicalTrials.gov)
182 | - **trial_outcomes_getter**: Fetch outcome measures and results (ClinicalTrials.gov)
183 | - **trial_locations_getter**: Fetch site locations and contacts (ClinicalTrials.gov)
184 | 
185 | #### Variant Tools (2)
186 | 
187 | - **variant_searcher**: Search MyVariant.info database
188 | - **variant_getter**: Fetch comprehensive variant details
189 | 
190 | #### NCI-Specific Tools (6)
191 | 
192 | - **nci_organization_searcher**: Search NCI's organization database
193 | - **nci_organization_getter**: Get organization details by ID
194 | - **nci_intervention_searcher**: Search NCI's intervention database (drugs, devices, procedures)
195 | - **nci_intervention_getter**: Get intervention details by ID
196 | - **nci_biomarker_searcher**: Search biomarkers used in trial eligibility criteria
197 | - **nci_disease_searcher**: Search NCI's controlled vocabulary of cancer conditions
198 | 
199 | #### Gene, Disease & Drug Tools (3)
200 | 
201 | - **gene_getter**: Get real-time gene information from MyGene.info
202 | - **disease_getter**: Get disease definitions and synonyms from MyDisease.info
203 | - **drug_getter**: Get drug/chemical information from MyChem.info
204 | 
205 | **Note**: All individual tools that search by gene automatically include cBioPortal summaries when the `include_cbioportal` parameter is True (default). Trial searches can expand disease conditions with synonyms when `expand_synonyms` is True (default).
206 | 
207 | ## Quick Start
208 | 
209 | ### For Claude Desktop Users
210 | 
211 | 1. **Install `uv`** if you don't have it (recommended):
212 | 
213 |    ```bash
214 |    # macOS
215 |    brew install uv
216 | 
217 |    # Windows/Linux
218 |    pip install uv
219 |    ```
220 | 
221 | 2. **Configure Claude Desktop**:
222 |    - Open Claude Desktop settings
223 |    - Navigate to Developer section
224 |    - Click "Edit Config" and add:
225 |    ```json
226 |    {
227 |      "mcpServers": {
228 |        "biomcp": {
229 |          "command": "uv",
230 |          "args": ["run", "--with", "biomcp-python", "biomcp", "run"]
231 |        }
232 |      }
233 |    }
234 |    ```
235 |    - Restart Claude Desktop and start chatting about biomedical topics!
236 | 
237 | ### Python Package Installation
238 | 
239 | ```bash
240 | # Using pip
241 | pip install biomcp-python
242 | 
243 | # Using uv (recommended for faster installation)
244 | uv pip install biomcp-python
245 | 
246 | # Run directly without installation
247 | uv run --with biomcp-python biomcp trial search --condition "lung cancer"
248 | ```
249 | 
250 | ## Configuration
251 | 
252 | ### Environment Variables
253 | 
254 | BioMCP supports optional environment variables for enhanced functionality:
255 | 
256 | ```bash
257 | # cBioPortal API authentication (optional)
258 | export CBIO_TOKEN="your-api-token"  # For authenticated access
259 | export CBIO_BASE_URL="https://www.cbioportal.org/api"  # Custom API endpoint
260 | 
261 | # Performance tuning
262 | export BIOMCP_USE_CONNECTION_POOL="true"  # Enable HTTP connection pooling (default: true)
263 | export BIOMCP_METRICS_ENABLED="false"     # Enable performance metrics (default: false)
264 | ```
265 | 
266 | ## Running BioMCP Server
267 | 
268 | BioMCP supports multiple transport protocols to suit different deployment scenarios:
269 | 
270 | ### Local Development (STDIO)
271 | 
272 | For direct integration with Claude Desktop or local MCP clients:
273 | 
274 | ```bash
275 | # Default STDIO mode for local development
276 | biomcp run
277 | 
278 | # Or explicitly specify STDIO
279 | biomcp run --mode stdio
280 | ```
281 | 
282 | ### HTTP Server Mode
283 | 
284 | BioMCP supports multiple HTTP transport protocols:
285 | 
286 | #### Legacy SSE Transport (Worker Mode)
287 | 
288 | For backward compatibility with existing SSE clients:
289 | 
290 | ```bash
291 | biomcp run --mode worker
292 | # Server available at http://localhost:8000/sse
293 | ```
294 | 
295 | #### Streamable HTTP Transport (Recommended)
296 | 
297 | The new MCP-compliant Streamable HTTP transport provides optimal performance and standards compliance:
298 | 
299 | ```bash
300 | biomcp run --mode streamable_http
301 | 
302 | # Custom host and port
303 | biomcp run --mode streamable_http --host 127.0.0.1 --port 8080
304 | ```
305 | 
306 | Features of Streamable HTTP transport:
307 | 
308 | - Single `/mcp` endpoint for all operations
309 | - Dynamic response mode (JSON for quick operations, SSE for long-running)
310 | - Session management support (future)
311 | - Full MCP specification compliance (2025-03-26)
312 | - Better scalability for cloud deployments
313 | 
314 | ### Deployment Options
315 | 
316 | #### Docker
317 | 
318 | ```bash
319 | # Build the Docker image locally
320 | docker build -t biomcp:latest .
321 | 
322 | # Run the container
323 | docker run -p 8000:8000 biomcp:latest biomcp run --mode streamable_http
324 | ```
325 | 
326 | #### Cloudflare Workers
327 | 
328 | The worker mode can be deployed to Cloudflare Workers for global edge deployment.
329 | 
330 | Note: Most APIs work without authentication, but tokens may provide higher rate limits. The NCI-specific tools are the exception and require an NCI API key.
331 | 
332 | ## Command Line Interface
333 | 
334 | BioMCP provides a comprehensive CLI for direct database interaction:
335 | 
336 | ```bash
337 | # Get help
338 | biomcp --help
339 | 
340 | # Run the MCP server
341 | biomcp run
342 | 
343 | # Article search examples
344 | biomcp article search --gene BRAF --disease Melanoma  # Includes preprints by default
345 | biomcp article search --gene BRAF --no-preprints      # Exclude preprints
346 | biomcp article get 21717063 --full
347 | 
348 | # Clinical trial examples
349 | biomcp trial search --condition "Lung Cancer" --phase PHASE3
350 | biomcp trial search --condition melanoma --source nci --api-key YOUR_KEY  # Use NCI API
351 | biomcp trial get NCT04280705 Protocol
352 | biomcp trial get NCT04280705 --source nci --api-key YOUR_KEY  # Get from NCI
353 | 
354 | # Variant examples with external annotations
355 | biomcp variant search --gene TP53 --significance pathogenic
356 | biomcp variant get rs113488022  # Includes TCGA, 1000 Genomes, and cBioPortal data by default
357 | biomcp variant get rs113488022 --no-external  # Core annotations only
358 | 
359 | # NCI-specific examples (requires NCI API key)
360 | biomcp organization search "MD Anderson" --api-key YOUR_KEY
361 | biomcp organization get ORG123456 --api-key YOUR_KEY
362 | biomcp intervention search pembrolizumab --api-key YOUR_KEY
363 | biomcp intervention search --type Device --api-key YOUR_KEY
364 | biomcp biomarker search "PD-L1" --api-key YOUR_KEY
365 | biomcp disease search melanoma --source nci --api-key YOUR_KEY
366 | ```
367 | 
368 | ## Testing & Verification
369 | 
370 | Test your BioMCP setup with the MCP Inspector:
371 | 
372 | ```bash
373 | npx @modelcontextprotocol/inspector uv run --with biomcp-python biomcp run
374 | ```
375 | 
376 | This opens a web interface where you can explore and test all available tools.
377 | 
378 | ## Enterprise Version: OncoMCP
379 | 
380 | OncoMCP extends BioMCP with GenomOncology's enterprise-grade precision oncology
381 | platform (POP), providing:
382 | 
383 | - **HIPAA-Compliant Deployment**: Secure on-premise options
384 | - **Real-Time Trial Matching**: Up-to-date status and arm-level matching
385 | - **Healthcare Integration**: Seamless EHR and data warehouse connectivity
386 | - **Curated Knowledge Base**: 15,000+ trials and FDA approvals
387 | - **Sophisticated Patient Matching**: Using integrated clinical and molecular
388 |   profiles
389 | - **Advanced NLP**: Structured extraction from unstructured text
390 | - **Comprehensive Biomarker Processing**: Mutation and rule processing
391 | 
392 | Learn more: [GenomOncology](https://genomoncology.com/)
393 | 
394 | ## MCP Registries
395 | 
396 | [![smithery badge](https://smithery.ai/badge/@genomoncology/biomcp)](https://smithery.ai/server/@genomoncology/biomcp)
397 | 
398 | <a href="https://glama.ai/mcp/servers/@genomoncology/biomcp">
399 | <img width="380" height="200" src="https://glama.ai/mcp/servers/@genomoncology/biomcp/badge" />
400 | </a>
401 | 
402 | ## Example Use Cases
403 | 
404 | ### Gene Information Retrieval
405 | 
406 | ```python
407 | # Get comprehensive gene information
408 | gene_getter(gene_id_or_symbol="TP53")
409 | # Returns: Official name, summary, aliases, links to databases
410 | ```
411 | 
412 | ### Disease Synonym Expansion
413 | 
414 | ```python
415 | # Get disease information with synonyms
416 | disease_getter(disease_id_or_name="GIST")
417 | # Returns: "gastrointestinal stromal tumor" and other synonyms
418 | 
419 | # Search trials with automatic synonym expansion
420 | trial_searcher(conditions=["GIST"], expand_synonyms=True)
421 | # Searches for: GIST OR "gastrointestinal stromal tumor" OR "GI stromal tumor"
422 | ```
423 | 
424 | ### Integrated Biomedical Research
425 | 
426 | ```python
427 | # 1. Always start with thinking
428 | think(thought="Analyzing BRAF V600E in melanoma treatment", thoughtNumber=1)
429 | 
430 | # 2. Get gene context
431 | gene_getter("BRAF")
432 | 
433 | # 3. Search for pathogenic variants
434 | variant_searcher(gene="BRAF", hgvsp="V600E", significance="pathogenic")
435 | 
436 | # 4. Find relevant clinical trials with disease expansion
437 | trial_searcher(conditions=["melanoma"], interventions=["BRAF inhibitor"])
438 | ```
439 | 
440 | ## Documentation
441 | 
442 | For comprehensive documentation, visit [https://biomcp.org](https://biomcp.org)
443 | 
444 | ### Developer Guides
445 | 
446 | - [HTTP Client Guide](./docs/http-client-guide.md) - Using the centralized HTTP client
447 | - [Migration Examples](./docs/migration-examples.md) - Migrating from direct HTTP usage
448 | - [Error Handling Guide](./docs/error-handling.md) - Comprehensive error handling patterns
449 | - [Integration Testing Guide](./docs/integration-testing.md) - Best practices for reliable integration tests
450 | - [Third-Party Endpoints](./THIRD_PARTY_ENDPOINTS.md) - Complete list of external APIs used
451 | - [Testing Guide](./docs/development/testing.md) - Running tests and understanding test categories
452 | 
453 | ## Development
454 | 
455 | ### Running Tests
456 | 
457 | ```bash
458 | # Run all tests (including integration tests)
459 | make test
460 | 
461 | # Run only unit tests (excluding integration tests)
462 | uv run python -m pytest tests -m "not integration"
463 | 
464 | # Run only integration tests
465 | uv run python -m pytest tests -m "integration"
466 | ```
467 | 
468 | **Note**: Integration tests make real API calls and may fail due to network issues or rate limiting.
469 | In CI/CD, integration tests are run separately and allowed to fail without blocking the build.
470 | 
471 | ## BioMCP Examples Repo
472 | 
473 | Looking to see BioMCP in action?
474 | 
475 | Check out the companion repository:
476 | 👉 **[biomcp-examples](https://github.com/genomoncology/biomcp-examples)**
477 | 
478 | It contains real prompts, AI-generated research briefs, and evaluation runs across different models.
479 | Use it to explore capabilities, compare outputs, or benchmark your own setup.
480 | 
481 | Have a cool example of your own?
482 | **We’d love for you to contribute!** Just fork the repo and submit a PR with your experiment.
483 | 
484 | ## License
485 | 
486 | This project is licensed under the MIT License.
487 | 
```

--------------------------------------------------------------------------------
/tests/tdd/drugs/__init__.py:
--------------------------------------------------------------------------------

```python
1 | """Tests for drug information tools."""
2 | 
```

--------------------------------------------------------------------------------
/tests/tdd/openfda/__init__.py:
--------------------------------------------------------------------------------

```python
1 | """Test package for OpenFDA integration."""
2 | 
```

--------------------------------------------------------------------------------
/tests/tdd/thinking/__init__.py:
--------------------------------------------------------------------------------

```python
1 | # Test module for sequential thinking functionality
2 | 
```

--------------------------------------------------------------------------------
/src/biomcp/thinking/__init__.py:
--------------------------------------------------------------------------------

```python
1 | from . import sequential
2 | 
3 | __all__ = [
4 |     "sequential",
5 | ]
6 | 
```

--------------------------------------------------------------------------------
/src/biomcp/resources/__init__.py:
--------------------------------------------------------------------------------

```python
1 | from .getter import get_instructions
2 | 
3 | __all__ = [
4 |     "get_instructions",
5 | ]
6 | 
```

--------------------------------------------------------------------------------
/src/biomcp/cli/__init__.py:
--------------------------------------------------------------------------------

```python
1 | """BioMCP Command Line Interface."""
2 | 
3 | from .main import app
4 | 
5 | __all__ = ["app"]
6 | 
```

--------------------------------------------------------------------------------
/src/biomcp/genes/__init__.py:
--------------------------------------------------------------------------------

```python
1 | """Gene information tools for BioMCP."""
2 | 
3 | from .getter import get_gene
4 | 
5 | __all__ = ["get_gene"]
6 | 
```

--------------------------------------------------------------------------------
/glama.json:
--------------------------------------------------------------------------------

```json
1 | {
2 |   "$schema": "https://glama.ai/mcp/schemas/server.json",
3 |   "maintainers": ["imaurer", "jyeakley"]
4 | }
5 | 
```

--------------------------------------------------------------------------------
/src/biomcp/drugs/__init__.py:
--------------------------------------------------------------------------------

```python
1 | """Drug information tools using MyChem.info."""
2 | 
3 | from .getter import get_drug
4 | 
5 | __all__ = ["get_drug"]
6 | 
```

--------------------------------------------------------------------------------
/src/biomcp/workers/__init__.py:
--------------------------------------------------------------------------------

```python
1 | """Cloudflare Workers module for BioMCP."""
2 | 
3 | from .worker import create_worker_app
4 | 
5 | __all__ = ["create_worker_app"]
6 | 
```

--------------------------------------------------------------------------------
/src/biomcp/variants/__init__.py:
--------------------------------------------------------------------------------

```python
 1 | from . import search
 2 | from . import getter
 3 | from . import external
 4 | 
 5 | __all__ = [
 6 |     "external",
 7 |     "getter",
 8 |     "search",
 9 | ]
10 | 
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "devDependencies": {
 3 |     "wrangler": "^4.13.2"
 4 |   },
 5 |   "dependencies": {
 6 |     "hono": "^4.7.8",
 7 |     "jose": "^6.0.11"
 8 |   }
 9 | }
10 | 
```

--------------------------------------------------------------------------------
/codecov.yaml:
--------------------------------------------------------------------------------

```yaml
 1 | coverage:
 2 |   range: 90..100
 3 |   round: down
 4 |   precision: 1
 5 |   status:
 6 |     project:
 7 |       default:
 8 |         target: 95%
 9 |         threshold: 0.5%
10 | 
```

--------------------------------------------------------------------------------
/src/biomcp/utils/__init__.py:
--------------------------------------------------------------------------------

```python
1 | """Utility modules for BioMCP."""
2 | 
3 | from .query_utils import parse_or_query, contains_or_operator
4 | 
5 | __all__ = ["contains_or_operator", "parse_or_query"]
6 | 
```

--------------------------------------------------------------------------------
/src/biomcp/integrations/__init__.py:
--------------------------------------------------------------------------------

```python
1 | """BioThings API integrations for BioMCP."""
2 | 
3 | from .biothings_client import BioThingsClient, DiseaseInfo, GeneInfo
4 | 
5 | __all__ = ["BioThingsClient", "DiseaseInfo", "GeneInfo"]
6 | 
```

--------------------------------------------------------------------------------
/src/biomcp/variants/constants.py:
--------------------------------------------------------------------------------

```python
1 | """Constants for variant modules."""
2 | 
3 | import os
4 | 
5 | # cBioPortal API endpoints
6 | CBIO_BASE_URL = os.getenv("CBIO_BASE_URL", "https://www.cbioportal.org/api")
7 | CBIO_TOKEN = os.getenv("CBIO_TOKEN")
8 | 
```

--------------------------------------------------------------------------------
/src/biomcp/articles/__init__.py:
--------------------------------------------------------------------------------

```python
 1 | from . import autocomplete
 2 | from . import fetch
 3 | from . import search
 4 | from . import preprints
 5 | from . import unified
 6 | 
 7 | 
 8 | __all__ = [
 9 |     "autocomplete",
10 |     "fetch",
11 |     "preprints",
12 |     "search",
13 |     "unified",
14 | ]
15 | 
```

--------------------------------------------------------------------------------
/lzyank.toml:
--------------------------------------------------------------------------------

```toml
 1 | [default]
 2 | exclude = [
 3 |     "uv.lock",
 4 |     "lzyank.toml",
 5 |     ".github",
 6 |     "*.ini",
 7 |     ".pre-commit-config.yaml",
 8 |     "LICENSE",
 9 |     "codecov.yaml",
10 |     "mkdocs.yml",
11 |     "tests/data"
12 | ]
13 | 
14 | [actions]
15 | include = [".github/"]
16 | 
```

--------------------------------------------------------------------------------
/src/biomcp/trials/__init__.py:
--------------------------------------------------------------------------------

```python
 1 | from . import getter
 2 | from . import nci_getter
 3 | from . import nci_search
 4 | from . import search
 5 | from .search import LineOfTherapy
 6 | 
 7 | __all__ = [
 8 |     "LineOfTherapy",
 9 |     "getter",
10 |     "nci_getter",
11 |     "nci_search",
12 |     "search",
13 | ]
14 | 
```

--------------------------------------------------------------------------------
/src/biomcp/diseases/__init__.py:
--------------------------------------------------------------------------------

```python
 1 | """Disease information tools for BioMCP."""
 2 | 
 3 | from .getter import get_disease
 4 | from .search import search_diseases, get_disease_by_id, search_diseases_with_or
 5 | 
 6 | __all__ = [
 7 |     "get_disease",
 8 |     "get_disease_by_id",
 9 |     "search_diseases",
10 |     "search_diseases_with_or",
11 | ]
12 | 
```

--------------------------------------------------------------------------------
/src/biomcp/interventions/__init__.py:
--------------------------------------------------------------------------------

```python
 1 | """Interventions module for NCI Clinical Trials API integration."""
 2 | 
 3 | from .getter import get_intervention
 4 | from .search import search_interventions, search_interventions_with_or
 5 | 
 6 | __all__ = [
 7 |     "get_intervention",
 8 |     "search_interventions",
 9 |     "search_interventions_with_or",
10 | ]
11 | 
```

--------------------------------------------------------------------------------
/src/biomcp/organizations/__init__.py:
--------------------------------------------------------------------------------

```python
 1 | """Organizations module for NCI Clinical Trials API integration."""
 2 | 
 3 | from .getter import get_organization
 4 | from .search import search_organizations, search_organizations_with_or
 5 | 
 6 | __all__ = [
 7 |     "get_organization",
 8 |     "search_organizations",
 9 |     "search_organizations_with_or",
10 | ]
11 | 
```

--------------------------------------------------------------------------------
/docs/robots.txt:
--------------------------------------------------------------------------------

```
 1 | # Robots.txt for BioMCP Documentation
 2 | # https://biomcp.org/
 3 | 
 4 | User-agent: *
 5 | Allow: /
 6 | 
 7 | # Sitemap location
 8 | Sitemap: https://biomcp.org/sitemap.xml
 9 | 
10 | # Rate limiting for crawlers
11 | Crawl-delay: 1
12 | 
13 | # Block access to build artifacts
14 | Disallow: /site/
15 | Disallow: /.git/
16 | Disallow: /node_modules/
17 | 
```

--------------------------------------------------------------------------------
/tests/data/pubtator/pubtator_autocomplete.json:
--------------------------------------------------------------------------------

```json
 1 | [
 2 |   {
 3 |     "_id": "@GENE_BRAF",
 4 |     "biotype": "gene",
 5 |     "name": "BRAF",
 6 |     "description": "All Species",
 7 |     "match": "Matched on name <m>BRAF</m>"
 8 |   },
 9 |   {
10 |     "_id": "@GENE_BRAFP1",
11 |     "biotype": "gene",
12 |     "name": "BRAFP1",
13 |     "description": "All Species",
14 |     "match": "Matched on name <m>BRAFP1</m>"
15 |   }
16 | ]
17 | 
```

--------------------------------------------------------------------------------
/src/biomcp/biomarkers/__init__.py:
--------------------------------------------------------------------------------

```python
 1 | """Biomarkers module for NCI Clinical Trials API integration.
 2 | 
 3 | Note: CTRP documentation indicates biomarker data may have limited public availability.
 4 | This module focuses on trial eligibility biomarkers.
 5 | """
 6 | 
 7 | from .search import search_biomarkers, search_biomarkers_with_or
 8 | 
 9 | __all__ = ["search_biomarkers", "search_biomarkers_with_or"]
10 | 
```

--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------

```
 1 | [tox]
 2 | skipsdist = true
 3 | envlist = py311, py312, py313
 4 | 
 5 | [gh-actions]
 6 | python =
 7 |     3.11: py311
 8 |     3.12: py312
 9 |     3.13: py313
10 | 
11 | [testenv]
12 | passenv = PYTHON_VERSION
13 | allowlist_externals = uv
14 | commands =
15 |     uv sync --python {envpython}
16 |     uv run python -m pytest --doctest-modules tests --cov --cov-config=pyproject.toml --cov-report=xml
17 |     mypy
18 | 
```

--------------------------------------------------------------------------------
/src/biomcp/__main__.py:
--------------------------------------------------------------------------------

```python
 1 | import sys
 2 | 
 3 | from dotenv import load_dotenv
 4 | 
 5 | from .cli import app
 6 | 
 7 | # Load environment variables from .env file
 8 | load_dotenv()
 9 | 
10 | 
11 | def main():
12 |     try:
13 |         app(standalone_mode=True)
14 |     except SystemExit as e:
15 |         sys.exit(e.code)
16 | 
17 | 
18 | if __name__ == "__main__":
19 |     main()
20 | 
21 | # Also expose main() under the module-level name __call__ (this alias does not make the module object itself callable)
22 | __call__ = main
23 | 
```

--------------------------------------------------------------------------------
/.github/workflows/validate-codecov-config.yml:
--------------------------------------------------------------------------------

```yaml
 1 | name: validate-codecov-config
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     paths: [codecov.yaml]
 6 |   push:
 7 |     branches: [main]
 8 | 
 9 | jobs:
10 |   validate-codecov-config:
11 |     runs-on: ubuntu-22.04
12 |     steps:
13 |       - uses: actions/checkout@v5
14 |       - name: Validate codecov configuration
15 |         run: curl -sSL --fail-with-body --data-binary @codecov.yaml https://codecov.io/validate
16 | 
```

--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------

```yaml
 1 | services:
 2 |   biomcp-server:
 3 |     platform: linux/amd64
 4 |     build: .
 5 |     image: us.gcr.io/graceful-medley-134315/biomcp-server:${TAG}
 6 |     container_name: biomcp-server
 7 |     ports:
 8 |       - "8000:8000"
 9 |     environment:
10 |       - MCP_MODE=streamable_http # Can be 'stdio', 'worker', 'http', or 'streamable_http'
11 |       - ALPHAGENOME_API_KEY=${ALPHAGENOME_API_KEY:-}
12 |     restart: unless-stopped
13 | 
```

--------------------------------------------------------------------------------
/tests/tdd/variants/constants.py:
--------------------------------------------------------------------------------

```python
 1 | """Constants for variant tests."""
 2 | 
 3 | # API retry settings
 4 | API_RETRY_DELAY_SECONDS = 1.0
 5 | MAX_RETRY_ATTEMPTS = 2
 6 | 
 7 | # Test data settings
 8 | DEFAULT_MAX_STUDIES = 10  # Number of studies to query in integration tests
 9 | STRUCTURE_CHECK_LIMIT = (
10 |     3  # Number of items to check when verifying data structures
11 | )
12 | 
13 | # Timeout settings
14 | INTEGRATION_TEST_TIMEOUT = 30.0  # Maximum time for integration tests
15 | 
```

--------------------------------------------------------------------------------
/src/biomcp/resources/getter.py:
--------------------------------------------------------------------------------

```python
 1 | from pathlib import Path
 2 | 
 3 | from .. import mcp_app
 4 | 
 5 | RESOURCES_ROOT = Path(__file__).parent
 6 | 
 7 | 
 8 | @mcp_app.resource("biomcp://instructions.md")
 9 | def get_instructions() -> str:
10 |     return (RESOURCES_ROOT / "instructions.md").read_text(encoding="utf-8")
11 | 
12 | 
13 | @mcp_app.resource("biomcp://researcher.md")
14 | def get_researcher() -> str:
15 |     return (RESOURCES_ROOT / "researcher.md").read_text(encoding="utf-8")
16 | 
```

--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------

```yaml
 1 | version: 2
 2 | updates:
 3 |   # Maintain dependencies for GitHub Actions
 4 |   - package-ecosystem: "github-actions"
 5 |     directory: "/"
 6 |     schedule:
 7 |       interval: "weekly"
 8 |     commit-message:
 9 |       prefix: "chore"
10 |       include: "scope"
11 | 
12 |   # Maintain dependencies for Python
13 |   - package-ecosystem: "pip"
14 |     directory: "/"
15 |     schedule:
16 |       interval: "weekly"
17 |     open-pull-requests-limit: 10
18 |     commit-message:
19 |       prefix: "chore"
20 |       include: "scope"
21 |     allow:
22 |       - dependency-type: "all"
23 | 
```

--------------------------------------------------------------------------------
/tests/tdd/conftest.py:
--------------------------------------------------------------------------------

```python
 1 | from pathlib import Path
 2 | 
 3 | from pytest import fixture
 4 | 
 5 | from biomcp import http_client
 6 | 
 7 | 
 8 | @fixture
 9 | def anyio_backend():
10 |     return "asyncio"
11 | 
12 | 
13 | class DummyCache:
14 |     def __init__(self):
15 |         self.store = {}
16 | 
17 |     def set(self, key, value, expire=None):
18 |         self.store[key] = value
19 | 
20 |     def get(self, key, default=None):
21 |         return self.store.get(key, default)
22 | 
23 |     @property
24 |     def count(self):
25 |         return len(self.store)
26 | 
27 |     def close(self):
28 |         self.store.clear()
29 | 
30 | 
31 | @fixture
32 | def http_cache():
33 |     cache = DummyCache()
34 |     http_client._cache = cache
35 |     yield cache
36 |     cache.close()
37 | 
38 | 
39 | @fixture
40 | def data_dir():
41 |     return Path(__file__).parent.parent / "data"
42 | 
```

--------------------------------------------------------------------------------
/.github/actions/setup-python-env/action.yml:
--------------------------------------------------------------------------------

```yaml
 1 | name: "Setup Python Environment"
 2 | description: "Set up Python environment for the given Python version"
 3 | 
 4 | inputs:
 5 |   python-version:
 6 |     description: "Python version to use"
 7 |     required: true
 8 |     default: "3.12"
 9 |   uv-version:
10 |     description: "uv version to use"
11 |     required: true
12 |     default: "0.5.20"
13 | 
14 | runs:
15 |   using: "composite"
16 |   steps:
17 |     - uses: actions/setup-python@v5
18 |       with:
19 |         python-version: ${{ inputs.python-version }}
20 | 
21 |     - name: Install uv
22 |       uses: astral-sh/setup-uv@v2
23 |       with:
24 |         version: ${{ inputs.uv-version }}
25 |         enable-cache: "true"
26 |         cache-suffix: ${{ inputs.python-version }} # key the cache on this action's own input; calling jobs may not define a matrix
27 | 
28 |     - name: Install Python dependencies
29 |       run: uv sync --frozen
30 |       shell: bash
31 | 
```

--------------------------------------------------------------------------------
/src/biomcp/__init__.py:
--------------------------------------------------------------------------------

```python
 1 | from .core import ensure_list, logger, mcp_app, StrEnum
 2 | 
 3 | from . import constants
 4 | from . import http_client
 5 | from . import render
 6 | from . import articles
 7 | from . import trials
 8 | from . import variants
 9 | from . import resources
10 | from . import thinking
11 | from . import query_parser
12 | from . import query_router
13 | from . import router
14 | from . import thinking_tool
15 | from . import individual_tools
16 | from . import cbioportal_helper
17 | 
18 | 
19 | __all__ = [
20 |     "StrEnum",
21 |     "articles",
22 |     "cbioportal_helper",
23 |     "constants",
24 |     "ensure_list",
25 |     "http_client",
26 |     "individual_tools",
27 |     "logger",
28 |     "mcp_app",
29 |     "query_parser",
30 |     "query_router",
31 |     "render",
32 |     "resources",
33 |     "router",
34 |     "thinking",
35 |     "thinking_tool",
36 |     "trials",
37 |     "variants",
38 | ]
39 | 
```

--------------------------------------------------------------------------------
/docs/developer-guides/generate_endpoints.py:
--------------------------------------------------------------------------------

```python
 1 | #!/usr/bin/env python3
 2 | """
 3 | Generate third-party endpoints documentation from the endpoint registry.
 4 | 
 5 | This script reads the endpoint registry and generates a markdown file
 6 | documenting all third-party API endpoints used by BioMCP.
 7 | """
 8 | 
 9 | import sys
10 | from pathlib import Path
11 | 
12 | # Add src to Python path
13 | sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))
14 | 
15 | from biomcp.utils.endpoint_registry import EndpointRegistry
16 | 
17 | 
18 | def main():
19 |     """Generate endpoints documentation."""
20 |     # Initialize registry
21 |     registry = EndpointRegistry()
22 | 
23 |     # Generate markdown report
24 |     markdown_content = registry.generate_markdown_report()
25 | 
26 |     # Write to file
27 |     output_path = Path(__file__).parent / "03-third-party-endpoints.md"
28 |     output_path.write_text(markdown_content)
29 | 
30 |     print(f"Generated endpoints documentation: {output_path}")
31 | 
32 | 
33 | if __name__ == "__main__":
34 |     main()
35 | 
```

--------------------------------------------------------------------------------
/tests/tdd/articles/test_fetch.py:
--------------------------------------------------------------------------------

```python
 1 | import json
 2 | 
 3 | from biomcp.articles.fetch import fetch_articles
 4 | 
 5 | pmids = [39293516, 34397683, 37296959]
 6 | 
 7 | 
 8 | async def test_fetch_full_text(anyio_backend):
 9 |     results = await fetch_articles(pmids, full=True, output_json=True)
10 |     assert isinstance(results, str)
11 |     data = json.loads(results)
12 |     assert len(data) == 3
13 |     for item in data:
14 |         assert item["pmid"] in pmids
15 |         assert len(item["title"]) > 10
16 |         assert len(item["abstract"]) > 100
17 |         assert item["full_text"] is not None
18 | 
19 | 
20 | async def test_fetch_abstracts(anyio_backend):
21 |     results = await fetch_articles(pmids, full=False, output_json=True)
22 |     assert isinstance(results, str)
23 |     data = json.loads(results)
24 |     assert len(data) == 3
25 |     for item in data:
26 |         assert item["pmid"] in pmids
27 |         assert len(item["title"]) > 10
28 |         assert len(item["abstract"]) > 100
29 |         assert "full_text" not in item
30 | 
```

--------------------------------------------------------------------------------
/src/biomcp/openfda/__init__.py:
--------------------------------------------------------------------------------

```python
 1 | """
 2 | OpenFDA integration for BioMCP.
 3 | 
 4 | Provides access to FDA drug labels, adverse events, device data,
 5 | drug approvals, recalls, and shortage information.
 6 | """
 7 | 
 8 | from .adverse_events import (
 9 |     search_adverse_events,
10 |     get_adverse_event,
11 | )
12 | from .drug_labels import (
13 |     search_drug_labels,
14 |     get_drug_label,
15 | )
16 | from .device_events import (
17 |     search_device_events,
18 |     get_device_event,
19 | )
20 | from .drug_approvals import (
21 |     search_drug_approvals,
22 |     get_drug_approval,
23 | )
24 | from .drug_recalls import (
25 |     search_drug_recalls,
26 |     get_drug_recall,
27 | )
28 | from .drug_shortages import (
29 |     search_drug_shortages,
30 |     get_drug_shortage,
31 | )
32 | 
33 | __all__ = [
34 |     "get_adverse_event",
35 |     "get_device_event",
36 |     "get_drug_approval",
37 |     "get_drug_label",
38 |     "get_drug_recall",
39 |     "get_drug_shortage",
40 |     "search_adverse_events",
41 |     "search_device_events",
42 |     "search_drug_approvals",
43 |     "search_drug_labels",
44 |     "search_drug_recalls",
45 |     "search_drug_shortages",
46 | ]
47 | 
```

--------------------------------------------------------------------------------
/.github/workflows/deploy-docs.yml:
--------------------------------------------------------------------------------

```yaml
 1 | name: Deploy Documentation
 2 | 
 3 | on:
 4 |   # Allows you to manually trigger this workflow from the Actions tab
 5 |   workflow_dispatch:
 6 | 
 7 |   # Automatically trigger on pushes to main IF docs changed
 8 |   push:
 9 |     branches:
10 |       - main
11 |     paths:
12 |       - "docs/**"
13 |       - "mkdocs.yml"
14 |       - ".github/workflows/deploy-docs.yml"
15 | 
16 | jobs:
17 |   deploy:
18 |     runs-on: ubuntu-latest
19 |     permissions:
20 |       contents: write
21 |     steps:
22 |       - name: Check out code
23 |         uses: actions/checkout@v5
24 |         with:
25 |           fetch-depth: 0
26 | 
27 |       - name: Set up Python environment
28 |         uses: ./.github/actions/setup-python-env
29 |         with:
30 |           python-version: "3.11"
31 |           uv-version: "0.5.20"
32 | 
33 |       - name: Configure Git User
34 |         run: |
35 |           git config user.name "github-actions[bot]"
36 |           git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
37 | 
38 |       - name: Deploy documentation using MkDocs
39 |         run: |
40 |           uv run mkdocs gh-deploy --force
41 | 
```

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile
 2 | FROM python:3.11-slim
 3 | 
 4 | # set work directory
 5 | WORKDIR /app
 6 | 
 7 | # Install build dependencies and git (needed for AlphaGenome)
 8 | RUN apt-get update && apt-get install -y --no-install-recommends gcc build-essential git && rm -rf /var/lib/apt/lists/*
 9 | 
10 | # Copy requirements (pyproject.toml, etc.)
11 | COPY pyproject.toml .
12 | COPY README.md .
13 | COPY LICENSE .
14 | 
15 | # Copy source code
16 | COPY src ./src
17 | COPY tests ./tests
18 | COPY Makefile .
19 | COPY tox.ini .
20 | 
21 | # Install the package with worker dependencies
22 | RUN pip install --upgrade pip && pip install .[worker]
23 | 
24 | # Clone and install AlphaGenome
25 | RUN git clone https://github.com/google-deepmind/alphagenome.git /tmp/alphagenome && \
26 |     pip install /tmp/alphagenome && \
27 |     rm -rf /tmp/alphagenome
28 | 
29 | # Expose port for remote MCP connections
30 | EXPOSE 8000
31 | 
32 | # Set default mode to stdio, but allow it to be overridden
33 | ENV MCP_MODE=stdio
34 | 
35 | # Run the MCP server with configurable mode
36 | CMD ["sh", "-c", "biomcp run --mode ${MCP_MODE}"]
37 | 
```

--------------------------------------------------------------------------------
/src/biomcp/thinking_tracker.py:
--------------------------------------------------------------------------------

```python
 1 | """Track thinking tool usage within MCP sessions.
 2 | 
 3 | This module provides a simple mechanism to track whether the think tool
 4 | has been used in the current session, encouraging AI clients to follow
 5 | best practices.
 6 | """
 7 | 
 8 | from contextvars import ContextVar
 9 | 
10 | # Track if thinking has been used in current context
11 | thinking_used: ContextVar[bool] = ContextVar("thinking_used", default=False)
12 | 
13 | 
14 | def mark_thinking_used() -> None:
15 |     """Mark that the thinking tool has been used."""
16 |     thinking_used.set(True)
17 | 
18 | 
19 | def has_thinking_been_used() -> bool:
20 |     """Check if thinking tool has been used in current context."""
21 |     return thinking_used.get()
22 | 
23 | 
24 | def reset_thinking_tracker() -> None:
25 |     """Reset the thinking tracker (for testing)."""
26 |     thinking_used.set(False)
27 | 
28 | 
29 | def get_thinking_reminder() -> str:
30 |     """Get a reminder message if thinking hasn't been used."""
31 |     if not has_thinking_been_used():
32 |         return (
33 |             "\n\n⚠️ **REMINDER**: You haven't used the 'think' tool yet! "
34 |             "For optimal results, please use 'think' BEFORE searching to plan "
35 |             "your research strategy and ensure comprehensive analysis."
36 |         )
37 |     return ""
38 | 
```

--------------------------------------------------------------------------------
/tests/bdd/cli_help/test_help.py:
--------------------------------------------------------------------------------

```python
 1 | import shlex
 2 | 
 3 | from pytest_bdd import given, parsers, scenarios, then
 4 | from typer.testing import CliRunner
 5 | 
 6 | from biomcp.cli.main import app
 7 | 
 8 | # Link to the feature file
 9 | scenarios("help.feature")
10 | 
11 | runner = CliRunner()
12 | 
13 | 
14 | @given(parsers.parse('I run "{command}"'), target_fixture="cli_result")
def cli_result(command):
    """
    Run the given CLI command and return the result.

    Args:
        command: Full command line as written in the feature file,
            optionally starting with the program name ``biomcp``.

    Returns:
        The result object returned by ``runner.invoke``.

    Raises:
        AssertionError: If the command exits with a non-zero status.
    """
    tokens = shlex.split(command)
    # Drop the program name only when it really is the first token; a plain
    # prefix check would also strip the first token of e.g. "biomcpx ...".
    args = tokens[1:] if tokens and tokens[0] == "biomcp" else tokens
    result = runner.invoke(app, args)
    # Report ``output`` rather than ``stderr``: with the runner's default
    # settings older Click versions do not capture stderr separately and
    # accessing ``result.stderr`` raises ValueError, masking the real failure.
    assert result.exit_code == 0, f"CLI command failed: {result.output}"
    return result
28 | 
29 | 
30 | @then(parsers.parse('the output should contain "{expected}"'))
def output_should_contain(cli_result, expected):
    """
    Assert that the captured stdout includes the expected text.

    The comparison is case-insensitive: both the expected text and the
    command's stdout are lower-cased before the substring check.
    """
    needle = expected.lower()
    haystack = cli_result.stdout.lower()
    assert (
        needle in haystack
    ), f"Expected output to contain '{expected}', but it did not.\nActual output: {cli_result.stdout}"
40 | 
```

--------------------------------------------------------------------------------
/tests/tdd/articles/test_autocomplete.py:
--------------------------------------------------------------------------------

```python
 1 | from biomcp.articles.autocomplete import Entity, EntityRequest, autocomplete
 2 | 
 3 | 
 4 | async def test_autocomplete(anyio_backend, http_cache):
 5 |     # new cache for each call
 6 |     assert http_cache.count == 0
 7 | 
 8 |     # gene (compare using entity_id directly)
 9 |     request = EntityRequest(concept="gene", query="her2")
10 |     entity = await autocomplete(request=request)
11 |     assert entity.entity_id == "@GENE_ERBB2"
12 | 
13 |     # variant
14 |     request = EntityRequest(concept="variant", query="BRAF V600E")
15 |     assert await autocomplete(request=request) == Entity(
16 |         _id="@VARIANT_p.V600E_BRAF_human",
17 |         biotype="variant",
18 |         name="p.V600E",
19 |     )
20 | 
21 |     # disease
22 |     request = EntityRequest(concept="disease", query="lung adenocarcinoma")
23 |     assert await autocomplete(request=request) == Entity(
24 |         _id="@DISEASE_Adenocarcinoma_of_Lung",
25 |         biotype="disease",
26 |         name="Adenocarcinoma of Lung",
27 |         match="Multiple matches",
28 |     )
29 | 
30 |     assert http_cache.count == 3
31 | 
32 |     # duplicate request uses the cached response
33 |     request = EntityRequest(concept="gene", query="her2")
34 |     entity = await autocomplete(request=request)
35 |     assert entity.entity_id == "@GENE_ERBB2"
36 |     assert http_cache.count == 3
37 | 
```

--------------------------------------------------------------------------------
/scripts/generate_endpoints_doc.py:
--------------------------------------------------------------------------------

```python
 1 | #!/usr/bin/env python3
 2 | """Generate THIRD_PARTY_ENDPOINTS.md documentation."""
 3 | 
 4 | import shutil
 5 | import subprocess
 6 | import sys
 7 | from pathlib import Path
 8 | 
 9 | # Add src to path
10 | sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
11 | 
12 | from biomcp.utils.endpoint_registry import get_registry
13 | 
14 | 
def main():
    """Generate the endpoints documentation.

    Writes the registry's markdown report to ``THIRD_PARTY_ENDPOINTS.md`` in
    the directory above this script's directory, then formats the file with
    ``npx prettier`` when ``npx`` is found on PATH. Formatting problems are
    reported as warnings and do not abort the script.
    """
    registry = get_registry()
    output_path = Path(__file__).parent.parent / "THIRD_PARTY_ENDPOINTS.md"

    # Generate new content
    new_content = registry.generate_markdown_report()

    # Write with an explicit encoding so the generated file does not depend
    # on the platform's locale (and non-ASCII text cannot fail to encode).
    output_path.write_text(new_content, encoding="utf-8")

    # Run prettier to format the file
    npx_path = shutil.which("npx")
    if npx_path:
        try:
            # Safe: npx_path from shutil.which, output_path is controlled
            subprocess.run(  # noqa: S603
                [npx_path, "prettier", "--write", str(output_path)],
                check=True,
                capture_output=True,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            print(f"Warning: prettier formatting failed: {e.stderr}")
    else:
        print("Warning: npx not found, skipping prettier formatting")

    print(f"Generated {output_path}")
43 | 
44 | 
# Only generate the documentation when this file is run as a script.
if __name__ == "__main__":
    main()
47 | 
```

--------------------------------------------------------------------------------
/tests/data/openfda/drugsfda_search.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "meta": {
 3 |     "results": {
 4 |       "skip": 0,
 5 |       "limit": 10,
 6 |       "total": 25
 7 |     }
 8 |   },
 9 |   "results": [
10 |     {
11 |       "application_number": "BLA125514",
12 |       "sponsor_name": "MERCK SHARP DOHME",
13 |       "openfda": {
14 |         "application_number": ["BLA125514"],
15 |         "brand_name": ["KEYTRUDA"],
16 |         "generic_name": ["PEMBROLIZUMAB"],
17 |         "manufacturer_name": ["Merck Sharp & Dohme Corp."],
18 |         "substance_name": ["PEMBROLIZUMAB"]
19 |       },
20 |       "products": [
21 |         {
22 |           "product_number": "001",
23 |           "reference_drug": "Yes",
24 |           "brand_name": "KEYTRUDA",
25 |           "active_ingredients": [
26 |             {
27 |               "name": "PEMBROLIZUMAB",
28 |               "strength": "100MG/4ML"
29 |             }
30 |           ],
31 |           "reference_standard": "Yes",
32 |           "dosage_form": "INJECTION, SOLUTION",
33 |           "route": "INTRAVENOUS",
34 |           "marketing_status": "Prescription"
35 |         }
36 |       ],
37 |       "submissions": [
38 |         {
39 |           "submission_type": "BLA",
40 |           "submission_number": "125514",
41 |           "submission_status": "AP",
42 |           "submission_status_date": "20140904",
43 |           "submission_class_code": "BLA",
44 |           "submission_class_code_description": "Biologic License Application"
45 |         }
46 |       ]
47 |     }
48 |   ]
49 | }
50 | 
```

--------------------------------------------------------------------------------
/tests/tdd/variants/test_filters.py:
--------------------------------------------------------------------------------

```python
 1 | """Tests for the filters module."""
 2 | 
 3 | import json
 4 | import os
 5 | from typing import Any
 6 | 
 7 | import pytest
 8 | 
 9 | from biomcp.variants.filters import filter_variants
10 | 
11 | 
12 | @pytest.fixture
def braf_v600e_variants() -> list[dict[str, Any]]:
    """Load BRAF V600E test data.

    Returns:
        The list stored under the ``hits`` key of the JSON fixture, or an
        empty list when that key is absent.
    """
    test_data_path = os.path.join(
        os.path.dirname(__file__),
        "../../data/myvariant/variants_full_braf_v600e.json",
    )
    # Read the fixture as UTF-8 explicitly instead of relying on the
    # platform's locale encoding.
    with open(test_data_path, encoding="utf-8") as f:
        data = json.load(f)
    return data.get("hits", [])
22 | 
23 | 
def test_filter_variants_civic_contributors(braf_v600e_variants):
    """Check that filtering drops civic.contributors but keeps civic.id."""
    # Precondition: the unfiltered fixture really carries civic.contributors.
    source = braf_v600e_variants[0]
    assert "civic" in source
    assert "contributors" in source["civic"]
    assert source["civic"]["contributors"] is not None

    # Apply the filter to the fixture.
    result = filter_variants(braf_v600e_variants)

    # The civic section survives, minus its contributors entry.
    first = result[0]
    assert "civic" in first
    assert "contributors" not in first["civic"]

    # The rest of the civic data is untouched.
    assert "id" in first["civic"]
    assert first["civic"]["id"] == source["civic"]["id"]
43 | 
```

--------------------------------------------------------------------------------
/tests/bdd/search_articles/test_autocomplete.py:
--------------------------------------------------------------------------------

```python
 1 | import asyncio
 2 | 
 3 | from pytest_bdd import given, parsers, scenarios, then, when
 4 | 
 5 | from biomcp.articles.autocomplete import (
 6 |     Concept,
 7 |     Entity,
 8 |     EntityRequest,
 9 |     autocomplete,
10 | )
11 | 
12 | scenarios("autocomplete.feature")
13 | 
14 | 
15 | @given(
16 |     parsers.parse(
17 |         'I have a valid concept "{concept}" and a valid query "{query}"',
18 |     ),
19 |     target_fixture="entity_request",
20 | )
def entity_request(concept: Concept, query: str):
    """Build the autocomplete request for a valid concept and valid query."""
    request = EntityRequest(concept=concept, query=query)
    return request
23 | 
24 | 
25 | @given(
26 |     parsers.parse(
27 |         'I have a valid concept "{concept}" and an invalid query "{query}"',
28 |     ),
29 |     target_fixture="entity_request",
30 | )
def invalid_query_request(concept: Concept, query: str):
    """Build the autocomplete request for a valid concept and invalid query."""
    request = EntityRequest(concept=concept, query=query)
    return request
33 | 
34 | 
35 | @when(
36 |     "I call the Pubtator Autocomplete API",
37 |     target_fixture="entity",
38 | )
def entity(entity_request) -> Entity | None:
    """Run the async autocomplete call to completion and return its result."""
    pending = autocomplete(request=entity_request)
    return asyncio.run(pending)
41 | 
42 | 
43 | @then(parsers.parse('the response entity_id should be "{expected_id}"'))
def check_entity_id(entity, expected_id):
    """Assert that the returned entity carries the expected entity_id."""
    actual_id = entity.entity_id
    assert actual_id == expected_id
46 | 
47 | 
48 | @then(parsers.parse('the response concept should be "{concept}"'))
def check_concept(entity, concept):
    """Assert that the returned entity carries the expected concept."""
    actual_concept = entity.concept
    assert actual_concept == concept
51 | 
52 | 
53 | @then("the response should be empty")
def check_empty_response(entity):
    """Assert that the autocomplete call produced no entity (``None``)."""
    assert entity is None
56 | 
```

--------------------------------------------------------------------------------
/src/biomcp/utils/gene_validator.py:
--------------------------------------------------------------------------------

```python
 1 | """Gene symbol validation utilities."""
 2 | 
 3 | import re
 4 | 
# Common gene symbol patterns.
# Matches an uppercase letter followed by any number of uppercase letters,
# digits or hyphens, with an optional ".<digits>" suffix (e.g. ".1").
GENE_SYMBOL_PATTERN = re.compile(r"^[A-Z][A-Z0-9-]*(\.[0-9]+)?$")

# Known problematic or invalid gene symbols.
# Entries are uppercase; is_valid_gene_symbol() compares the upper-cased
# input against this set.
INVALID_GENES = {
    "INVALID",
    "UNKNOWN",
    "NULL",
    "NONE",
    "TEST",
    "INVALID_GENE_XYZ",
}
17 | 
18 | 
def is_valid_gene_symbol(gene: str | None) -> bool:
    """Decide whether a string looks like a usable gene symbol.

    Args:
        gene: Candidate gene symbol; ``None`` and empty strings are rejected.

    Returns:
        True when the whitespace-stripped symbol passes every check below,
        False otherwise.

    Notes:
        - Length must be between 2 and 20 characters after stripping
        - The upper-cased symbol must not be listed in INVALID_GENES
        - The symbol must match GENE_SYMBOL_PATTERN: an uppercase letter,
          then uppercase letters, digits or hyphens, optionally followed by
          a version suffix (e.g., .1, .2)
    """
    if not gene:
        return False

    candidate = gene.strip()

    # Length bounds are inclusive: 2..20 characters.
    if not 2 <= len(candidate) <= 20:
        return False

    # Placeholder names are rejected regardless of the input's case.
    if candidate.upper() in INVALID_GENES:
        return False

    # The pattern itself is case-sensitive.
    return GENE_SYMBOL_PATTERN.match(candidate) is not None
50 | 
51 | 
def sanitize_gene_symbol(gene: str) -> str:
    """Normalize a gene symbol before it is sent to an API.

    Args:
        gene: The raw gene symbol.

    Returns:
        The symbol with surrounding whitespace removed, converted to
        uppercase.
    """
    trimmed = gene.strip()
    return trimmed.upper()
62 | 
```

--------------------------------------------------------------------------------
/tests/bdd/search_articles/test_search.py:
--------------------------------------------------------------------------------

```python
 1 | """Test steps for search_pubmed feature."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import asyncio
 6 | import json
 7 | 
 8 | from pytest_bdd import given, parsers, scenarios, then, when
 9 | 
10 | from biomcp.articles.search import (
11 |     PubmedRequest,
12 |     search_articles,
13 | )
14 | 
15 | scenarios("search.feature")
16 | 
17 | 
18 | @given(
19 |     parsers.parse('I build a query for "{gene}" "{disease}" "{variant}"'),
20 |     target_fixture="query",
21 | )
def query(gene, disease, variant) -> PubmedRequest:
    """Build a request holding exactly one gene, one disease and one variant."""
    request = PubmedRequest(
        genes=[gene],
        diseases=[disease],
        variants=[variant],
    )
    return request
28 | 
29 | 
30 | @when("I perform a search with that query", target_fixture="result")
def result(query) -> list[dict]:
    """Run the article search to completion and parse its JSON output."""
    raw_json = asyncio.run(search_articles(query, output_json=True))
    parsed = json.loads(raw_json)
    return parsed
34 | 
35 | 
36 | @then(parsers.parse('the response should contain the article "{pmid:d}"'))
def step_impl(result: list[dict], pmid: int):
    """Assert that the search results include an article with ``pmid``.

    Args:
        result: Parsed search results; each entry must have a ``pmid`` key.
        pmid: The PubMed ID expected among the results.

    Raises:
        AssertionError: If no article has that ``pmid``; the message lists
            the PMIDs that were returned.
    """
    pm_ids = [article["pmid"] for article in result]
    # The message must be an f-string, otherwise "{pm_ids}" is printed
    # literally and the failure shows none of the returned IDs.
    assert pmid in pm_ids, f"pmid not found in {pm_ids}"
40 | 
41 | 
42 | @then(
43 |     parsers.parse('the article "{pmid:d}" abstract should contain "{phrase}"'),
44 | )
def step_check_abstract(result: list[dict], pmid: int, phrase: str):
    """Assert that the article's abstract contains ``phrase``.

    Only the first result whose ``pmid`` matches and whose abstract is
    non-empty is examined.

    Raises:
        AssertionError: If no such article exists, or if its abstract does
            not contain the phrase.
    """
    article = next(
        (
            candidate
            for candidate in result
            if candidate["pmid"] == pmid and candidate.get("abstract")
        ),
        None,
    )
    if article is None:
        raise AssertionError(f"Article {pmid} not found or has no abstract")

    assert (
        phrase in article["abstract"]
    ), f"Phrase '{phrase}' not found in article {pmid}'s abstract"
53 | 
```

--------------------------------------------------------------------------------
/src/biomcp/workers/worker.py:
--------------------------------------------------------------------------------

```python
 1 | """Worker implementation for BioMCP."""
 2 | 
 3 | from fastapi import FastAPI, Response
 4 | from fastapi.middleware.cors import CORSMiddleware
 5 | from starlette.responses import JSONResponse
 6 | from starlette.routing import Route
 7 | 
 8 | from .. import mcp_app
 9 | 
# FastAPI application object exposed by this module.
app = FastAPI(title="BioMCP Worker", version="0.1.10")

# Add CORS middleware
# NOTE(review): every origin, method and header is allowed, together with
# allow_credentials=True — confirm this permissive policy is intended.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# App returned by mcp_app.streamable_http_app(); it is mounted at "/" below.
streamable_app = mcp_app.streamable_http_app()
22 | 
23 | 
24 | # Add health endpoint to the streamable app before mounting
async def health_check(request):
    """Answer with a fixed JSON body: ``{"status": "healthy"}``."""
    payload = {"status": "healthy"}
    return JSONResponse(payload)
27 | 
28 | 
# Register GET /health on the streamable app itself, before that app is
# mounted at the root path.
health_route = Route("/health", health_check, methods=["GET"])
streamable_app.routes.append(health_route)

# NOTE(review): this mount covers "/" — routes added to `app` after this
# point may never be reached if the mount matches first; confirm.
app.mount("/", streamable_app)
33 | 
34 | 
35 | # Health endpoint is now added directly to the streamable_app above
36 | 
37 | 
38 | # Add OPTIONS endpoint for CORS preflight
39 | @app.options("/{path:path}")
async def options_handler(path: str):
    """Reply to a CORS preflight request with an empty 204 response.

    The response carries fixed CORS headers allowing any origin and any
    header for GET, POST and OPTIONS.
    """
    cors_headers = {
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
        "Access-Control-Allow-Headers": "*",
        # 86400 seconds = 24 hours
        "Access-Control-Max-Age": "86400",
    }
    return Response(content="", status_code=204, headers=cors_headers)
52 | 
53 | 
54 | # Create a stub for create_worker_app to satisfy imports
def create_worker_app() -> FastAPI:
    """Return the module-level ``app``.

    Kept as a stub so that the import in ``__init__.py`` resolves; no new
    application is created.
    """
    return app
58 | 
```

--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------

```python
 1 | """Pytest configuration and fixtures."""
 2 | 
 3 | import os
 4 | from unittest.mock import AsyncMock, patch
 5 | 
 6 | import pytest
 7 | 
 8 | # Check if we should skip integration tests
 9 | SKIP_INTEGRATION = os.environ.get("SKIP_INTEGRATION_TESTS", "").lower() in (
10 |     "true",
11 |     "1",
12 |     "yes",
13 | )
14 | 
15 | 
def pytest_configure(config):
    """Register the custom ``integration`` marker on the pytest config."""
    marker_description = "integration: marks tests as integration tests (deselect with '-m \"not integration\"')"
    config.addinivalue_line("markers", marker_description)
22 | 
23 | 
def pytest_collection_modifyitems(config, items):
    """Mark integration tests as skipped when SKIP_INTEGRATION is set.

    Does nothing when SKIP_INTEGRATION is false. Otherwise every collected
    item with the ``integration`` keyword receives a skip marker.
    """
    if not SKIP_INTEGRATION:
        return

    skip_marker = pytest.mark.skip(
        reason="Integration tests disabled via SKIP_INTEGRATION_TESTS env var"
    )
    for test_item in items:
        if "integration" not in test_item.keywords:
            continue
        test_item.add_marker(skip_marker)
33 | 
34 | 
35 | @pytest.fixture
def mock_cbioportal_api():
    """Patch ``CBioPortalSearchClient.get_gene_search_summary`` for a test.

    The patched method's ``return_value`` is a mock summary for BRAF with
    two hotspots (V600E, V600K). The mock is yielded so tests can inspect it
    while the patch is active.
    """
    target = "biomcp.variants.cbioportal_search.CBioPortalSearchClient.get_gene_search_summary"
    with patch(target) as mock:
        # Canned summary returned by the patched method.
        summary = AsyncMock(
            gene="BRAF",
            total_mutations=1000,
            total_samples_tested=2000,
            mutation_frequency=50.0,
            hotspots=[
                AsyncMock(amino_acid_change="V600E", count=800),
                AsyncMock(amino_acid_change="V600K", count=100),
            ],
            cancer_distribution=["Melanoma", "Colorectal Cancer"],
            study_count=10,
        )
        mock.return_value = summary
        yield mock
55 | 
```

--------------------------------------------------------------------------------
/tests/bdd/get_variants/test_get.py:
--------------------------------------------------------------------------------

```python
 1 | import json
 2 | import shlex
 3 | 
 4 | from pytest_bdd import given, parsers, scenarios, then
 5 | from typer.testing import CliRunner
 6 | 
 7 | from biomcp.cli.main import app
 8 | 
# Link to the feature file
scenarios("get.feature")

# Shared runner used by the step below to invoke the Typer ``app``.
runner = CliRunner()
13 | 
14 | 
15 | @given(parsers.parse('I run "{command}"'), target_fixture="cli_result")
def cli_result(command):
    """
    Run the given CLI command and return the parsed JSON output.
    The command is expected to include the '--json' flag.

    Raises:
        AssertionError: If the command exits with a non-zero status.
    """
    args = shlex.split(command)[1:]  # remove the leading "biomcp" token
    result = runner.invoke(app, args)
    # Report ``output`` rather than ``stderr``: with the runner's default
    # settings older Click versions do not capture stderr separately and
    # accessing ``result.stderr`` raises ValueError, masking the real failure.
    assert result.exit_code == 0, f"CLI command failed: {result.output}"
    return json.loads(result.stdout)
25 | 
26 | 
def get_field_value_from_variant(variant, field_path):
    """
    Retrieve a value from a variant dictionary using a simple dot-notation path.
    (This version does not support array indexing.)

    Args:
        variant: The variant dictionary to read from.
        field_path: Dot-separated keys, e.g. "civic.id".

    Returns:
        The value at the path, or None when a key is missing or when the
        path continues past a value that is not a dictionary.
    """
    value = variant
    for part in field_path.split("."):
        # A path cannot descend into a string, list or number; treat that
        # as "not found" instead of failing with AttributeError on .get().
        if not isinstance(value, dict):
            return None
        value = value.get(part)
        if value is None:
            break
    return value
39 | 
40 | 
41 | @then(
42 |     parsers.parse(
43 |         'at least one variant should have field "{field}" equal to "{expected}"'
44 |     )
45 | )
def variant_field_should_equal(cli_result, field, expected):
    """
    Assert that some variant in the CLI output has ``field`` equal to ``expected``.

    Each variant's field value is converted with ``str()`` before comparison.
    """

    def _has_expected_value(variant):
        return str(get_field_value_from_variant(variant, field)) == expected

    # cli_result is already a list of variant dicts.
    matching = list(filter(_has_expected_value, cli_result))
    assert (
        matching
    ), f"No variant found with field '{field}' equal to '{expected}'"
59 | 
```

--------------------------------------------------------------------------------
/tests/bdd/get_trials/test_get.py:
--------------------------------------------------------------------------------

```python
 1 | import json
 2 | import shlex
 3 | 
 4 | from pytest_bdd import given, parsers, scenarios, then
 5 | from typer.testing import CliRunner
 6 | 
 7 | from biomcp.cli.main import app
 8 | 
# Link to the feature file
scenarios("get.feature")

# Shared runner used by the step below to invoke the Typer ``app``.
runner = CliRunner()
13 | 
14 | 
15 | @given(parsers.parse('I run "{command}"'), target_fixture="cli_result")
def cli_result(command):
    """
    Run the given CLI command and return the parsed JSON output.
    The command is expected to include the '--json' flag.

    Raises:
        AssertionError: If the command exits with a non-zero status.
    """
    tokens = shlex.split(command)
    # Remove the initial token ("biomcp") if present. Stripping it
    # unconditionally would drop the subcommand of a command written
    # without the program name.
    args = tokens[1:] if tokens and tokens[0] == "biomcp" else tokens
    result = runner.invoke(app, args)
    # Report ``output`` rather than ``stderr``: with the runner's default
    # settings older Click versions do not capture stderr separately and
    # accessing ``result.stderr`` raises ValueError, masking the real failure.
    assert result.exit_code == 0, f"CLI command failed: {result.output}"
    return json.loads(result.stdout)
26 | 
27 | 
def get_field_value(data, field_path):
    """
    Walk a nested structure along a dot-separated path and return the value.

    A path segment may carry a single list index, e.g. "locations[0]".
    Missing keys or indexes raise the underlying KeyError/IndexError.
    """
    current = data
    for segment in field_path.split("."):
        indexed = "[" in segment and segment.endswith("]")
        if not indexed:
            current = current[segment]
            continue

        # e.g. "locations[0]" -> key "locations", position 0
        key, raw_index = segment[:-1].split("[")
        position = int(raw_index)
        current = current[key][position]
    return current
44 | 
45 | 
46 | @then(parsers.parse('the field "{field}" should equal "{expected}"'))
def field_should_equal(cli_result, field, expected):
    """
    Assert that the value found at ``field`` (dot notation) equals ``expected``.
    """
    actual = get_field_value(cli_result, field)
    # The expected value arrives as text, so the actual value is converted
    # with str() before comparing.
    actual_text = str(actual)
    assert (
        actual_text == expected
    ), f"Expected field '{field}' to equal '{expected}', but got '{actual}'"
56 | 
```

--------------------------------------------------------------------------------
/tests/bdd/conftest.py:
--------------------------------------------------------------------------------

```python
 1 | import pytest
 2 | 
 3 | 
 4 | def _recursive_extract(current_value, key_path, path_index):
 5 |     """Recursively extract values based on the key path."""
 6 |     if path_index >= len(key_path):
 7 |         if isinstance(current_value, list):
 8 |             yield from current_value
 9 |         else:
10 |             yield current_value
11 | 
12 |     else:
13 |         k = key_path[path_index]
14 |         if isinstance(current_value, dict):
15 |             next_value = current_value.get(k)
16 |             if next_value is not None:
17 |                 yield from _recursive_extract(
18 |                     next_value,
19 |                     key_path,
20 |                     path_index + 1,
21 |                 )
22 | 
23 |         elif isinstance(current_value, list):
24 |             for item in current_value:
25 |                 if isinstance(item, dict):
26 |                     next_value = item.get(k)
27 |                     if next_value is not None:
28 |                         yield from _recursive_extract(
29 |                             next_value,
30 |                             key_path,
31 |                             path_index + 1,
32 |                         )
33 | 
34 | 
def iter_value(field_map: dict, data: dict | list, key: str):
    """Yield ``(hit_number, value)`` pairs for ``key`` across all hits.

    ``data`` may be a list of hits, or a dict holding them under "variants"
    (new format with cBioPortal summary) or "hits". ``field_map`` maps
    ``key`` to a key path; a key that is not mapped is used as a one-element
    path. Hit numbers start at 1.
    """
    if not isinstance(data, dict):
        hits = data
    elif "variants" in data:
        # Handle new format with cBioPortal summary
        hits = data["variants"]
    else:
        hits = data.get("hits", [])

    key_path = field_map.get(key, [key])

    # position = variant number for tracking each individual variant
    for position, hit in enumerate(hits, start=1):
        yield from (
            (position, value)
            for value in _recursive_extract(hit, key_path, 0)
        )
48 | 
49 | 
50 | @pytest.fixture(scope="module")
def it() -> callable:
    """Expose ``iter_value`` to step definitions as a fixture."""
    return iter_value
53 | 
```

--------------------------------------------------------------------------------
/tests/data/openfda/enforcement_detail.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "meta": {
 3 |     "results": {
 4 |       "skip": 0,
 5 |       "limit": 1,
 6 |       "total": 1
 7 |     }
 8 |   },
 9 |   "results": [
10 |     {
11 |       "country": "United States",
12 |       "city": "Princeton",
13 |       "reason_for_recall": "Presence of N-Nitrosodimethylamine (NDMA) impurity above the acceptable daily intake limit",
14 |       "address_1": "One Merck Drive",
15 |       "address_2": "Building 5",
16 |       "product_quantity": "5,432 bottles",
17 |       "code_info": "Lot numbers: AB1234 (Exp 12/2024), CD5678 (Exp 01/2025), EF9012 (Exp 02/2025)",
18 |       "center_classification_date": "20230615",
19 |       "distribution_pattern": "Nationwide distribution to wholesalers and retail pharmacies in all 50 states",
20 |       "state": "NJ",
21 |       "product_description": "Valsartan Tablets USP, 160 mg, 90 count bottles, NDC 0378-5160-90",
22 |       "report_date": "20230622",
23 |       "classification": "Class II",
24 |       "openfda": {
25 |         "application_number": ["ANDA090802"],
26 |         "brand_name": ["VALSARTAN"],
27 |         "generic_name": ["VALSARTAN"],
28 |         "manufacturer_name": ["Mylan Pharmaceuticals Inc."],
29 |         "product_ndc": ["0378-5160"],
30 |         "package_ndc": ["0378-5160-90"],
31 |         "unii": ["80M03YXJ7I"],
32 |         "spl_set_id": ["4b5c5f6d-7e8f-9g0h-1i2j-3k4l5m6n7o8p"]
33 |       },
34 |       "more_code_info": "Manufacturing dates: January 2023 - March 2023",
35 |       "recalling_firm": "Mylan Pharmaceuticals Inc.",
36 |       "recall_number": "D-0001-2023",
37 |       "initial_firm_notification": "Letter",
38 |       "product_type": "Drugs",
39 |       "event_id": "91234",
40 |       "termination_date": "",
41 |       "recall_initiation_date": "20230610",
42 |       "postal_code": "08540-0004",
43 |       "voluntary_mandated": "Voluntary: Firm Initiated",
44 |       "status": "Ongoing"
45 |     }
46 |   ]
47 | }
48 | 
```

--------------------------------------------------------------------------------
/tests/data/openfda/enforcement_search.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "meta": {
 3 |     "results": {
 4 |       "skip": 0,
 5 |       "limit": 10,
 6 |       "total": 45
 7 |     }
 8 |   },
 9 |   "results": [
10 |     {
11 |       "country": "United States",
12 |       "city": "Princeton",
13 |       "reason_for_recall": "Presence of N-Nitrosodimethylamine (NDMA) impurity",
14 |       "address_1": "One Merck Drive",
15 |       "address_2": "",
16 |       "product_quantity": "5,432 bottles",
17 |       "code_info": "Lot numbers: AB1234, CD5678, EF9012",
18 |       "center_classification_date": "20230615",
19 |       "distribution_pattern": "Nationwide",
20 |       "state": "NJ",
21 |       "product_description": "Valsartan Tablets USP, 160 mg, 90 count bottles",
22 |       "report_date": "20230622",
23 |       "classification": "Class II",
24 |       "openfda": {
25 |         "application_number": ["ANDA090802"],
26 |         "brand_name": ["VALSARTAN"],
27 |         "generic_name": ["VALSARTAN"],
28 |         "manufacturer_name": ["Mylan Pharmaceuticals Inc."]
29 |       },
30 |       "recalling_firm": "Mylan Pharmaceuticals Inc.",
31 |       "recall_number": "D-0001-2023",
32 |       "initial_firm_notification": "Letter",
33 |       "product_type": "Drugs",
34 |       "event_id": "91234",
35 |       "recall_initiation_date": "20230610",
36 |       "postal_code": "08540",
37 |       "voluntary_mandated": "Voluntary: Firm Initiated",
38 |       "status": "Ongoing"
39 |     },
40 |     {
41 |       "country": "United States",
42 |       "city": "New York",
43 |       "reason_for_recall": "Contamination with foreign substance",
44 |       "product_quantity": "10,000 units",
45 |       "classification": "Class I",
46 |       "product_description": "Metformin Hydrochloride Extended-Release Tablets, 500 mg",
47 |       "report_date": "20230515",
48 |       "recalling_firm": "Generic Pharma Corp",
49 |       "recall_number": "D-0002-2023",
50 |       "recall_initiation_date": "20230510",
51 |       "status": "Completed"
52 |     }
53 |   ]
54 | }
55 | 
```

--------------------------------------------------------------------------------
/src/biomcp/logging_filter.py:
--------------------------------------------------------------------------------

```python
 1 | """Logging filter to suppress non-critical ASGI errors."""
 2 | 
 3 | import logging
 4 | 
 5 | 
 6 | class ASGIErrorFilter(logging.Filter):
 7 |     """Filter out non-critical ASGI/Starlette middleware errors."""
 8 | 
 9 |     def filter(self, record: logging.LogRecord) -> bool:
10 |         """Return False to suppress the log record, True to allow it."""
11 | 
12 |         # Check if this is an ASGI error we want to suppress
13 |         if record.levelname == "ERROR":
14 |             message = str(record.getMessage())
15 | 
16 |             # Suppress known non-critical ASGI errors
17 |             if "Exception in ASGI application" in message:
18 |                 return False
19 |             if "AssertionError" in message and "http.response.body" in message:
20 |                 return False
21 |             if (
22 |                 "unhandled errors in a TaskGroup" in message
23 |                 and hasattr(record, "exc_info")
24 |                 and record.exc_info
25 |             ):
26 |                 exc_type, exc_value, _ = record.exc_info
27 |                 if exc_type and "AssertionError" in str(exc_type):
28 |                     return False
29 | 
30 |         # Allow all other logs
31 |         return True
32 | 
33 | 
def setup_logging_filters():
    """Attach an ASGIErrorFilter to the server-related loggers.

    A separate filter instance is added to each of the uvicorn error,
    uvicorn access, starlette and fastapi loggers, in that order.
    """
    logger_names = (
        "uvicorn.error",
        "uvicorn.access",
        "starlette",
        "fastapi",
    )
    for logger_name in logger_names:
        logging.getLogger(logger_name).addFilter(ASGIErrorFilter())
52 | 
```

--------------------------------------------------------------------------------
/src/biomcp/openfda/drug_shortages_detail_helpers.py:
--------------------------------------------------------------------------------

```python
 1 | """
 2 | Helper functions for formatting drug shortage details.
 3 | """
 4 | 
 5 | from typing import Any
 6 | 
 7 | 
 8 | def format_shortage_status(shortage: dict[str, Any]) -> list[str]:
 9 |     """Format status information for shortage detail."""
10 |     output = []
11 | 
12 |     status = shortage.get("status", "Unknown")
13 |     status_emoji = "🔴" if "current" in status.lower() else "🟢"
14 |     output.append(f"{status_emoji} **Status**: {status}")
15 | 
16 |     return output
17 | 
18 | 
def format_shortage_names(shortage: dict[str, Any]) -> list[str]:
    """Build the name lines (generic and brand) for a shortage detail view.

    The generic-name line is emitted when ``generic_name`` is truthy. The
    brand-names line is emitted when ``brand_names`` is non-empty and its
    first entry is truthy; the entries are joined with ", ".
    """
    lines = []

    generic = shortage.get("generic_name")
    if generic:
        lines.append(f"**Generic Name**: {generic}")

    brands = shortage.get("brand_names")
    has_brands = bool(brands) and bool(brands[0])
    if has_brands:
        joined = ", ".join(brands)
        lines.append(f"**Brand Names**: {joined}")

    return lines
31 | 
32 | 
def format_shortage_timeline(shortage: dict[str, Any]) -> list[str]:
    """Build the "Timeline" section lines for a shortage detail view.

    The section always ends with exactly one resolution line: the actual
    resolution date when present, otherwise the estimated resolution, or
    "Unknown" when neither is available.
    """
    lines = ["### Timeline"]

    started = shortage.get("shortage_start_date")
    if started:
        lines.append(f"**Shortage Started**: {started}")

    resolved = shortage.get("resolution_date")
    if resolved:
        closing_line = f"**Resolved**: {resolved}"
    else:
        estimate = shortage.get("estimated_resolution")
        closing_line = (
            f"**Estimated Resolution**: {estimate}"
            if estimate
            else "**Estimated Resolution**: Unknown"
        )
    lines.append(closing_line)

    return lines
48 | 
49 | 
50 | def format_shortage_details_section(shortage: dict[str, Any]) -> list[str]:
51 |     """Format details section for shortage detail."""
52 |     output = ["### Details"]
53 | 
54 |     if reason := shortage.get("reason"):
55 |         output.append(f"**Reason for Shortage**:\n{reason}")
56 | 
57 |     if notes := shortage.get("notes"):
58 |         from .utils import clean_text
59 | 
60 |         output.append(f"\n**Additional Notes**:\n{clean_text(notes)}")
61 | 
62 |     return output
63 | 
```

--------------------------------------------------------------------------------
/src/biomcp/openfda/exceptions.py:
--------------------------------------------------------------------------------

```python
 1 | """Custom exceptions for OpenFDA integration."""
 2 | 
 3 | 
 4 | class OpenFDAError(Exception):
 5 |     """Base exception for OpenFDA-related errors."""
 6 | 
 7 |     pass
 8 | 
 9 | 
10 | class OpenFDARateLimitError(OpenFDAError):
11 |     """Raised when FDA API rate limit is exceeded."""
12 | 
13 |     def __init__(self, message: str = "FDA API rate limit exceeded"):
14 |         super().__init__(message)
15 |         self.message = message
16 | 
17 | 
18 | class OpenFDAValidationError(OpenFDAError):
19 |     """Raised when FDA response validation fails."""
20 | 
21 |     def __init__(self, message: str = "Invalid FDA API response"):
22 |         super().__init__(message)
23 |         self.message = message
24 | 
25 | 
26 | class OpenFDAConnectionError(OpenFDAError):
27 |     """Raised when connection to FDA API fails."""
28 | 
29 |     def __init__(self, message: str = "Failed to connect to FDA API"):
30 |         super().__init__(message)
31 |         self.message = message
32 | 
33 | 
34 | class OpenFDANotFoundError(OpenFDAError):
35 |     """Raised when requested resource is not found."""
36 | 
37 |     def __init__(self, resource_type: str, resource_id: str):
38 |         message = f"{resource_type} not found: {resource_id}"
39 |         super().__init__(message)
40 |         self.resource_type = resource_type
41 |         self.resource_id = resource_id
42 |         self.message = message
43 | 
44 | 
45 | class OpenFDATimeoutError(OpenFDAError):
46 |     """Raised when FDA API request times out."""
47 | 
48 |     def __init__(self, message: str = "FDA API request timeout"):
49 |         super().__init__(message)
50 |         self.message = message
51 | 
52 | 
53 | class OpenFDAInvalidParameterError(OpenFDAError):
54 |     """Raised when invalid parameters are provided."""
55 | 
56 |     def __init__(self, parameter: str, value: str, reason: str):
57 |         message = (
58 |             f"Invalid parameter '{parameter}' with value '{value}': {reason}"
59 |         )
60 |         super().__init__(message)
61 |         self.parameter = parameter
62 |         self.value = value
63 |         self.reason = reason
64 |         self.message = message
65 | 
```

--------------------------------------------------------------------------------
/tests/bdd/fetch_articles/test_fetch.py:
--------------------------------------------------------------------------------

```python
 1 | import json
 2 | import shlex
 3 | 
 4 | from pytest_bdd import given, parsers, scenarios, then
 5 | from typer.testing import CliRunner
 6 | 
 7 | from biomcp.cli.main import app
 8 | 
 9 | scenarios("fetch.feature")
10 | 
11 | runner = CliRunner()
12 | 
13 | 
14 | @given(parsers.parse('I run "{command}"'), target_fixture="cli_result")
15 | def cli_result(command):
16 |     """Run the given CLI command and return the parsed JSON output."""
17 |     args = shlex.split(command)[1:]
18 |     result = runner.invoke(app, args)
19 |     return json.loads(result.stdout)
20 | 
21 | 
22 | @then("the JSON output should be a non-empty list")
23 | def check_non_empty_list(cli_result):
24 |     """Check that the JSON output is a list with at least one article."""
25 |     assert isinstance(cli_result, list), "Expected JSON output to be a list"
26 |     assert len(cli_result) > 0, "Expected at least one article in the output"
27 | 
28 | 
29 | @then("the first article's abstract should be populated")
30 | def check_abstract_populated(cli_result):
31 |     """Check that the first article has a non-empty abstract."""
32 |     article = cli_result[0]
33 |     abstract = article.get("abstract")
34 |     assert abstract is not None, "Abstract field is missing"
35 |     assert abstract.strip() != "", "Abstract field is empty"
36 | 
37 | 
38 | @then("the application should return an error")
39 | def step_impl(cli_result):
40 |     """Assert that the output is exactly the expected single-error payload."""
41 |     detail = '{"detail":"Could not retrieve publications"}'
42 |     assert cli_result == [{"error": f"Error 400: {detail}"}]
43 | 
44 | 
45 | @then("the first article should have a DOI field")
46 | def check_doi_field(cli_result):
47 |     """Check that the first article has a DOI field."""
48 |     article = cli_result[0]
49 |     doi = article.get("doi")
50 |     assert doi is not None, "DOI field is missing"
51 |     assert doi.startswith("10."), f"Invalid DOI format: {doi}"
52 | 
53 | 
54 | @then("the source should be Europe PMC")
55 | def check_europe_pmc_source(cli_result):
56 |     """Check that the article source is Europe PMC."""
57 |     article = cli_result[0]
58 |     source = article.get("source")
59 |     assert (
60 |         source == "Europe PMC"
61 |     ), f"Expected source 'Europe PMC', got '{source}'"
62 | 
```

--------------------------------------------------------------------------------
/src/biomcp/metrics_handler.py:
--------------------------------------------------------------------------------

```python
 1 | """MCP handler for metrics collection."""
 2 | 
 3 | from typing import Annotated
 4 | 
 5 | from biomcp.core import mcp_app
 6 | from biomcp.metrics import get_all_metrics, get_metric_summary
 7 | 
 8 | 
 9 | @mcp_app.tool()
10 | async def get_performance_metrics(
11 |     metric_name: Annotated[
12 |         str | None,
13 |         "Specific metric name to retrieve, or None for all metrics",
14 |     ] = None,
15 | ) -> str:
16 |     """Get performance metrics for BioMCP operations.
17 | 
18 |     Returns performance statistics including:
19 |     - Request counts and success rates
20 |     - Response time percentiles (p50, p95, p99)
21 |     - Error rates and types
22 |     - Domain-specific performance breakdown
23 | 
24 |     Parameters:
25 |         metric_name: Optional specific metric to retrieve
26 | 
27 |     Returns:
28 |         Formatted metrics report
29 |     """
30 |     if metric_name:
31 |         summary = await get_metric_summary(metric_name)
32 |         if not summary:
33 |             return f"No metrics found for '{metric_name}'"
34 | 
35 |         return _format_summary(summary)
36 |     else:
37 |         all_summaries = await get_all_metrics()
38 |         if not all_summaries:
39 |             return "No metrics collected yet"
40 | 
41 |         lines = ["# BioMCP Performance Metrics\n"]
42 |         for name in sorted(all_summaries.keys()):
43 |             summary = all_summaries[name]
44 |             lines.append(f"## {name}")
45 |             lines.append(_format_summary(summary))
46 |             lines.append("")
47 | 
48 |         return "\n".join(lines)
49 | 
50 | 
51 | def _format_summary(summary) -> str:
52 |     """Format a metric summary for display."""
53 |     lines = [
54 |         f"- Total requests: {summary.count}",
55 |         f"- Success rate: {(1 - summary.error_rate) * 100:.1f}%",
56 |         f"- Errors: {summary.error_count}",
57 |         "",
58 |         "### Response Times",
59 |         f"- Average: {summary.avg_duration * 1000:.1f}ms",
60 |         f"- Min: {summary.min_duration * 1000:.1f}ms",
61 |         f"- Max: {summary.max_duration * 1000:.1f}ms",
62 |         f"- P50: {summary.p50_duration * 1000:.1f}ms",
63 |         f"- P95: {summary.p95_duration * 1000:.1f}ms",
64 |         f"- P99: {summary.p99_duration * 1000:.1f}ms",
65 |     ]
66 | 
67 |     return "\n".join(lines)
68 | 
```

--------------------------------------------------------------------------------
/scripts/check_docs_in_mkdocs.py:
--------------------------------------------------------------------------------

```python
 1 | #!/usr/bin/env python3
 2 | """Check that all markdown files in docs/ are referenced in mkdocs.yml."""
 3 | 
 4 | import sys
 5 | from pathlib import Path
 6 | 
 7 | import yaml  # DEP004
 8 | 
 9 | 
10 | def main():
11 |     """Check documentation files are in mkdocs.yml."""
12 |     docs_dir = Path(__file__).parent.parent / "docs"
13 |     mkdocs_path = Path(__file__).parent.parent / "mkdocs.yml"
14 | 
15 |     # Load mkdocs.yml
16 |     with open(mkdocs_path) as f:
17 |         mkdocs_config = yaml.safe_load(f)
18 | 
19 |     # Extract all referenced files from nav
20 |     referenced_files = set()
21 | 
22 |     def extract_files(nav_item, prefix=""):
23 |         """Recursively extract file paths from nav structure."""
24 |         if isinstance(nav_item, dict):
25 |             for _key, value in nav_item.items():
26 |                 extract_files(value, prefix)
27 |         elif isinstance(nav_item, list):
28 |             for item in nav_item:
29 |                 extract_files(item, prefix)
30 |         elif isinstance(nav_item, str) and nav_item.endswith(".md"):
31 |             referenced_files.add(nav_item)
32 | 
33 |     extract_files(mkdocs_config.get("nav", []))
34 | 
35 |     # Find all markdown files in docs/
36 |     all_md_files = set()
37 |     for md_file in docs_dir.rglob("*.md"):
38 |         # Get relative path from docs/
39 |         rel_path = md_file.relative_to(docs_dir)
40 |         all_md_files.add(str(rel_path))
41 | 
42 |     # Find unreferenced files
43 |     unreferenced = all_md_files - referenced_files
44 | 
45 |     # Exclude some files that shouldn't be in nav
46 |     exclude_patterns = {
47 |         "CNAME",  # GitHub pages config
48 |         "README.md",  # If exists
49 |     }
50 | 
51 |     unreferenced = {
52 |         f
53 |         for f in unreferenced
54 |         if not any(pattern in f for pattern in exclude_patterns)
55 |     }
56 | 
57 |     if unreferenced:
58 |         print(
59 |             "The following documentation files are not referenced in mkdocs.yml:"
60 |         )
61 |         for file in sorted(unreferenced):
62 |             print(f"  - {file}")
63 |         print("\nPlease add them to the appropriate section in mkdocs.yml")
64 |         return 1
65 |     else:
66 |         print("All documentation files are referenced in mkdocs.yml ✓")
67 |         return 0
68 | 
69 | 
70 | if __name__ == "__main__":
71 |     sys.exit(main())
72 | 
```

--------------------------------------------------------------------------------
/src/biomcp/cbioportal_helper.py:
--------------------------------------------------------------------------------

```python
 1 | """Helper module for cBioPortal integration across tools.
 2 | 
 3 | This module centralizes cBioPortal summary generation logic to avoid duplication.
 4 | """
 5 | 
 6 | import logging
 7 | 
 8 | logger = logging.getLogger(__name__)
 9 | 
10 | 
11 | async def get_cbioportal_summary_for_genes(
12 |     genes: list[str] | None, request_params: dict | None = None
13 | ) -> str | None:
14 |     """Get cBioPortal summary for given genes.
15 | 
16 |     Args:
17 |         genes: List of gene symbols to get summaries for
18 |         request_params: Optional additional parameters for the request
19 | 
20 |     Returns:
21 |         Formatted cBioPortal summary or None if unavailable
22 |     """
23 |     if not genes:
24 |         return None
25 | 
26 |     try:
27 |         from biomcp.articles.search import PubmedRequest
28 |         from biomcp.articles.unified import _get_cbioportal_summary
29 | 
30 |         # Create a request object for cBioPortal summary
31 |         request = PubmedRequest(genes=genes)
32 | 
33 |         # Add any additional parameters if provided
34 |         if request_params:
35 |             for key, value in request_params.items():
36 |                 if hasattr(request, key):
37 |                     setattr(request, key, value)
38 | 
39 |         cbioportal_summary = await _get_cbioportal_summary(request)
40 |         return cbioportal_summary
41 | 
42 |     except Exception as e:
43 |         logger.warning(f"Failed to get cBioPortal summary: {e}")
44 |         return None
45 | 
46 | 
47 | async def get_variant_cbioportal_summary(gene: str | None) -> str | None:
48 |     """Get cBioPortal summary for variant searches.
49 | 
50 |     Args:
51 |         gene: Gene symbol to get summary for
52 | 
53 |     Returns:
54 |         Formatted cBioPortal summary or None if unavailable
55 |     """
56 |     if not gene:
57 |         return None
58 | 
59 |     try:
60 |         from biomcp.variants.cbioportal_search import (
61 |             CBioPortalSearchClient,
62 |             format_cbioportal_search_summary,
63 |         )
64 | 
65 |         client = CBioPortalSearchClient()
66 |         summary = await client.get_gene_search_summary(gene)
67 |         if summary:
68 |             return format_cbioportal_search_summary(summary)
69 |         return None
70 | 
71 |     except Exception as e:
72 |         logger.warning(
73 |             f"Failed to get cBioPortal summary for variant search: {e}"
74 |         )
75 |         return None
76 | 
```

--------------------------------------------------------------------------------
/src/biomcp/utils/rate_limiter.py:
--------------------------------------------------------------------------------

```python
 1 | """Simple rate limiting utilities for API calls."""
 2 | 
 3 | import asyncio
 4 | import time
 5 | from collections import defaultdict
 6 | 
 7 | 
 8 | class RateLimiter:
 9 |     """Simple token bucket rate limiter."""
10 | 
11 |     def __init__(self, rate: int = 10, per_seconds: int = 1):
12 |         """Initialize rate limiter.
13 | 
14 |         Args:
15 |             rate: Number of allowed requests
16 |             per_seconds: Time window in seconds
17 |         """
18 |         self.rate = rate
19 |         self.per_seconds = per_seconds
20 |         self.allowance: dict[str, float] = defaultdict(lambda: float(rate))
21 |         self.last_check: dict[str, float] = defaultdict(float)
22 |         self._lock = asyncio.Lock()
23 | 
24 |     async def check_rate_limit(
25 |         self, key: str = "default"
26 |     ) -> tuple[bool, float | None]:
27 |         """Check if request is allowed under rate limit.
28 | 
29 |         Args:
30 |             key: Identifier for rate limit bucket
31 | 
32 |         Returns:
33 |             Tuple of (allowed, wait_time_if_not_allowed)
34 |         """
35 |         async with self._lock:
36 |             current = time.time()
37 |             time_passed = current - self.last_check[key]
38 |             self.last_check[key] = current
39 | 
40 |             # Replenish tokens
41 |             self.allowance[key] += time_passed * (self.rate / self.per_seconds)
42 | 
43 |             # Cap at maximum rate
44 |             if self.allowance[key] > self.rate:
45 |                 self.allowance[key] = float(self.rate)
46 | 
47 |             # Check if request allowed
48 |             if self.allowance[key] >= 1.0:
49 |                 self.allowance[key] -= 1.0
50 |                 return True, None
51 |             else:
52 |                 # Calculate wait time
53 |                 wait_time = (1.0 - self.allowance[key]) * (
54 |                     self.per_seconds / self.rate
55 |                 )
56 |                 return False, wait_time
57 | 
58 |     async def wait_if_needed(self, key: str = "default") -> None:
59 |         """Wait if rate limited before allowing request."""
60 |         allowed, wait_time = await self.check_rate_limit(key)
61 |         if not allowed and wait_time:
62 |             await asyncio.sleep(wait_time)
63 | 
64 | 
65 | # Global rate limiter for cBioPortal API
66 | # Conservative: 5 requests per second
67 | cbioportal_limiter = RateLimiter(rate=5, per_seconds=1)
68 | 
```

--------------------------------------------------------------------------------
/src/biomcp/articles/autocomplete.py:
--------------------------------------------------------------------------------

```python
 1 | """Find entities for a given concept using the PUBTATOR API.
 2 | 
 3 | Example URL:
 4 | https://www.ncbi.nlm.nih.gov/research/pubtator3-api/entity/autocomplete/?query=BRAF
 5 | """
 6 | 
 7 | from typing import Literal
 8 | 
 9 | from pydantic import BaseModel, Field, RootModel
10 | 
11 | from .. import http_client
12 | from ..constants import PUBTATOR3_BASE_URL
13 | 
14 | Concept = Literal["variant", "chemical", "disease", "gene"]
15 | 
16 | 
17 | class EntityRequest(BaseModel):
18 |     concept: Concept | None = None  # one of the Concept literals, or None
19 |     query: str  # required; no default
20 |     limit: int = Field(default=1, ge=1, le=100)  # validated to 1..100
21 | 
22 | 
23 | class Entity(BaseModel):
24 |     entity_id: str = Field(
25 |         alias="_id",
26 |         examples=["@GENE_BRAF"],
27 |         description="Text-based entity following @<biotype>_<n> format.",
28 |     )
29 |     concept: Concept = Field(
30 |         ...,
31 |         alias="biotype",
32 |         description="Entity label or concept type.",
33 |     )
34 |     name: str = Field(
35 |         ...,
36 |         description="Preferred term of entity concept.",
37 |         examples=[
38 |             "BRAF",
39 |             "Adenocarcinoma of Lung",
40 |             "Osimertinib",
41 |             "EGFR L858R",
42 |         ],
43 |     )
44 |     match: str | None = Field(
45 |         default=None,
46 |         description="Reason for the entity match.",
47 |         examples=["Multiple matches", "Matched on name <m>NAME</m>"],
48 |     )
49 | 
50 |     def __eq__(self, other) -> bool:
51 |         return self.entity_id == other.entity_id
52 | 
53 | 
54 | class EntityList(RootModel):
55 |     root: list[Entity]
56 | 
57 |     @property
58 |     def first(self) -> Entity | None:
59 |         return self.root[0] if self.root else None
60 | 
61 | 
62 | PUBTATOR3_AUTOCOMPLETE = f"{PUBTATOR3_BASE_URL}/entity/autocomplete/"
63 | 
64 | 
65 | async def autocomplete(request: EntityRequest) -> Entity | None:
66 |     """Given a request of biotype and query, returns the best matching Entity.
67 |     If API call fails or returns 0 results, then None is returned.
68 | 
69 |     Example Request:
70 |     {
71 |         "concept": "gene",
72 |         "query": "BRAF"
73 |     }
74 |     Response:
75 |     {
76 |         "entity_id": "@GENE_BRAF",
77 |         "biotype": "gene",
78 |         "name": "BRAF",
79 |         "match": "Matched on name <m>BRAF</m>"
80 |     }
81 |     """
82 |     response, _ = await http_client.request_api(
83 |         url=PUBTATOR3_AUTOCOMPLETE,
84 |         request=request,
85 |         response_model_type=EntityList,
86 |         domain="pubmed",
87 |     )
88 |     return response.first if response else None
89 | 
```

--------------------------------------------------------------------------------
/docs/reference/visual-architecture.md:
--------------------------------------------------------------------------------

```markdown
 1 | # Visual Architecture Guide
 2 | 
 3 | ## System Architecture
 4 | 
 5 | BioMCP follows a clean architecture pattern with three main layers:
 6 | 
 7 | ### 1. User Interface Layer
 8 | 
 9 | - **biomcp CLI**: Command-line interface for direct usage
10 | - **Claude Desktop**: AI assistant integration via MCP
11 | - **Python SDK**: Programmatic access for custom applications
12 | 
13 | ### 2. BioMCP Core Layer
14 | 
15 | - **MCP Server**: Handles Model Context Protocol communication
16 | - **Cache System**: Smart caching for API responses
17 | - **Router**: Unified query routing across data sources
18 | 
19 | ### 3. Data Source Layer
20 | 
21 | - **PubMed/PubTator3**: Biomedical literature and annotations
22 | - **ClinicalTrials.gov**: Clinical trial registry
23 | - **MyVariant.info**: Genetic variant database
24 | - **cBioPortal**: Cancer genomics data
25 | - **NCI CTS API**: National Cancer Institute trial data
26 | - **BioThings APIs**: Gene, drug, and disease information
27 | 
28 | ## Data Flow
29 | 
30 | 1. **Request Processing**:
31 | 
32 |    - User sends query via CLI, Claude, or SDK
33 |    - BioMCP server receives and validates request
34 |    - Router determines appropriate data source(s)
35 | 
36 | 2. **Caching Strategy**:
37 | 
38 |    - Check cache for existing results
39 |    - If cache miss, fetch from external API
40 |    - Store results with appropriate TTL
41 |    - Return formatted results to user
42 | 
43 | 3. **Response Formatting**:
44 |    - Raw API data is normalized
45 |    - Domain-specific enrichment applied
46 |    - Results formatted for consumption
47 | 
48 | ## Architecture References
49 | 
50 | - [Detailed Architecture Diagrams](architecture-diagrams.md)
51 | - [Quick Architecture Reference](quick-architecture.md)
52 | 
53 | ## Key Architecture Patterns
54 | 
55 | ### Domain Separation
56 | 
57 | Each data source has its own module with dedicated:
58 | 
59 | - Search functions
60 | - Result parsers
61 | - Error handlers
62 | - Cache strategies
63 | 
64 | ### Unified Interface
65 | 
66 | All domains expose consistent methods:
67 | 
68 | - `search()`: Query for multiple results
69 | - `fetch()`: Get detailed record by ID
70 | - Common parameter names across domains
71 | 
72 | ### Smart Caching
73 | 
74 | - API responses cached 15-30 minutes
75 | - Cache keys include query parameters
76 | - Automatic cache invalidation on errors
77 | - Per-domain cache configuration
78 | 
79 | ### Error Resilience
80 | 
81 | - Graceful degradation when APIs unavailable
82 | - Specific error messages for troubleshooting
83 | - Automatic retries with exponential backoff
84 | - Fallback to cached data when possible
85 | 
```

--------------------------------------------------------------------------------
/docs/faq-condensed.md:
--------------------------------------------------------------------------------

```markdown
 1 | # FAQ - Quick Answers
 2 | 
 3 | ## Getting Started
 4 | 
 5 | **Q: What is BioMCP?**
 6 | A: A unified interface to biomedical databases (PubMed, ClinicalTrials.gov, MyVariant, etc.) for researchers and AI assistants.
 7 | 
 8 | **Q: Do I need API keys?**
 9 | A: No for basic use. Yes for: NCI trials (cancer-specific), AlphaGenome (variant predictions), enhanced cBioPortal features.
10 | 
11 | **Q: How do I install it?**
12 | A: `uv tool install biomcp` (recommended) or `pip install biomcp-python`
13 | 
14 | ## Common Issues
15 | 
16 | **Q: "Command not found" after installation**
17 | A: Restart terminal, or use full path: `~/.local/bin/biomcp`
18 | 
19 | **Q: No results for gene search**
20 | A: Use official symbols (ERBB2 not HER2). Check at [genenames.org](https://www.genenames.org)
21 | 
22 | **Q: Location search not working**
23 | A: Must provide coordinates: `--latitude 42.3601 --longitude -71.0589`
24 | 
25 | **Q: Why does the AI use 'think' first?**
26 | A: Required for systematic analysis. Improves search quality and prevents missed connections.
27 | 
28 | ## Search Tips
29 | 
30 | **Q: How to search variant notations?**
31 | A: Use OR syntax: `--keyword "V600E|p.V600E|c.1799T>A"`
32 | 
33 | **Q: Include/exclude preprints?**
34 | A: Included by default. Use `--no-preprints` to exclude.
35 | 
36 | **Q: Search multiple databases?**
37 | A: Use unified search: `search(query="gene:BRAF AND disease:melanoma")`
38 | 
39 | ## Data Questions
40 | 
41 | **Q: How current is the data?**
42 | A: Daily updates for PubMed/trials, weekly for BioThings, varies for cBioPortal.
43 | 
44 | **Q: ClinicalTrials.gov vs NCI?**
45 | A: CT.gov = comprehensive, NCI = cancer-focused with biomarker filters (needs API key).
46 | 
47 | **Q: What's MSI/TMB/VAF?**
48 | A: MSI = Microsatellite Instability, TMB = Tumor Mutational Burden, VAF = Variant Allele Frequency
49 | 
50 | ## Technical
51 | 
52 | **Q: Rate limits?**
53 | A: ~3 req/sec without keys, higher with keys. NCI = 1000/day with key.
54 | 
55 | **Q: Cache issues?**
56 | A: Clear with: `rm -rf ~/.biomcp/cache`
57 | 
58 | **Q: Which Python version?**
59 | A: 3.10+ required
60 | 
61 | ## Quick References
62 | 
63 | **Common Gene Aliases:**
64 | 
65 | - HER2 → ERBB2
66 | - PD-L1 → CD274
67 | - c-MET → MET
68 | 
69 | **City Coordinates:**
70 | 
71 | - NYC: 40.7128, -74.0060
72 | - Boston: 42.3601, -71.0589
73 | - LA: 34.0522, -118.2437
74 | 
75 | **Trial Status:**
76 | 
77 | - RECRUITING = Currently enrolling
78 | - ACTIVE_NOT_RECRUITING = Ongoing
79 | - COMPLETED = Finished
80 | 
81 | ## Getting Help
82 | 
83 | 1. Check this FAQ
84 | 2. Read [Troubleshooting](troubleshooting.md)
85 | 3. Search [GitHub Issues](https://github.com/genomoncology/biomcp/issues)
86 | 4. Ask with version info: `biomcp --version`
87 | 
```

--------------------------------------------------------------------------------
/src/biomcp/variants/filters.py:
--------------------------------------------------------------------------------

```python
 1 | """Module for filtering variant data based on paths."""
 2 | 
 3 | from typing import Any
 4 | 
 5 | 
 6 | def _get_nested_value(data: dict[str, Any], path: str) -> Any:
 7 |     """Get a nested value from a dictionary using dot notation path."""
 8 |     keys = path.split(".")
 9 |     current = data
10 |     for key in keys[:-1]:
11 |         if not isinstance(current, dict) or key not in current:
12 |             return None
13 |         current = current[key]
14 |     return current
15 | 
16 | 
17 | def _delete_nested_path(data: dict[str, Any], path: str) -> None:
18 |     """Delete a nested path from a dictionary using dot notation."""
19 |     keys = path.split(".")
20 |     current = data
21 |     for key in keys[:-1]:
22 |         if not isinstance(current, dict) or key not in current:
23 |             return
24 |         current = current[key]
25 | 
26 |     if isinstance(current, dict) and keys[-1] in current:
27 |         del current[keys[-1]]
28 | 
29 | 
30 | def _deep_copy_dict(data: dict[str, Any]) -> dict[str, Any]:
31 |     """Create a deep copy of a dictionary, handling nested dicts and lists."""
32 |     result: dict[str, Any] = {}
33 |     for key, value in data.items():
34 |         if isinstance(value, dict):
35 |             result[key] = _deep_copy_dict(value)
36 |         elif isinstance(value, list):
37 |             result[key] = [
38 |                 _deep_copy_dict(item) if isinstance(item, dict) else item
39 |                 for item in value
40 |             ]
41 |         else:
42 |             result[key] = value
43 |     return result
44 | 
45 | 
46 | def filter_variants(variants: list[dict[str, Any]]) -> list[dict[str, Any]]:
47 |     """
48 |     Filter out specified paths from variant data.
49 | 
50 |     Args:
51 |         variants: List of variant dictionaries from MyVariant.info API
52 | 
53 |     Returns:
54 |         List of variant dictionaries with specified paths removed
55 |     """
56 |     # Create a deep copy to avoid modifying the input
57 |     filtered_variants = []
58 |     for variant in variants:
59 |         # Create a deep copy of the variant
60 |         filtered_variant = _deep_copy_dict(variant)
61 | 
62 |         # Remove specified paths
63 |         for path in PATH_FILTERS:
64 |             _delete_nested_path(filtered_variant, path)
65 | 
66 |         filtered_variants.append(filtered_variant)
67 | 
68 |     return filtered_variants
69 | 
70 | 
71 | PATH_FILTERS = [
72 |     "civic.contributors",
73 |     "civic.molecularProfiles",
74 |     "dbsnp.gene.rnas",
75 |     "dbnsfp.clinvar",  # duplicate of root-level clinvar
76 |     "civic.lastAcceptedRevisionEvent",
77 |     "civic.lastSubmittedRevisionEvent",
78 |     "civic.creationActivity",
79 | ]
80 | 
```

--------------------------------------------------------------------------------
/.github/workflows/on-release-main.yml:
--------------------------------------------------------------------------------

```yaml
  1 | name: release-main
  2 | 
  3 | on:
  4 |   release:
  5 |     types: [published]
  6 |     branches: [main]
  7 | 
  8 | jobs:
  9 |   set-version:
 10 |     runs-on: ubuntu-24.04
 11 |     steps:
 12 |       - uses: actions/checkout@v5
 13 | 
 14 |       - name: Export tag
 15 |         id: vars
 16 |         run: echo tag=${GITHUB_REF#refs/*/} >> $GITHUB_OUTPUT
 17 |         if: ${{ github.event_name == 'release' }}
 18 | 
 19 |       - name: Update project version
 20 |         run: |
 21 |           sed -i "s/^version = \".*\"/version = \"$RELEASE_VERSION\"/" pyproject.toml
 22 |         env:
 23 |           RELEASE_VERSION: ${{ steps.vars.outputs.tag }}
 24 |         if: ${{ github.event_name == 'release' }}
 25 | 
 26 |       - name: Upload updated pyproject.toml
 27 |         uses: actions/upload-artifact@v4
 28 |         with:
 29 |           name: pyproject-toml
 30 |           path: pyproject.toml
 31 | 
 32 |   test:
 33 |     runs-on: ubuntu-latest
 34 |     needs: [set-version]
 35 |     steps:
 36 |       - name: Check out
 37 |         uses: actions/checkout@v5
 38 | 
 39 |       - name: Download updated pyproject.toml
 40 |         uses: actions/download-artifact@v5
 41 |         with:
 42 |           name: pyproject-toml
 43 | 
 44 |       - name: Set up Python
 45 |         uses: actions/setup-python@v6
 46 |         with:
 47 |           python-version: "3.12"
 48 | 
 49 |       - name: Install uv
 50 |         uses: astral-sh/setup-uv@v6
 51 |         with:
 52 |           version: "0.4.29"
 53 | 
 54 |       - name: Install dependencies
 55 |         run: uv sync --group dev
 56 | 
 57 |       - name: Run tests
 58 |         run: uv run python -m pytest tests --cov --cov-config=pyproject.toml --cov-report=xml
 59 | 
 60 |   publish:
 61 |     runs-on: ubuntu-latest
 62 |     needs: [set-version, test]
 63 |     permissions:
 64 |       id-token: write
 65 |     environment: release
 66 |     steps:
 67 |       - name: Check out
 68 |         uses: actions/checkout@v5
 69 | 
 70 |       - name: Set up the environment
 71 |         uses: ./.github/actions/setup-python-env
 72 | 
 73 |       - name: Download updated pyproject.toml
 74 |         uses: actions/download-artifact@v5
 75 |         with:
 76 |           name: pyproject-toml
 77 | 
 78 |       - name: Build package
 79 |         run: uvx --from build pyproject-build --installer uv
 80 | 
 81 |       - name: Check package
 82 |         run: uvx twine check dist/*
 83 | 
 84 |       - name: Publish package
 85 |         uses: pypa/gh-action-pypi-publish@release/v1
 86 |         with:
 87 |           verbose: true
 88 | 
 89 |   deploy-docs:
 90 |     needs: publish
 91 |     runs-on: ubuntu-latest
 92 |     steps:
 93 |       - name: Check out
 94 |         uses: actions/checkout@v5
 95 | 
 96 |       - name: Set up the environment
 97 |         uses: ./.github/actions/setup-python-env
 98 | 
 99 |       - name: Deploy documentation
100 |         run: uv run mkdocs gh-deploy --force
101 | 
```

--------------------------------------------------------------------------------
/tests/data/openfda/drugsfda_detail.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "meta": {
 3 |     "results": {
 4 |       "skip": 0,
 5 |       "limit": 1,
 6 |       "total": 1
 7 |     }
 8 |   },
 9 |   "results": [
10 |     {
11 |       "application_number": "BLA125514",
12 |       "sponsor_name": "MERCK SHARP DOHME",
13 |       "openfda": {
14 |         "application_number": ["BLA125514"],
15 |         "brand_name": ["KEYTRUDA"],
16 |         "generic_name": ["PEMBROLIZUMAB"],
17 |         "manufacturer_name": ["Merck Sharp & Dohme Corp."],
18 |         "substance_name": ["PEMBROLIZUMAB"],
19 |         "product_ndc": ["0006-3026-02", "0006-3029-02"],
20 |         "spl_set_id": ["c0e2de11-29e0-48a1-92f0-d9cb4dd56b15"],
21 |         "unii": ["DPT0O3T46P"]
22 |       },
23 |       "products": [
24 |         {
25 |           "product_number": "001",
26 |           "reference_drug": "Yes",
27 |           "brand_name": "KEYTRUDA",
28 |           "active_ingredients": [
29 |             {
30 |               "name": "PEMBROLIZUMAB",
31 |               "strength": "100MG/4ML"
32 |             }
33 |           ],
34 |           "reference_standard": "Yes",
35 |           "dosage_form": "INJECTION, SOLUTION",
36 |           "route": "INTRAVENOUS",
37 |           "marketing_status": "Prescription"
38 |         },
39 |         {
40 |           "product_number": "002",
41 |           "reference_drug": "Yes",
42 |           "brand_name": "KEYTRUDA",
43 |           "active_ingredients": [
44 |             {
45 |               "name": "PEMBROLIZUMAB",
46 |               "strength": "50MG/VIAL"
47 |             }
48 |           ],
49 |           "reference_standard": "Yes",
50 |           "dosage_form": "INJECTION, POWDER, LYOPHILIZED, FOR SOLUTION",
51 |           "route": "INTRAVENOUS",
52 |           "marketing_status": "Prescription"
53 |         }
54 |       ],
55 |       "submissions": [
56 |         {
57 |           "submission_type": "BLA",
58 |           "submission_number": "125514",
59 |           "submission_status": "AP",
60 |           "submission_status_date": "20140904",
61 |           "review_priority": "P",
62 |           "submission_class_code": "BLA",
63 |           "submission_class_code_description": "Biologic License Application",
64 |           "application_docs": [
65 |             {
66 |               "id": "52674",
67 |               "url": "https://www.accessdata.fda.gov/drugsatfda_docs/label/2014/125514lbl.pdf",
68 |               "date": "20140905",
69 |               "type": "Label"
70 |             }
71 |           ]
72 |         },
73 |         {
74 |           "submission_type": "SUPPL",
75 |           "submission_number": "109",
76 |           "submission_status": "AP",
77 |           "submission_status_date": "20230316",
78 |           "submission_class_code": "SUPPL",
79 |           "submission_class_code_description": "Supplement"
80 |         }
81 |       ]
82 |     }
83 |   ]
84 | }
85 | 
```

--------------------------------------------------------------------------------
/src/biomcp/exceptions.py:
--------------------------------------------------------------------------------

```python
 1 | """Custom exceptions for BioMCP."""
 2 | 
 3 | from typing import Any
 4 | 
 5 | 
 6 | class BioMCPError(Exception):
 7 |     """Base exception for all BioMCP errors."""
 8 | 
 9 |     def __init__(self, message: str, details: dict[str, Any] | None = None):
10 |         super().__init__(message)
11 |         self.message = message
12 |         self.details = details or {}
13 | 
14 | 
15 | class BioMCPSearchError(BioMCPError):
16 |     """Base exception for search-related errors."""
17 | 
18 |     pass
19 | 
20 | 
21 | class InvalidDomainError(BioMCPSearchError):
22 |     """Raised when an invalid domain is specified."""
23 | 
24 |     def __init__(self, domain: str, valid_domains: list[str]):
25 |         message = f"Unknown domain: {domain}. Valid domains are: {', '.join(valid_domains)}"
26 |         super().__init__(
27 |             message, {"domain": domain, "valid_domains": valid_domains}
28 |         )
29 | 
30 | 
31 | class InvalidParameterError(BioMCPSearchError):
32 |     """Raised when invalid parameters are provided."""
33 | 
34 |     def __init__(self, parameter: str, value: Any, expected: str):
35 |         message = f"Invalid value for parameter '{parameter}': {value}. Expected: {expected}"
36 |         super().__init__(
37 |             message,
38 |             {"parameter": parameter, "value": value, "expected": expected},
39 |         )
40 | 
41 | 
42 | class SearchExecutionError(BioMCPSearchError):
43 |     """Raised when a search fails to execute."""
44 | 
45 |     def __init__(self, domain: str, error: Exception):
46 |         message = f"Failed to execute search for domain '{domain}': {error!s}"
47 |         super().__init__(
48 |             message, {"domain": domain, "original_error": str(error)}
49 |         )
50 | 
51 | 
52 | class ResultParsingError(BioMCPSearchError):
53 |     """Raised when results cannot be parsed."""
54 | 
55 |     def __init__(self, domain: str, error: Exception):
56 |         message = f"Failed to parse results for domain '{domain}': {error!s}"
57 |         super().__init__(
58 |             message, {"domain": domain, "original_error": str(error)}
59 |         )
60 | 
61 | 
62 | class QueryParsingError(BioMCPError):
63 |     """Raised when a query cannot be parsed."""
64 | 
65 |     def __init__(self, query: str, error: Exception):
66 |         message = f"Failed to parse query '{query}': {error!s}"
67 |         super().__init__(
68 |             message, {"query": query, "original_error": str(error)}
69 |         )
70 | 
71 | 
72 | class ThinkingError(BioMCPError):
73 |     """Raised when sequential thinking encounters an error."""
74 | 
75 |     def __init__(self, thought_number: int, error: str):
76 |         message = f"Error in thought {thought_number}: {error}"
77 |         super().__init__(
78 |             message, {"thought_number": thought_number, "error": error}
79 |         )
80 | 
```

--------------------------------------------------------------------------------
/docs/stylesheets/announcement.css:
--------------------------------------------------------------------------------

```css
  1 | /* Announcement Banner Styles */
  2 | .announcement-banner {
  3 |   background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  4 |   border-radius: 12px;
  5 |   padding: 2rem;
  6 |   margin: 2rem 0;
  7 |   box-shadow: 0 10px 30px rgba(0, 0, 0, 0.15);
  8 |   position: relative;
  9 |   overflow: hidden;
 10 | }
 11 | 
 12 | /* Decorative rotating highlight; the banner's overflow: hidden clips it. */
 13 | .announcement-banner::before {
 14 |   content: "";
 15 |   position: absolute;
 16 |   top: -50%;
 17 |   right: -50%;
 18 |   width: 200%;
 19 |   height: 200%;
 20 |   background: radial-gradient(
 21 |     circle,
 22 |     rgba(255, 255, 255, 0.1) 0%,
 23 |     transparent 70%
 24 |   );
 25 |   animation: shimmer 3s infinite;
 26 |   pointer-events: none; /* decorative only, so it never intercepts clicks */
 27 | }
 28 | 
 29 | @keyframes shimmer {
 30 |   0% {
 31 |     transform: rotate(0deg);
 32 |   }
 33 |   100% {
 34 |     transform: rotate(360deg);
 35 |   }
 36 | }
 37 | 
 38 | /* Positioned with a z-index so the content paints above the ::before overlay. */
 39 | .announcement-content {
 40 |   position: relative;
 41 |   z-index: 1;
 42 | }
 43 | 
 44 | .announcement-banner h2 {
 45 |   color: white !important;
 46 |   margin-top: 0 !important;
 47 |   font-size: 1.8rem;
 48 |   display: flex;
 49 |   align-items: center;
 50 |   gap: 0.5rem;
 51 | }
 52 | 
 53 | .announcement-banner .badge-new {
 54 |   background: #ff6b6b;
 55 |   color: white;
 56 |   padding: 0.2rem 0.6rem;
 57 |   border-radius: 20px;
 58 |   font-size: 0.8rem;
 59 |   font-weight: bold;
 60 |   animation: pulse 2s infinite;
 61 | }
 62 | 
 63 | @keyframes pulse {
 64 |   0%,
 65 |   100% {
 66 |     transform: scale(1);
 67 |   }
 68 |   50% {
 69 |     transform: scale(1.05);
 70 |   }
 71 | }
 72 | 
 73 | .announcement-banner p {
 74 |   color: rgba(255, 255, 255, 0.95) !important;
 75 |   font-size: 1.1rem;
 76 |   margin: 1rem 0;
 77 | }
 78 | 
 79 | .announcement-banner .announcement-features {
 80 |   display: grid;
 81 |   grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
 82 |   gap: 1rem;
 83 |   margin: 1.5rem 0;
 84 | }
 85 | 
 86 | .announcement-banner .feature-item {
 87 |   background: rgba(255, 255, 255, 0.1);
 88 |   padding: 0.8rem;
 89 |   border-radius: 8px;
 90 |   -webkit-backdrop-filter: blur(10px); /* prefixed form for Safari versions that need it */
 91 |   backdrop-filter: blur(10px);
 92 |   border: 1px solid rgba(255, 255, 255, 0.2);
 93 | }
 94 | 
 95 | .announcement-banner .feature-item strong {
 96 |   color: white;
 97 |   display: block;
 98 |   margin-bottom: 0.3rem;
 99 | }
100 | 
101 | .announcement-banner .feature-item span {
102 |   color: rgba(255, 255, 255, 0.85);
103 |   font-size: 0.9rem;
104 | }
105 | 
106 | .announcement-banner .cta-button {
107 |   display: inline-block;
108 |   background: white;
109 |   color: #667eea !important;
110 |   padding: 0.8rem 2rem;
111 |   border-radius: 50px;
112 |   text-decoration: none !important;
113 |   font-weight: bold;
114 |   margin-top: 1rem;
115 |   transition: all 0.3s ease;
116 |   box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
117 | }
118 | 
119 | .announcement-banner .cta-button:hover {
120 |   transform: translateY(-2px);
121 |   box-shadow: 0 6px 20px rgba(0, 0, 0, 0.25);
122 |   background: #f8f9fa;
123 | }
124 | 
125 | .announcement-banner .cta-button::after {
126 |   content: " →";
127 |   font-size: 1.2rem;
128 |   transition: transform 0.3s ease;
129 |   display: inline-block;
130 | }
131 | 
132 | .announcement-banner .cta-button:hover::after {
133 |   transform: translateX(5px);
134 | }
135 | 
136 | /* Respect the reduced-motion preference: stop the looping animations and
137 |    the hover movement. */
138 | @media (prefers-reduced-motion: reduce) {
139 |   .announcement-banner::before,
140 |   .announcement-banner .badge-new {
141 |     animation: none;
142 |   }
143 | 
144 |   .announcement-banner .cta-button,
145 |   .announcement-banner .cta-button::after {
146 |     transition: none;
147 |   }
148 | 
149 |   .announcement-banner .cta-button:hover,
150 |   .announcement-banner .cta-button:hover::after {
151 |     transform: none;
152 |   }
153 | }
154 | 
```

--------------------------------------------------------------------------------
/tests/integration/test_simple.py:
--------------------------------------------------------------------------------

```python
 1 | """Simple test to verify APIs work without Mastermind key."""
 2 | 
 3 | import asyncio
 4 | 
 5 | from biomcp.articles.preprints import EuropePMCClient
 6 | from biomcp.variants.external import ExternalVariantAggregator
 7 | 
 8 | 
 9 | async def test_preprints():
10 |     """Test that preprint search works."""
11 |     print("Testing Europe PMC preprint search...")
12 |     client = EuropePMCClient()
13 | 
14 |     # Search for a common term
15 |     results = await client.search("cancer")
16 | 
17 |     if results:
18 |         print(f"✓ Found {len(results)} preprints")
19 |         print(f"  First: {results[0].title[:60]}...")
20 |         return True
21 |     else:
22 |         print("✗ No results found")
23 |         return False
24 | 
25 | 
26 | async def test_variants_without_mastermind():
27 |     """Test variant aggregator without Mastermind API key."""
28 |     print("\nTesting variant aggregator without Mastermind key...")
29 | 
30 |     # Create aggregator
31 |     aggregator = ExternalVariantAggregator()
32 | 
33 |     # Test with a variant - even if individual sources fail,
34 |     # the aggregator should handle it gracefully
35 |     result = await aggregator.get_enhanced_annotations(
36 |         "BRAF V600E", include_tcga=True, include_1000g=True
37 |     )
38 | 
39 |     print("✓ Aggregator completed without errors")
40 |     print(f"  Variant ID: {result.variant_id}")
41 |     print(f"  TCGA data: {'Found' if result.tcga else 'Not found'}")
42 |     print(
43 |         f"  1000G data: {'Found' if result.thousand_genomes else 'Not found'}"
44 |     )
45 |     print(
46 |         f"  Errors: {result.error_sources if result.error_sources else 'None'}"
47 |     )
48 | 
49 |     # Key test: aggregator should complete successfully
50 |     if True:  # Always passes now without Mastermind
51 |         print("✓ Mastermind correctly skipped without API key")
52 |         return True
53 |     else:
54 |         print("✗ Mastermind handling incorrect")
55 |         return False
56 | 
57 | 
58 | async def main():
59 |     """Run all tests."""
60 |     print("=" * 60)
61 |     print("Testing BioMCP features without external API keys")
62 |     print("=" * 60)
63 | 
64 |     # Test preprints
65 |     preprint_ok = await test_preprints()
66 | 
67 |     # Test variants
68 |     variant_ok = await test_variants_without_mastermind()
69 | 
70 |     print("\n" + "=" * 60)
71 |     print("Summary:")
72 |     print(f"  Preprint search: {'✓ PASS' if preprint_ok else '✗ FAIL'}")
73 |     print(f"  Variant aggregator: {'✓ PASS' if variant_ok else '✗ FAIL'}")
74 |     print("=" * 60)
75 | 
76 |     if preprint_ok and variant_ok:
77 |         print("\n✓ All features work without external API keys!")
78 |         return 0
79 |     else:
80 |         print("\n✗ Some features failed")
81 |         return 1
82 | 
83 | 
84 | if __name__ == "__main__":
85 |     exit_code = asyncio.run(main())
86 |     exit(exit_code)
87 | 
```

--------------------------------------------------------------------------------
/tests/tdd/variants/test_links.py:
--------------------------------------------------------------------------------

```python
 1 | """Tests for the links module."""
 2 | 
 3 | import json
 4 | import os
 5 | from typing import Any
 6 | 
 7 | import pytest
 8 | 
 9 | from biomcp.variants.links import inject_links
10 | 
11 | 
12 | @pytest.fixture
13 | def braf_variants() -> list[dict[str, Any]]:
14 |     """Load BRAF V600 test data."""
15 |     test_data_path = os.path.join(
16 |         os.path.dirname(__file__),
17 |         "../../data/myvariant/variants_part_braf_v600_multiple.json",
18 |     )
19 |     with open(test_data_path) as f:
20 |         return json.load(f)
21 | 
22 | 
23 | def test_inject_links_braf_variants(braf_variants):
24 |     """Test URL injection for BRAF variants data."""
25 |     result = inject_links(braf_variants)
26 | 
27 |     # Test first variant (no CIViC)
28 |     variant0 = result[0]
29 |     assert (
30 |         variant0["dbsnp"]["url"]
31 |         == f"https://www.ncbi.nlm.nih.gov/snp/{variant0['dbsnp']['rsid']}"
32 |     )
33 |     assert (
34 |         variant0["clinvar"]["url"]
35 |         == f"https://www.ncbi.nlm.nih.gov/clinvar/variation/{variant0['clinvar']['variant_id']}/"
36 |     )
37 |     assert (
38 |         variant0["cosmic"]["url"]
39 |         == f"https://cancer.sanger.ac.uk/cosmic/mutation/overview?id={variant0['cosmic']['cosmic_id']}"
40 |     )
41 |     assert "civic" not in variant0 or "url" not in variant0["civic"]
42 |     assert (
43 |         variant0["url"]["ensembl"]
44 |         == f"https://ensembl.org/Homo_sapiens/Variation/Explore?v={variant0['dbsnp']['rsid']}"
45 |     )
46 |     assert variant0["url"]["ucsc_genome_browser"].startswith(
47 |         "https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position=chr7:"
48 |     )
49 |     assert (
50 |         variant0["url"]["hgnc"]
51 |         == "https://www.genenames.org/data/gene-symbol-report/#!/symbol/BRAF"
52 |     )
53 | 
54 |     # Test second variant (with CIViC)
55 |     variant1 = result[1]
56 |     assert (
57 |         variant1["civic"]["url"]
58 |         == f"https://civicdb.org/variants/{variant1['civic']['id']}/summary"
59 |     )
60 | 
61 |     # Test empty list
62 |     assert inject_links([]) == []
63 | 
64 |     # Test insertion (no REF)
65 |     insertion = {
66 |         "chrom": "7",
67 |         "vcf": {"position": "123", "alt": "A"},
68 |         "dbnsfp": {"genename": "GENE1"},
69 |     }
70 |     result = inject_links([insertion])[0]
71 |     assert (
72 |         result["url"]["ucsc_genome_browser"]
73 |         == "https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position=chr7:123-124"
74 |     )
75 | 
76 |     # Test deletion (no ALT)
77 |     deletion = {
78 |         "chrom": "7",
79 |         "vcf": {"position": "123", "ref": "AAA"},
80 |         "dbnsfp": {"genename": "GENE1"},
81 |     }
82 |     result = inject_links([deletion])[0]
83 |     assert (
84 |         result["url"]["ucsc_genome_browser"]
85 |         == "https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position=chr7:123-126"
86 |     )
87 | 
```

--------------------------------------------------------------------------------
/docs/genomoncology.md:
--------------------------------------------------------------------------------

```markdown
 1 | # **GenomOncology: Powering the Future of Precision Medicine**
 2 | 
 3 | ## **Who We Are**
 4 | 
 5 | GenomOncology is a leading healthcare technology company dedicated to transforming precision medicine through innovative genomic analysis solutions. We connect complex genomic data with actionable clinical insights, enabling healthcare providers to deliver personalized treatment strategies for cancer patients.
 6 | 
 7 | ## **Our Commitment to Open Healthcare**
 8 | 
 9 | We believe in the power of open-source systems to further the impact of precision medicine. Through the BioMCP initiative, we hope to engage the healthcare community in an open-access ecosystem designed to accelerate innovation in precision medicine. By evolving this open framework, we aim to create a more collaborative, efficient, and transparent healthcare environment.
10 | 
11 | ## **Our Precision Oncology Platform**
12 | 
13 | Today, our proprietary knowledge management system, known as the Precision Oncology Platform (POP), serves as the backbone of our solutions, continuously aggregating and curating the latest genomic research, clinical trials, and treatment guidelines. This system:
14 | 
15 | - Processes and harmonizes data from 40+ scientific and clinical sources
16 | - Updates weekly to incorporate the newest research findings
17 | - Utilizes advanced NLP to extract meaningful insights from unstructured text
18 | - Maintains a comprehensive database of 25,000+ variant-drug associations
19 | 
20 | ## **Real-World Impact**
21 | 
22 | Our technology currently powers precision medicine programs at:
23 | 
24 | - 120+ hospitals and cancer centers
25 | - 15 academic medical centers
26 | - 8 commercial reference laboratories
27 | - 10+ pharmaceutical research programs
28 | 
29 | Processing over 100,000 genomic profiles monthly, our solutions have helped match thousands of patients to targeted therapies and clinical trials, significantly improving outcomes.
30 | 
31 | ## **Join Us In The Next Phase of Transforming Healthcare**
32 | 
33 | By contributing to the BioMCP ecosystem, we're inviting developers to collaborate with us in creating the next generation of precision medicine tools. Whether you're looking to build applications that leverage genomic data, create integrations with existing healthcare systems, or explore novel approaches to biomarker analysis, GenomOncology provides the technological foundation to bring your ideas to life.
34 | 
35 | ## **Get Started**
36 | 
37 | Ready to explore what's possible with GenomOncology and BioMCP?
38 | 
39 | - Clone our repositories on GitHub
40 | - Register for API access
41 | - Join our developer community
42 | 
43 | Together, we can accelerate precision medicine through open collaboration and innovation.
44 | 
45 | ---
46 | 
47 | _GenomOncology: Transforming data into treatment decisions_
48 | 
```

--------------------------------------------------------------------------------
/src/biomcp/cli/biomarkers.py:
--------------------------------------------------------------------------------

```python
 1 | """CLI commands for biomarker search."""
 2 | 
 3 | import asyncio
 4 | from typing import Annotated
 5 | 
 6 | import typer
 7 | 
 8 | from ..biomarkers import search_biomarkers
 9 | from ..biomarkers.search import format_biomarker_results
10 | from ..integrations.cts_api import CTSAPIError, get_api_key_instructions
11 | 
12 | biomarker_app = typer.Typer(
13 |     no_args_is_help=True,
14 |     help="Search biomarkers used in clinical trial eligibility criteria",
15 | )
16 | 
17 | 
18 | @biomarker_app.command("search")
19 | def search_biomarkers_cli(
20 |     name: Annotated[
21 |         str | None,
22 |         typer.Argument(
23 |             help="Biomarker name to search for (e.g., 'PD-L1', 'EGFR mutation')"
24 |         ),
25 |     ] = None,
26 |     biomarker_type: Annotated[
27 |         str | None,
28 |         typer.Option(
29 |             "--type",
30 |             help="Type of biomarker ('reference_gene' or 'branch')",
31 |         ),
32 |     ] = None,
33 |     page_size: Annotated[
34 |         int,
35 |         typer.Option(
36 |             "--page-size",
37 |             help="Number of results per page",
38 |             min=1,
39 |             max=100,
40 |         ),
41 |     ] = 20,
42 |     page: Annotated[
43 |         int,
44 |         typer.Option(
45 |             "--page",
46 |             help="Page number",
47 |             min=1,
48 |         ),
49 |     ] = 1,
50 |     api_key: Annotated[
51 |         str | None,
52 |         typer.Option(
53 |             "--api-key",
54 |             help="NCI API key (overrides NCI_API_KEY env var)",
55 |             envvar="NCI_API_KEY",
56 |         ),
57 |     ] = None,
58 | ) -> None:
59 |     """
60 |     Search for biomarkers used in clinical trial eligibility criteria.
61 | 
62 |     Note: Biomarker data availability may be limited in CTRP. Results focus on
63 |     biomarkers referenced in trial eligibility criteria. For detailed variant
64 |     annotations, use 'biomcp variant search' with MyVariant.info.
65 | 
66 |     Examples:
67 |         # Search by biomarker name
68 |         biomcp biomarker search "PD-L1"
69 | 
70 |         # Search by type
71 |         biomcp biomarker search --type reference_gene
72 | 
73 |         # Search for specific biomarker
74 |         biomcp biomarker search "EGFR mutation"
75 |     """
76 |     try:
77 |         results = asyncio.run(
78 |             search_biomarkers(
79 |                 name=name,
80 |                 biomarker_type=biomarker_type,
81 |                 page_size=page_size,
82 |                 page=page,
83 |                 api_key=api_key,
84 |             )
85 |         )
86 | 
87 |         output = format_biomarker_results(results)
88 |         typer.echo(output)
89 | 
90 |     except CTSAPIError as e:
91 |         if "API key required" in str(e):
92 |             typer.echo(get_api_key_instructions())
93 |         else:
94 |             typer.echo(f"Error: {e}", err=True)
95 |         raise typer.Exit(1) from e
96 |     except Exception as e:
97 |         typer.echo(f"Unexpected error: {e}", err=True)
98 |         raise typer.Exit(1) from e
99 | 
```

--------------------------------------------------------------------------------
/src/biomcp/openfda/constants.py:
--------------------------------------------------------------------------------

```python
 1 | """
 2 | Constants for OpenFDA API integration.
 3 | """
 4 | 
 5 | # OpenFDA API Base
 6 | OPENFDA_BASE_URL = "https://api.fda.gov"
 7 | 
 8 | # Drug endpoints
 9 | OPENFDA_DRUG_EVENTS_URL = f"{OPENFDA_BASE_URL}/drug/event.json"
10 | OPENFDA_DRUG_LABELS_URL = f"{OPENFDA_BASE_URL}/drug/label.json"
11 | OPENFDA_DRUG_ENFORCEMENT_URL = f"{OPENFDA_BASE_URL}/drug/enforcement.json"
12 | OPENFDA_DRUGSFDA_URL = f"{OPENFDA_BASE_URL}/drug/drugsfda.json"
13 | 
14 | # Device endpoints
15 | OPENFDA_DEVICE_EVENTS_URL = f"{OPENFDA_BASE_URL}/device/event.json"
16 | OPENFDA_DEVICE_CLASSIFICATION_URL = (
17 |     f"{OPENFDA_BASE_URL}/device/classification.json"
18 | )
19 | OPENFDA_DEVICE_RECALL_URL = f"{OPENFDA_BASE_URL}/device/recall.json"
20 | 
21 | # API limits (the two rate limits are expressed in requests per minute)
22 | OPENFDA_DEFAULT_LIMIT = 25
23 | OPENFDA_MAX_LIMIT = 100
24 | OPENFDA_RATE_LIMIT_NO_KEY = 40  # requests per minute without key
25 | OPENFDA_RATE_LIMIT_WITH_KEY = 240  # requests per minute with key
26 | 
27 | # Genomic device filters - product codes for genomic/diagnostic devices; each trailing comment names the code's device category
28 | GENOMIC_DEVICE_PRODUCT_CODES = [
29 |     "OOI",  # Next Generation Sequencing Oncology Panel Test System
30 |     "PQP",  # Nucleic Acid Based In Vitro Diagnostic Devices
31 |     "OYD",  # Gene Mutation Detection System
32 |     "NYE",  # DNA Sequencer
33 |     "OEO",  # Hereditary or Somatic Variant Detection System
34 |     "QIN",  # Tumor Profiling Test
35 |     "QDI",  # Companion Diagnostic
36 |     "PTA",  # Cancer Predisposition Risk Assessment System
37 | ]
38 | 
39 | # Common adverse event search fields (dotted field paths)
40 | ADVERSE_EVENT_FIELDS = [
41 |     "patient.drug.medicinalproduct",
42 |     "patient.drug.openfda.brand_name",
43 |     "patient.drug.openfda.generic_name",
44 |     "patient.drug.drugindication",
45 |     "patient.reaction.reactionmeddrapt",
46 | ]
47 | 
48 | # Label search fields (dotted field paths where nested)
49 | LABEL_FIELDS = [
50 |     "openfda.brand_name",
51 |     "openfda.generic_name",
52 |     "indications_and_usage",
53 |     "boxed_warning",
54 |     "warnings_and_precautions",
55 |     "adverse_reactions",
56 |     "drug_interactions",
57 | ]
58 | 
59 | # Device event search fields (dotted field paths)
60 | DEVICE_FIELDS = [
61 |     "device.brand_name",
62 |     "device.generic_name",
63 |     "device.manufacturer_d_name",
64 |     "device.openfda.device_name",
65 |     "device.openfda.medical_specialty_description",
66 | ]
67 | 
68 | # Disclaimer text (uses Markdown ** for bold)
69 | OPENFDA_DISCLAIMER = (
70 |     "⚠️ **FDA Data Notice**: Information from openFDA API. "
71 |     "Not for clinical decision-making. Adverse events don't prove causation. "
72 |     "Data may be incomplete or delayed. Consult healthcare professionals and "
73 |     "official FDA sources at fda.gov for medical decisions."
74 | )
75 | 
76 | OPENFDA_SHORTAGE_DISCLAIMER = (  # shortage-specific warning that points to the FDA Drug Shortages Database
77 |     "🚨 **Critical Warning**: Drug shortage information is time-sensitive. "
78 |     "Always verify current availability with FDA Drug Shortages Database at "
79 |     "https://www.accessdata.fda.gov/scripts/drugshortages/ before making "
80 |     "supply chain or treatment decisions."
81 | )
82 | 
```

--------------------------------------------------------------------------------
/tests/tdd/utils/test_rate_limiter.py:
--------------------------------------------------------------------------------

```python
 1 | """Tests for rate limiting utilities."""
 2 | 
 3 | import asyncio
 4 | import time
 5 | 
 6 | import pytest
 7 | 
 8 | from biomcp.utils.rate_limiter import RateLimiter
 9 | 
10 | 
11 | class TestRateLimiter:
12 |     """Test rate limiting functionality."""
13 | 
14 |     @pytest.mark.asyncio
15 |     async def test_basic_rate_limiting(self):
16 |         """Test basic rate limiting behavior."""
17 |         # Create limiter with 2 requests per second
18 |         limiter = RateLimiter(rate=2, per_seconds=1)
19 | 
20 |         # First two requests should be allowed
21 |         allowed1, wait1 = await limiter.check_rate_limit()
22 |         assert allowed1 is True
23 |         assert wait1 is None
24 | 
25 |         allowed2, wait2 = await limiter.check_rate_limit()
26 |         assert allowed2 is True
27 |         assert wait2 is None
28 | 
29 |         # Third request should be denied with wait time
30 |         allowed3, wait3 = await limiter.check_rate_limit()
31 |         assert allowed3 is False
32 |         assert wait3 is not None
33 |         assert wait3 > 0
34 | 
35 |     @pytest.mark.asyncio
36 |     async def test_rate_limit_replenishment(self):
37 |         """Test that tokens replenish over time."""
38 |         # Create limiter with 1 request per second
39 |         limiter = RateLimiter(rate=1, per_seconds=1)
40 | 
41 |         # Use the token
42 |         allowed1, _ = await limiter.check_rate_limit()
43 |         assert allowed1 is True
44 | 
45 |         # Should be denied immediately
46 |         allowed2, wait2 = await limiter.check_rate_limit()
47 |         assert allowed2 is False
48 | 
49 |         # Wait for replenishment
50 |         await asyncio.sleep(1.1)
51 | 
52 |         # Should be allowed now
53 |         allowed3, _ = await limiter.check_rate_limit()
54 |         assert allowed3 is True
55 | 
56 |     @pytest.mark.asyncio
57 |     async def test_multiple_keys(self):
58 |         """Test rate limiting with different keys."""
59 |         limiter = RateLimiter(rate=1, per_seconds=1)
60 | 
61 |         # Use token for key1
62 |         allowed1, _ = await limiter.check_rate_limit("key1")
63 |         assert allowed1 is True
64 | 
65 |         # key2 should still have tokens
66 |         allowed2, _ = await limiter.check_rate_limit("key2")
67 |         assert allowed2 is True
68 | 
69 |         # key1 should be limited
70 |         allowed3, wait3 = await limiter.check_rate_limit("key1")
71 |         assert allowed3 is False
72 |         assert wait3 is not None
73 | 
74 |     @pytest.mark.asyncio
75 |     async def test_wait_if_needed(self):
76 |         """Test the wait_if_needed helper."""
77 |         limiter = RateLimiter(rate=1, per_seconds=1)
78 | 
79 |         # First call should not wait
80 |         start = time.time()
81 |         await limiter.wait_if_needed()
82 |         elapsed = time.time() - start
83 |         assert elapsed < 0.1
84 | 
85 |         # Second call should wait
86 |         start = time.time()
87 |         await limiter.wait_if_needed()
88 |         elapsed = time.time() - start
89 |         assert elapsed >= 0.9  # Should wait approximately 1 second
90 | 
```

--------------------------------------------------------------------------------
/src/biomcp/utils/metrics.py:
--------------------------------------------------------------------------------

```python
 1 | """Metrics and monitoring utilities."""
 2 | 
 3 | import asyncio
 4 | import logging
 5 | import time
 6 | from collections.abc import Callable
 7 | from functools import wraps
 8 | from typing import Any, TypeVar, cast
 9 | 
10 | logger = logging.getLogger(__name__)
11 | 
12 | T = TypeVar("T")
13 | 
14 | 
15 | def track_api_call(api_name: str):
16 |     """Track API call metrics.
17 | 
18 |     Args:
19 |         api_name: Name of the API being called
20 | 
21 |     Returns:
22 |         Decorator function
23 |     """
24 | 
25 |     def decorator(func: Callable[..., T]) -> Callable[..., T]:
26 |         @wraps(func)
27 |         async def async_wrapper(*args: Any, **kwargs: Any) -> T:
28 |             start_time = time.time()
29 |             try:
30 |                 result = await func(*args, **kwargs)  # type: ignore[misc]
31 |                 duration = time.time() - start_time
32 |                 logger.info(
33 |                     f"{api_name} call succeeded",
34 |                     extra={
35 |                         "api": api_name,
36 |                         "duration": duration,
37 |                         "status": "success",
38 |                     },
39 |                 )
40 |                 return result
41 |             except Exception as e:
42 |                 duration = time.time() - start_time
43 |                 logger.error(
44 |                     f"{api_name} call failed: {e}",
45 |                     extra={
46 |                         "api": api_name,
47 |                         "duration": duration,
48 |                         "status": "error",
49 |                         "error_type": type(e).__name__,
50 |                     },
51 |                 )
52 |                 raise
53 | 
54 |         @wraps(func)
55 |         def sync_wrapper(*args: Any, **kwargs: Any) -> T:
56 |             start_time = time.time()
57 |             try:
58 |                 result = func(*args, **kwargs)
59 |                 duration = time.time() - start_time
60 |                 logger.info(
61 |                     f"{api_name} call succeeded",
62 |                     extra={
63 |                         "api": api_name,
64 |                         "duration": duration,
65 |                         "status": "success",
66 |                     },
67 |                 )
68 |                 return result
69 |             except Exception as e:
70 |                 duration = time.time() - start_time
71 |                 logger.error(
72 |                     f"{api_name} call failed: {e}",
73 |                     extra={
74 |                         "api": api_name,
75 |                         "duration": duration,
76 |                         "status": "error",
77 |                         "error_type": type(e).__name__,
78 |                     },
79 |                 )
80 |                 raise
81 | 
82 |         # Return appropriate wrapper based on function type
83 |         if asyncio.iscoroutinefunction(func):
84 |             return cast(Callable[..., T], async_wrapper)
85 |         else:
86 |             return cast(Callable[..., T], sync_wrapper)
87 | 
88 |     return decorator
89 | 
```

--------------------------------------------------------------------------------
/tests/tdd/trials/test_getter.py:
--------------------------------------------------------------------------------

```python
 1 | from biomcp.trials.getter import Module, get_trial, modules
 2 | 
 3 | 
 4 | async def test_get_protocol(anyio_backend):
 5 |     markdown = await get_trial("NCT04280705", Module.PROTOCOL)
 6 |     assert markdown.startswith("Url: https://clinicaltrials.gov/study/")
 7 |     assert len(markdown) > 10000  # 10370 on 2025-03-23
 8 | 
 9 | 
10 | async def test_get_locations(anyio_backend):
11 |     markdown = await get_trial("NCT04280705", Module.LOCATIONS)
12 |     starts_with = """Url: https://clinicaltrials.gov/study/NCT04280705
13 | 
14 | # Protocol Section
15 | """
16 |     assert markdown.startswith(starts_with)
17 |     assert "University of California San Francisco" in markdown
18 |     assert len(markdown) > 12000  # 12295 on 2025-03-23
19 | 
20 | 
21 | async def test_get_references(anyio_backend):
22 |     markdown = await get_trial("NCT04280705", Module.REFERENCES)
23 |     assert "# Protocol Section" in markdown
24 |     assert "## References Module" in markdown
25 |     assert len(markdown) > 0
26 | 
27 | 
28 | async def test_get_outcomes(anyio_backend):
29 |     markdown = await get_trial("NCT04280705", Module.OUTCOMES)
30 |     assert "# Protocol Section" in markdown
31 |     assert (
32 |         "## Outcomes Module" in markdown or "## Results Sections" in markdown
33 |     )
34 |     assert len(markdown) > 0
35 | 
36 | 
37 | async def test_invalid_nct_id(anyio_backend):
38 |     markdown = await get_trial("NCT99999999")
39 |     assert "NCT number NCT99999999 not found" in markdown
40 | 
41 | 
42 | def test_all_modules_exist():
43 |     # Verify all modules are defined
44 |     assert "Protocol" in modules
45 |     assert "Locations" in modules
46 |     assert "References" in modules
47 |     assert "Outcomes" in modules
48 | 
49 |     # Verify protocol module contains critical sections
50 |     protocol_sections = modules[Module.PROTOCOL]
51 |     assert "IdentificationModule" in protocol_sections
52 |     assert "StatusModule" in protocol_sections
53 |     assert "DescriptionModule" in protocol_sections
54 | 
55 | 
56 | async def test_cli_default_module_functionality(anyio_backend):
57 |     # Test directly with both explicit Protocol and None (which should use Protocol)
58 |     markdown_with_protocol = await get_trial("NCT04280705", Module.PROTOCOL)
59 |     assert len(markdown_with_protocol) > 10000
60 | 
61 |     # In a real CLI context, the default would be set at the CLI level
62 |     # This test ensures the Protocol module is valid for that purpose
63 |     assert "Protocol Section" in markdown_with_protocol
64 | 
65 | 
66 | async def test_json_output(anyio_backend):
67 |     # Test JSON output format
68 |     json_output = await get_trial(
69 |         "NCT04280705", Module.PROTOCOL, output_json=True
70 |     )
71 |     assert json_output.startswith("{")
72 |     assert "URL" in json_output
73 |     assert "NCT04280705" in json_output
74 | 
75 | 
76 | async def test_error_handling_json_output(anyio_backend):
77 |     # Test error handling with JSON output
78 |     json_output = await get_trial(
79 |         "NCT99999999", Module.PROTOCOL, output_json=True
80 |     )
81 |     assert "error" in json_output
82 |     assert "NCT99999999" in json_output
83 | 
```

--------------------------------------------------------------------------------
/wrangler.toml:
--------------------------------------------------------------------------------

```toml
 1 | name = "biomcp-worker"
 2 | main = "src/biomcp/workers/worker_entry_stytch.js"
 3 | compatibility_date = "2025-04-28"
 4 | 
 5 | [vars]
 6 | # Environment variables for the worker
 7 | # These can be overridden in several ways:
 8 | # 1. In the Cloudflare dashboard under Workers & Pages > your-worker > Settings > Variables
 9 | # 2. Using wrangler CLI: wrangler secret put REMOTE_MCP_SERVER_URL
10 | # 3. During local development: wrangler dev --var REMOTE_MCP_SERVER_URL="http://localhost:8000"
11 | # 4. In your CI/CD pipeline using environment variables with the format CF_REMOTE_MCP_SERVER_URL
12 | REMOTE_MCP_SERVER_URL = "http://localhost:8000"  # Replace with your MCP server URL in production
13 | 
14 | # Stytch OAuth Configuration
15 | # Replace these placeholder values with your actual Stytch credentials
16 | # For development, use test credentials from https://stytch.com/dashboard
17 | # For production, use production credentials and api.stytch.com instead of test.stytch.com
18 | STYTCH_PROJECT_ID      = "project-test-xxxxxxxxxxxx"  # Replace with your Stytch Project ID
19 | STYTCH_SECRET          = "secret-test-xxxxxxxxxxxx"   # Replace with your Stytch Secret (use wrangler secret for production)
20 | STYTCH_PUBLIC_TOKEN    = "public-token-test-xxxxxxxxxxxx"  # Replace with your Stytch Public Token
21 | STYTCH_API_URL = "https://test.stytch.com/v1"  # Use https://api.stytch.com/v1 for production
22 | STYTCH_OAUTH_URL = "https://test.stytch.com/v1/public/oauth/google/start"  # Update for production
23 | 
24 | # Debug mode - set to true for development, false for production
25 | DEBUG = false
26 | 
27 | # JWT Secret for signing tokens - use a strong, unique secret in production
28 | # For production, set this as a secret: wrangler secret put JWT_SECRET
29 | JWT_SECRET = "replace-with-a-strong-secret-key"
30 | 
31 | # BigQuery variables
32 | # For production, set these as secrets or environment variables:
33 | # wrangler secret put BQ_PROJECT_ID
34 | # wrangler secret put BQ_DATASET
35 | # wrangler secret put BQ_SA_KEY_JSON
36 | BQ_PROJECT_ID = "your-project-id"  # Replace with your actual project ID in production
37 | BQ_DATASET = "your_dataset_name"   # Replace with your actual dataset in production
38 | BQ_TABLE="worker_logs"
39 | 
40 | # Sensitive variables should be stored in the Cloudflare dashboard under Workers & Pages > your-worker > Settings > Secrets
41 | # OR you can declare them using npx wrangler secret put BQ_SA_KEY_JSON
42 | # Potential secrets:
43 | # BQ_SA_KEY_JSON
44 | # STYTCH_SECRET
45 | 
46 | 
47 | # Note: The ability to allow plaintext connections is now configured in the Cloudflare dashboard
48 | # under Security settings for your Worker
49 | 
50 | [build]
51 | command = ""
52 | 
53 | [triggers]
54 | crons = []
55 | 
56 | [observability.logs]
57 | enabled = true
58 | 
59 | # KV namespace for storing OAuth tokens and state
60 | # Create your KV namespace with: wrangler kv namespace create OAUTH_KV (older Wrangler releases use the colon form: wrangler kv:namespace create OAUTH_KV)
61 | # Then replace the ID below with your namespace ID
62 | [[kv_namespaces]]
63 | binding = "OAUTH_KV"
64 | id = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"  # Replace with your KV namespace ID
65 | 
```

--------------------------------------------------------------------------------
/docs/concepts/01-what-is-biomcp.md:
--------------------------------------------------------------------------------

```markdown
 1 | # What is BioMCP?
 2 | 
 3 | BioMCP is an open-source implementation of the Model Context Protocol (MCP) designed for biomedical research. It connects AI assistants to specialized biomedical databases, enabling natural language access to complex scientific data.
 4 | 
 5 | [![Watch: What is BioMCP?](../blog/images/what_is_biomcp_thumbnail.png)](https://www.youtube.com/watch?v=bKxOWrWUUhM)
 6 | 
 7 | ## The Bridge to Biomedical Data
 8 | 
 9 | BioMCP provides AI assistants with direct access to specialized biomedical databases that aren't available through general web search. Built on Anthropic's Model Context Protocol standard, it creates a toolbox that enables natural language queries across multiple scientific data sources.
10 | 
11 | ## Connected Data Sources
12 | 
13 | - **PubMed/PubTator3**: 30M+ research articles with entity recognition for genes, diseases, drugs, and variants
14 | - **ClinicalTrials.gov**: 400K+ clinical trials searchable by condition, location, phase, and eligibility
15 | - **MyVariant.info**: Comprehensive variant annotations with clinical significance
16 | - **cBioPortal**: Cancer genomics data automatically integrated with searches
17 | - **BioThings APIs**: Real-time gene, drug, and disease information
18 | - **NCI CTS API**: Enhanced cancer trial search with biomarker filtering
19 | - **AlphaGenome**: Variant effect predictions using Google DeepMind's AI
20 | 
21 | ## How Does It Transform Research?
22 | 
23 | What makes BioMCP particularly powerful is its conversational nature. A
24 | researcher might begin with a simple question about a disease, then naturally
25 | progress to exploring related clinical trials, and finally investigate genetic
26 | variants that affect treatment efficacy—all within a single, flowing
27 | conversation.
28 | 
29 | The system remembers context throughout the interaction, allowing for natural
30 | follow-up questions and a research experience that mirrors how scientists
31 | actually work. Instead of requiring researchers to master complex query
32 | languages for each database, BioMCP translates natural language into the
33 | precise syntax each system requires.
34 | 
35 | ## Why This Matters
36 | 
37 | BioMCP represents a significant advancement in making specialized biomedical
38 | knowledge accessible. For researchers and clinicians, it means spending less
39 | time wrestling with complex database interfaces and more time advancing their
40 | work. For the broader field of AI in healthcare, it demonstrates how
41 | specialized knowledge domains can be made accessible through conversation.
42 | 
43 | As both AI assistants (synchronous conversation partners) and AI agents (
44 | autonomous systems working toward goals over time) continue to evolve, tools
45 | like BioMCP will be essential in connecting these systems to the specialized
46 | knowledge they need to deliver meaningful insights in complex domains.
47 | 
48 | By open-sourcing BioMCP, we're inviting the community to build upon this
49 | foundation, creating more powerful and accessible tools for biomedical research
50 | and ultimately accelerating the pace of scientific discovery.
51 | 
```

--------------------------------------------------------------------------------
/tests/tdd/variants/test_alphagenome.py:
--------------------------------------------------------------------------------

```python
 1 | """Tests for AlphaGenome integration."""
 2 | 
 3 | from unittest.mock import patch
 4 | 
 5 | import pytest
 6 | 
 7 | from biomcp.variants.alphagenome import predict_variant_effects
 8 | 
 9 | 
10 | @pytest.mark.asyncio
11 | async def test_predict_variant_effects_no_api_key():
12 |     """Test that missing API key returns helpful error message."""
13 |     with patch.dict("os.environ", {}, clear=True):
14 |         result = await predict_variant_effects(
15 |             chromosome="chr7",
16 |             position=140753336,
17 |             reference="A",
18 |             alternate="T",
19 |         )
20 | 
21 |         assert "AlphaGenome API key required" in result
22 |         assert "https://deepmind.google.com/science/alphagenome" in result
23 |         assert "ALPHAGENOME_API_KEY" in result
24 | 
25 | 
@pytest.mark.asyncio
async def test_predict_variant_effects_not_installed():
    """Test that missing AlphaGenome package returns installation instructions or API error."""
    # AlphaGenome might be installed in the test environment, so a dummy API
    # key is supplied and every acceptable outcome is checked below.
    # patch.dict adds the key for the duration of the block and restores the
    # previous environment on exit, even when the call or an assertion fails.
    with patch.dict("os.environ", {"ALPHAGENOME_API_KEY": "test-key"}):
        result = await predict_variant_effects(
            chromosome="chr7",
            position=140753336,
            reference="A",
            alternate="T",
            skip_cache=True,  # Skip cache to ensure fresh results
        )

    # The function should either:
    # 1. Handle ImportError if AlphaGenome is not installed
    # 2. Return API error if AlphaGenome is installed but API key is invalid
    # 3. Return a prediction failure for other errors
    acceptable_messages = (
        "AlphaGenome not installed",
        "AlphaGenome prediction failed",
        "API key not valid",  # This can happen with invalid test keys
    )
    assert any(message in result for message in acceptable_messages)

    if "AlphaGenome not installed" in result:
        assert "git clone" in result
        assert "pip install" in result
65 | 
66 | 
67 | @pytest.mark.asyncio
68 | async def test_predict_variant_effects_basic_parameters():
69 |     """Test that function accepts the expected parameters."""
70 |     # This tests the function interface without requiring AlphaGenome
71 |     with patch.dict("os.environ", {}, clear=True):
72 |         # Test with all parameters
73 |         result = await predict_variant_effects(
74 |             chromosome="chrX",
75 |             position=12345,
76 |             reference="G",
77 |             alternate="C",
78 |             interval_size=500_000,
79 |             tissue_types=["UBERON:0002367", "UBERON:0001157"],
80 |         )
81 | 
82 |         # Should get API key error (not import error), proving parameters were accepted
83 |         assert "AlphaGenome API key required" in result
84 | 
```

--------------------------------------------------------------------------------
/example_scripts/mcp_integration.py:
--------------------------------------------------------------------------------

```python
 1 | #!/usr/bin/env -S uv --quiet run --script
 2 | # /// script
 3 | # requires-python = ">=3.11"
 4 | # dependencies = [
 5 | #     "mcp",
 6 | # ]
 7 | # ///
 8 | 
 9 | # Scripts to reproduce this page:
10 | # https://biomcp.org/mcp_integration/
11 | 
12 | import asyncio
13 | 
14 | from mcp.client.session import ClientSession
15 | from mcp.client.stdio import StdioServerParameters, stdio_client
16 | from mcp.types import TextContent
17 | 
18 | 
async def check_server():
    """Start a BioMCP server over stdio and smoke-test it through an MCP client.

    The function launches the server as a subprocess, initializes a client
    session, prints the advertised prompts, resources and tools, calls the
    ``think`` tool, then fetches variant ``rs113488022`` through the ``fetch``
    tool and asserts on the returned text.

    Raises:
        AssertionError: If the tool count, a tool call status, or the returned
            content does not match expectations.
    """
    # Run with pypi package using `uv` not `uvx`
    server_params = StdioServerParameters(
        command="uv",
        args=["run", "--with", "biomcp-python", "biomcp", "run"],
    )
    #
    # Run with local code
    # server_params = StdioServerParameters(
    #     command="python",
    #     args=["-m", "biomcp", "run"],
    # )

    async with (
        stdio_client(server_params) as (read, write),
        ClientSession(read, write) as session,
    ):
        await session.initialize()

        # list prompts
        prompts = await session.list_prompts()
        print("Available prompts:", prompts)

        # list resources
        resources = await session.list_resources()
        print("Available resources:", resources)

        # list tools
        tool_result = await session.list_tools()
        tools = tool_result.tools
        print("Available tools:", tools)
        assert len(tools) == 13  # 3 core tools + 10 individual tools

        # IMPORTANT: Always use think tool first!
        think_result = await session.call_tool(
            "think",
            {
                "thought": "Planning to analyze variant rs113488022 for BRAF gene...",
                "thoughtNumber": 1,
                "totalThoughts": 2,
                "nextThoughtNeeded": True,
            },
        )
        assert (
            think_result.isError is False
        ), f"Think error: {think_result.content}"

        # Now fetch variant details using unified fetch tool
        tool_name = "fetch"
        tool_args = {"domain": "variant", "id_": "rs113488022"}
        result = await session.call_tool(tool_name, tool_args)

        # --- Assertions ---
        # 1. Check the call was successful (not an error)
        assert (
            result.isError is False
        ), f"Tool call resulted in error: {result.content}"

        # 2. Check there is content
        assert result.content is not None
        assert len(result.content) >= 1

        # 3. Check the type of the first content block
        content_block = result.content[0]
        assert isinstance(content_block, TextContent)

        markdown_output = content_block.text
        # print(markdown_output)
        assert isinstance(markdown_output, str)
        assert "rs113488022" in markdown_output
        assert "BRAF" in markdown_output
        assert "Pathogenic" in markdown_output
        print(f"Successfully called tool '{tool_name}' with args {tool_args}")
93 | 
94 | 
95 | if __name__ == "__main__":
96 |     asyncio.run(check_server())
97 | 
```

--------------------------------------------------------------------------------
/src/biomcp/variants/cancer_types.py:
--------------------------------------------------------------------------------

```python
  1 | """Cancer type configuration for gene-specific studies."""
  2 | 
  3 | # Gene to cancer type keyword mapping
  4 | # These keywords are used to filter relevant studies from cBioPortal
  5 | GENE_CANCER_KEYWORDS = {
  6 |     "BRAF": [
  7 |         "skcm",  # melanoma
  8 |         "thca",  # thyroid
  9 |         "coad",  # colorectal
 10 |         "lung",
 11 |         "glioma",  # brain
 12 |         "hairy_cell",  # hairy cell leukemia
 13 |     ],
 14 |     "KRAS": [
 15 |         "coad",  # colorectal
 16 |         "paad",  # pancreatic
 17 |         "lung",
 18 |         "stad",  # stomach
 19 |         "coadread",  # colorectal adenocarcinoma
 20 |         "ampca",  # ampullary carcinoma
 21 |     ],
 22 |     "TP53": [
 23 |         "brca",  # breast
 24 |         "ov",  # ovarian
 25 |         "lung",
 26 |         "hnsc",  # head/neck
 27 |         "lgg",  # lower grade glioma
 28 |         "gbm",  # glioblastoma
 29 |         "blca",  # bladder
 30 |         "lihc",  # liver
 31 |     ],
 32 |     "EGFR": [
 33 |         "lung",
 34 |         "nsclc",  # non-small cell lung cancer
 35 |         "gbm",  # glioblastoma
 36 |         "hnsc",  # head/neck
 37 |     ],
 38 |     "PIK3CA": [
 39 |         "brca",  # breast
 40 |         "hnsc",  # head/neck
 41 |         "coad",  # colorectal
 42 |         "ucec",  # endometrial
 43 |     ],
 44 |     "PTEN": [
 45 |         "prad",  # prostate
 46 |         "gbm",  # glioblastoma
 47 |         "ucec",  # endometrial
 48 |         "brca",  # breast
 49 |     ],
 50 |     "APC": [
 51 |         "coad",  # colorectal
 52 |         "coadread",
 53 |         "stad",  # stomach
 54 |     ],
 55 |     "VHL": [
 56 |         "rcc",  # renal cell carcinoma
 57 |         "ccrcc",  # clear cell RCC
 58 |         "kirc",  # kidney clear cell
 59 |     ],
 60 |     "RB1": [
 61 |         "rbl",  # retinoblastoma
 62 |         "sclc",  # small cell lung cancer
 63 |         "blca",  # bladder
 64 |     ],
 65 |     "BRCA1": [
 66 |         "brca",  # breast
 67 |         "ov",  # ovarian
 68 |         "prad",  # prostate
 69 |         "paad",  # pancreatic
 70 |     ],
 71 |     "BRCA2": [
 72 |         "brca",  # breast
 73 |         "ov",  # ovarian
 74 |         "prad",  # prostate
 75 |         "paad",  # pancreatic
 76 |     ],
 77 |     "ALK": [
 78 |         "lung",
 79 |         "nsclc",  # non-small cell lung cancer
 80 |         "alcl",  # anaplastic large cell lymphoma
 81 |         "nbl",  # neuroblastoma
 82 |     ],
 83 |     "MYC": [
 84 |         "burkitt",  # Burkitt lymphoma
 85 |         "dlbcl",  # diffuse large B-cell lymphoma
 86 |         "mm",  # multiple myeloma
 87 |         "nbl",  # neuroblastoma
 88 |     ],
 89 |     "NRAS": [
 90 |         "mel",  # melanoma
 91 |         "skcm",
 92 |         "thca",  # thyroid
 93 |         "aml",  # acute myeloid leukemia
 94 |     ],
 95 |     "KIT": [
 96 |         "gist",  # gastrointestinal stromal tumor
 97 |         "mel",  # melanoma
 98 |         "aml",  # acute myeloid leukemia
 99 |     ],
100 | }
101 | 
102 | # Default keywords for genes not in the mapping
103 | DEFAULT_CANCER_KEYWORDS = ["msk", "tcga", "metabric", "dfci", "broad"]
104 | 
105 | # Maximum number of studies to query per gene
106 | MAX_STUDIES_PER_GENE = 20
107 | 
108 | # Maximum mutations to process per study
109 | MAX_MUTATIONS_PER_STUDY = 5000
110 | 
111 | 
def get_cancer_keywords(gene: str) -> list[str]:
    """Get cancer type keywords for a given gene.

    The lookup is case-insensitive and ignores surrounding whitespace in the
    symbol. Genes without an entry in ``GENE_CANCER_KEYWORDS`` fall back to
    ``DEFAULT_CANCER_KEYWORDS``.

    Args:
        gene: Gene symbol (e.g., "BRAF")

    Returns:
        A new list of cancer type keywords to search for. It is a copy, so
        callers may modify it without altering the module-level configuration.
    """
    keywords = GENE_CANCER_KEYWORDS.get(
        gene.strip().upper(), DEFAULT_CANCER_KEYWORDS
    )
    # Copy so a caller that appends/sorts the result cannot corrupt the shared
    # mapping entry or the shared default list.
    return list(keywords)
122 | 
```

--------------------------------------------------------------------------------
/src/biomcp/cli/main.py:
--------------------------------------------------------------------------------

```python
  1 | import importlib.metadata
  2 | from typing import Annotated
  3 | 
  4 | import typer
  5 | 
  6 | from .articles import article_app
  7 | from .biomarkers import biomarker_app
  8 | from .diseases import disease_app
  9 | from .health import health_app
 10 | from .interventions import intervention_app
 11 | from .openfda import openfda_app
 12 | from .organizations import organization_app
 13 | from .server import run_server
 14 | from .trials import trial_app
 15 | from .variants import variant_app
 16 | 
 17 | # --- Get version from installed package metadata ---
 18 | try:
 19 |     __version__ = importlib.metadata.version("biomcp-python")
 20 | except importlib.metadata.PackageNotFoundError:
 21 |     __version__ = "unknown"  # Fallback if package not installed properly
 22 | 
 23 | 
 24 | # --- Callback for --version option ---
 25 | def version_callback(value: bool):
 26 |     if value:
 27 |         typer.echo(f"biomcp version: {__version__}")
 28 |         raise typer.Exit()
 29 | 
 30 | 
 31 | # --- Main Typer App ---
 32 | app = typer.Typer(
 33 |     help="BioMCP: Biomedical Model Context Protocol",
 34 |     no_args_is_help=True,
 35 |     # Add a callback to handle top-level options like --version
 36 |     # This callback itself doesn't do much, but allows defining eager options
 37 |     callback=lambda: None,
 38 | )
 39 | 
 40 | app.add_typer(
 41 |     trial_app,
 42 |     name="trial",
 43 |     no_args_is_help=True,
 44 | )
 45 | 
 46 | app.add_typer(
 47 |     article_app,
 48 |     name="article",
 49 |     no_args_is_help=True,
 50 | )
 51 | 
 52 | app.add_typer(
 53 |     variant_app,
 54 |     name="variant",
 55 |     no_args_is_help=True,
 56 | )
 57 | 
 58 | app.add_typer(
 59 |     health_app,
 60 |     name="health",
 61 |     no_args_is_help=True,
 62 | )
 63 | 
 64 | app.add_typer(
 65 |     organization_app,
 66 |     name="organization",
 67 |     no_args_is_help=True,
 68 | )
 69 | 
 70 | app.add_typer(
 71 |     intervention_app,
 72 |     name="intervention",
 73 |     no_args_is_help=True,
 74 | )
 75 | 
 76 | app.add_typer(
 77 |     biomarker_app,
 78 |     name="biomarker",
 79 |     no_args_is_help=True,
 80 | )
 81 | 
 82 | app.add_typer(
 83 |     disease_app,
 84 |     name="disease",
 85 |     no_args_is_help=True,
 86 | )
 87 | 
 88 | app.add_typer(
 89 |     openfda_app,
 90 |     name="openfda",
 91 |     no_args_is_help=True,
 92 | )
 93 | 
 94 | 
 95 | # --- Add --version Option using Annotation ---
 96 | # We add this directly to the app's callback invocation signature via annotation
 97 | # Note: This relies on Typer magic linking Annotated options in the callback signature
 98 | # This approach is cleaner than adding it to every subcommand.
 99 | @app.callback()
100 | def main_callback(
101 |     version: Annotated[
102 |         bool | None,  # Allows the option to not be present
103 |         typer.Option(
104 |             "--version",  # The flag name
105 |             callback=version_callback,  # Function to call when flag is used
106 |             is_eager=True,  # Process this option before any commands
107 |             help="Show the application's version and exit.",
108 |         ),
109 |     ] = None,  # Default value
110 | ):
111 |     """
112 |     BioMCP main application callback. Handles global options like --version.
113 |     """
114 |     # The actual logic is in version_callback due to is_eager=True
115 |     pass
116 | 
117 | 
118 | # --- Add Explicit 'version' Command ---
119 | @app.command()
120 | def version():
121 |     """
122 |     Display the installed biomcp version.
123 |     """
124 |     typer.echo(f"biomcp version: {__version__}")
125 | 
126 | 
127 | # Directly expose run_server as the 'run' command with all its options
128 | app.command("run")(run_server)
129 | 
130 | 
131 | if __name__ == "__main__":
132 |     app()
133 | 
```

--------------------------------------------------------------------------------
/src/biomcp/openfda/drug_shortages_helpers.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Helper functions for drug shortage search to reduce complexity.
  3 | """
  4 | 
  5 | from datetime import datetime
  6 | from typing import Any
  7 | 
  8 | 
  9 | def matches_drug_filter(shortage: dict[str, Any], drug: str | None) -> bool:
 10 |     """Check if shortage matches drug name filter."""
 11 |     if not drug:
 12 |         return True
 13 | 
 14 |     drug_lower = drug.lower()
 15 |     generic = shortage.get("generic_name", "").lower()
 16 |     brands = [b.lower() for b in shortage.get("brand_names", [])]
 17 | 
 18 |     return drug_lower in generic or any(drug_lower in b for b in brands)
 19 | 
 20 | 
 21 | def matches_status_filter(
 22 |     shortage: dict[str, Any], status: str | None
 23 | ) -> bool:
 24 |     """Check if shortage matches status filter."""
 25 |     if not status:
 26 |         return True
 27 | 
 28 |     status_lower = status.lower()
 29 |     shortage_status = shortage.get("status", "").lower()
 30 | 
 31 |     if status_lower == "current":
 32 |         return "current" in shortage_status
 33 |     elif status_lower == "resolved":
 34 |         return "resolved" in shortage_status
 35 | 
 36 |     return False
 37 | 
 38 | 
 39 | def matches_category_filter(
 40 |     shortage: dict[str, Any], therapeutic_category: str | None
 41 | ) -> bool:
 42 |     """Check if shortage matches therapeutic category filter."""
 43 |     if not therapeutic_category:
 44 |         return True
 45 | 
 46 |     cat_lower = therapeutic_category.lower()
 47 |     shortage_cat = shortage.get("therapeutic_category", "").lower()
 48 | 
 49 |     return cat_lower in shortage_cat
 50 | 
 51 | 
 52 | def filter_shortages(
 53 |     shortages: list[dict[str, Any]],
 54 |     drug: str | None,
 55 |     status: str | None,
 56 |     therapeutic_category: str | None,
 57 | ) -> list[dict[str, Any]]:
 58 |     """Filter shortage list based on criteria."""
 59 |     filtered = []
 60 | 
 61 |     for shortage in shortages:
 62 |         if not matches_drug_filter(shortage, drug):
 63 |             continue
 64 |         if not matches_status_filter(shortage, status):
 65 |             continue
 66 |         if not matches_category_filter(shortage, therapeutic_category):
 67 |             continue
 68 | 
 69 |         filtered.append(shortage)
 70 | 
 71 |     return filtered
 72 | 
 73 | 
 74 | def format_shortage_search_header(
 75 |     drug: str | None,
 76 |     status: str | None,
 77 |     therapeutic_category: str | None,
 78 |     last_updated: str | None,
 79 | ) -> list[str]:
 80 |     """Format header for shortage search results."""
 81 |     output = []
 82 | 
 83 |     # Add last updated time
 84 |     if last_updated:
 85 |         try:
 86 |             updated_dt = datetime.fromisoformat(last_updated)
 87 |             output.append(
 88 |                 f"*Last Updated: {updated_dt.strftime('%Y-%m-%d %H:%M')}*\n"
 89 |             )
 90 |         except (ValueError, TypeError):
 91 |             pass
 92 | 
 93 |     if drug:
 94 |         output.append(f"**Drug**: {drug}")
 95 |     if status:
 96 |         output.append(f"**Status Filter**: {status}")
 97 |     if therapeutic_category:
 98 |         output.append(f"**Category**: {therapeutic_category}")
 99 | 
100 |     return output
101 | 
102 | 
103 | def format_cache_timestamp(data: dict[str, Any]) -> str | None:
104 |     """Format cache timestamp from data."""
105 |     last_updated = data.get("last_updated") or data.get("_fetched_at")
106 |     if not last_updated:
107 |         return None
108 | 
109 |     try:
110 |         updated_dt = datetime.fromisoformat(last_updated)
111 |         return f"*Data Updated: {updated_dt.strftime('%Y-%m-%d %H:%M')}*\n"
112 |     except (ValueError, TypeError):
113 |         return None
114 | 
```