This is page 1 of 15. Use http://codebase.md/genomoncology/biomcp?lines=false&page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── actions │ │ └── setup-python-env │ │ └── action.yml │ ├── dependabot.yml │ └── workflows │ ├── ci.yml │ ├── deploy-docs.yml │ ├── main.yml.disabled │ ├── on-release-main.yml │ └── validate-codecov-config.yml ├── .gitignore ├── .pre-commit-config.yaml ├── BIOMCP_DATA_FLOW.md ├── CHANGELOG.md ├── CNAME ├── codecov.yaml ├── docker-compose.yml ├── Dockerfile ├── docs │ ├── apis │ │ ├── error-codes.md │ │ ├── overview.md │ │ └── python-sdk.md │ ├── assets │ │ ├── biomcp-cursor-locations.png │ │ ├── favicon.ico │ │ ├── icon.png │ │ ├── logo.png │ │ ├── mcp_architecture.txt │ │ └── remote-connection │ │ ├── 00_connectors.png │ │ ├── 01_add_custom_connector.png │ │ ├── 02_connector_enabled.png │ │ ├── 03_connect_to_biomcp.png │ │ ├── 04_select_google_oauth.png │ │ └── 05_success_connect.png │ ├── backend-services-reference │ │ ├── 01-overview.md │ │ ├── 02-biothings-suite.md │ │ ├── 03-cbioportal.md │ │ ├── 04-clinicaltrials-gov.md │ │ ├── 05-nci-cts-api.md │ │ ├── 06-pubtator3.md │ │ └── 07-alphagenome.md │ ├── blog │ │ ├── ai-assisted-clinical-trial-search-analysis.md │ │ ├── images │ │ │ ├── deep-researcher-video.png │ │ │ ├── researcher-announce.png │ │ │ ├── researcher-drop-down.png │ │ │ ├── researcher-prompt.png │ │ │ ├── trial-search-assistant.png │ │ │ └── what_is_biomcp_thumbnail.png │ │ └── researcher-persona-resource.md │ ├── changelog.md │ ├── CNAME │ ├── concepts │ │ ├── 01-what-is-biomcp.md │ │ ├── 02-the-deep-researcher-persona.md │ │ └── 03-sequential-thinking-with-the-think-tool.md │ ├── developer-guides │ │ ├── 01-server-deployment.md │ │ ├── 02-contributing-and-testing.md │ │ ├── 03-third-party-endpoints.md │ │ ├── 04-transport-protocol.md │ │ ├── 05-error-handling.md │ │ ├── 06-http-client-and-caching.md │ │ ├── 07-performance-optimizations.md │ │ └── generate_endpoints.py │ ├── faq-condensed.md │ 
├── FDA_SECURITY.md │ ├── genomoncology.md │ ├── getting-started │ │ ├── 01-quickstart-cli.md │ │ ├── 02-claude-desktop-integration.md │ │ └── 03-authentication-and-api-keys.md │ ├── how-to-guides │ │ ├── 01-find-articles-and-cbioportal-data.md │ │ ├── 02-find-trials-with-nci-and-biothings.md │ │ ├── 03-get-comprehensive-variant-annotations.md │ │ ├── 04-predict-variant-effects-with-alphagenome.md │ │ ├── 05-logging-and-monitoring-with-bigquery.md │ │ └── 06-search-nci-organizations-and-interventions.md │ ├── index.md │ ├── policies.md │ ├── reference │ │ ├── architecture-diagrams.md │ │ ├── quick-architecture.md │ │ ├── quick-reference.md │ │ └── visual-architecture.md │ ├── robots.txt │ ├── stylesheets │ │ ├── announcement.css │ │ └── extra.css │ ├── troubleshooting.md │ ├── tutorials │ │ ├── biothings-prompts.md │ │ ├── claude-code-biomcp-alphagenome.md │ │ ├── nci-prompts.md │ │ ├── openfda-integration.md │ │ ├── openfda-prompts.md │ │ ├── pydantic-ai-integration.md │ │ └── remote-connection.md │ ├── user-guides │ │ ├── 01-command-line-interface.md │ │ ├── 02-mcp-tools-reference.md │ │ └── 03-integrating-with-ides-and-clients.md │ └── workflows │ └── all-workflows.md ├── example_scripts │ ├── mcp_integration.py │ └── python_sdk.py ├── glama.json ├── LICENSE ├── lzyank.toml ├── Makefile ├── mkdocs.yml ├── package-lock.json ├── package.json ├── pyproject.toml ├── README.md ├── scripts │ ├── check_docs_in_mkdocs.py │ ├── check_http_imports.py │ └── generate_endpoints_doc.py ├── smithery.yaml ├── src │ └── biomcp │ ├── __init__.py │ ├── __main__.py │ ├── articles │ │ ├── __init__.py │ │ ├── autocomplete.py │ │ ├── fetch.py │ │ ├── preprints.py │ │ ├── search_optimized.py │ │ ├── search.py │ │ └── unified.py │ ├── biomarkers │ │ ├── __init__.py │ │ └── search.py │ ├── cbioportal_helper.py │ ├── circuit_breaker.py │ ├── cli │ │ ├── __init__.py │ │ ├── articles.py │ │ ├── biomarkers.py │ │ ├── diseases.py │ │ ├── health.py │ │ ├── interventions.py │ │ ├── main.py │ │ 
├── openfda.py │ │ ├── organizations.py │ │ ├── server.py │ │ ├── trials.py │ │ └── variants.py │ ├── connection_pool.py │ ├── constants.py │ ├── core.py │ ├── diseases │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── domain_handlers.py │ ├── drugs │ │ ├── __init__.py │ │ └── getter.py │ ├── exceptions.py │ ├── genes │ │ ├── __init__.py │ │ └── getter.py │ ├── http_client_simple.py │ ├── http_client.py │ ├── individual_tools.py │ ├── integrations │ │ ├── __init__.py │ │ ├── biothings_client.py │ │ └── cts_api.py │ ├── interventions │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── logging_filter.py │ ├── metrics_handler.py │ ├── metrics.py │ ├── openfda │ │ ├── __init__.py │ │ ├── adverse_events_helpers.py │ │ ├── adverse_events.py │ │ ├── cache.py │ │ ├── constants.py │ │ ├── device_events_helpers.py │ │ ├── device_events.py │ │ ├── drug_approvals.py │ │ ├── drug_labels_helpers.py │ │ ├── drug_labels.py │ │ ├── drug_recalls_helpers.py │ │ ├── drug_recalls.py │ │ ├── drug_shortages_detail_helpers.py │ │ ├── drug_shortages_helpers.py │ │ ├── drug_shortages.py │ │ ├── exceptions.py │ │ ├── input_validation.py │ │ ├── rate_limiter.py │ │ ├── utils.py │ │ └── validation.py │ ├── organizations │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── parameter_parser.py │ ├── prefetch.py │ ├── query_parser.py │ ├── query_router.py │ ├── rate_limiter.py │ ├── render.py │ ├── request_batcher.py │ ├── resources │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── instructions.md │ │ └── researcher.md │ ├── retry.py │ ├── router_handlers.py │ ├── router.py │ ├── shared_context.py │ ├── thinking │ │ ├── __init__.py │ │ ├── sequential.py │ │ └── session.py │ ├── thinking_tool.py │ ├── thinking_tracker.py │ ├── trials │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── nci_getter.py │ │ ├── nci_search.py │ │ └── search.py │ ├── utils │ │ ├── __init__.py │ │ ├── cancer_types_api.py │ │ ├── cbio_http_adapter.py │ │ ├── endpoint_registry.py │ │ ├── 
gene_validator.py │ │ ├── metrics.py │ │ ├── mutation_filter.py │ │ ├── query_utils.py │ │ ├── rate_limiter.py │ │ └── request_cache.py │ ├── variants │ │ ├── __init__.py │ │ ├── alphagenome.py │ │ ├── cancer_types.py │ │ ├── cbio_external_client.py │ │ ├── cbioportal_mutations.py │ │ ├── cbioportal_search_helpers.py │ │ ├── cbioportal_search.py │ │ ├── constants.py │ │ ├── external.py │ │ ├── filters.py │ │ ├── getter.py │ │ ├── links.py │ │ └── search.py │ └── workers │ ├── __init__.py │ ├── worker_entry_stytch.js │ ├── worker_entry.js │ └── worker.py ├── tests │ ├── bdd │ │ ├── cli_help │ │ │ ├── help.feature │ │ │ └── test_help.py │ │ ├── conftest.py │ │ ├── features │ │ │ └── alphagenome_integration.feature │ │ ├── fetch_articles │ │ │ ├── fetch.feature │ │ │ └── test_fetch.py │ │ ├── get_trials │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── get_variants │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── search_articles │ │ │ ├── autocomplete.feature │ │ │ ├── search.feature │ │ │ ├── test_autocomplete.py │ │ │ └── test_search.py │ │ ├── search_trials │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ ├── search_variants │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ └── steps │ │ └── test_alphagenome_steps.py │ ├── config │ │ └── test_smithery_config.py │ ├── conftest.py │ ├── data │ │ ├── ct_gov │ │ │ ├── clinical_trials_api_v2.yaml │ │ │ ├── trials_NCT04280705.json │ │ │ └── trials_NCT04280705.txt │ │ ├── myvariant │ │ │ ├── myvariant_api.yaml │ │ │ ├── myvariant_field_descriptions.csv │ │ │ ├── variants_full_braf_v600e.json │ │ │ ├── variants_full_braf_v600e.txt │ │ │ └── variants_part_braf_v600_multiple.json │ │ ├── openfda │ │ │ ├── drugsfda_detail.json │ │ │ ├── drugsfda_search.json │ │ │ ├── enforcement_detail.json │ │ │ └── enforcement_search.json │ │ └── pubtator │ │ ├── pubtator_autocomplete.json │ │ └── pubtator3_paper.txt │ ├── integration │ │ ├── test_openfda_integration.py │ │ ├── test_preprints_integration.py │ │ ├── 
test_simple.py │ │ └── test_variants_integration.py │ ├── tdd │ │ ├── articles │ │ │ ├── test_autocomplete.py │ │ │ ├── test_cbioportal_integration.py │ │ │ ├── test_fetch.py │ │ │ ├── test_preprints.py │ │ │ ├── test_search.py │ │ │ └── test_unified.py │ │ ├── conftest.py │ │ ├── drugs │ │ │ ├── __init__.py │ │ │ └── test_drug_getter.py │ │ ├── openfda │ │ │ ├── __init__.py │ │ │ ├── test_adverse_events.py │ │ │ ├── test_device_events.py │ │ │ ├── test_drug_approvals.py │ │ │ ├── test_drug_labels.py │ │ │ ├── test_drug_recalls.py │ │ │ ├── test_drug_shortages.py │ │ │ └── test_security.py │ │ ├── test_biothings_integration_real.py │ │ ├── test_biothings_integration.py │ │ ├── test_circuit_breaker.py │ │ ├── test_concurrent_requests.py │ │ ├── test_connection_pool.py │ │ ├── test_domain_handlers.py │ │ ├── test_drug_approvals.py │ │ ├── test_drug_recalls.py │ │ ├── test_drug_shortages.py │ │ ├── test_endpoint_documentation.py │ │ ├── test_error_scenarios.py │ │ ├── test_europe_pmc_fetch.py │ │ ├── test_mcp_integration.py │ │ ├── test_mcp_tools.py │ │ ├── test_metrics.py │ │ ├── test_nci_integration.py │ │ ├── test_nci_mcp_tools.py │ │ ├── test_network_policies.py │ │ ├── test_offline_mode.py │ │ ├── test_openfda_unified.py │ │ ├── test_pten_r173_search.py │ │ ├── test_render.py │ │ ├── test_request_batcher.py.disabled │ │ ├── test_retry.py │ │ ├── test_router.py │ │ ├── test_shared_context.py.disabled │ │ ├── test_unified_biothings.py │ │ ├── thinking │ │ │ ├── __init__.py │ │ │ └── test_sequential.py │ │ ├── trials │ │ │ ├── test_backward_compatibility.py │ │ │ ├── test_getter.py │ │ │ └── test_search.py │ │ ├── utils │ │ │ ├── test_gene_validator.py │ │ │ ├── test_mutation_filter.py │ │ │ ├── test_rate_limiter.py │ │ │ └── test_request_cache.py │ │ ├── variants │ │ │ ├── constants.py │ │ │ ├── test_alphagenome_api_key.py │ │ │ ├── test_alphagenome_comprehensive.py │ │ │ ├── test_alphagenome.py │ │ │ ├── test_cbioportal_mutations.py │ │ │ ├── 
test_cbioportal_search.py │ │ │ ├── test_external_integration.py │ │ │ ├── test_external.py │ │ │ ├── test_extract_gene_aa_change.py │ │ │ ├── test_filters.py │ │ │ ├── test_getter.py │ │ │ ├── test_links.py │ │ │ └── test_search.py │ │ └── workers │ │ └── test_worker_sanitization.js │ └── test_pydantic_ai_integration.py ├── THIRD_PARTY_ENDPOINTS.md ├── tox.ini ├── uv.lock └── wrangler.toml ``` # Files -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- ```yaml repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: "v4.4.0" hooks: - id: check-case-conflict - id: check-merge-conflict - id: check-toml - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit rev: "v0.6.3" hooks: - id: ruff args: [--exit-non-zero-on-fix] - id: ruff-format - repo: local hooks: - id: update-endpoints-doc name: Update THIRD_PARTY_ENDPOINTS.md entry: uv run python scripts/generate_endpoints_doc.py language: system pass_filenames: false files: 'src/biomcp/utils/endpoint_registry\.py$' - id: check-http-imports name: Check for direct HTTP library imports entry: uv run python scripts/check_http_imports.py language: system pass_filenames: false always_run: true files: '\.py$' - id: check-docs-in-mkdocs name: Check documentation files are in mkdocs.yml entry: uv run python scripts/check_docs_in_mkdocs.py language: system pass_filenames: false files: '^docs/.*\.md$|^mkdocs\.yml$' - repo: https://github.com/pre-commit/mirrors-prettier rev: "v3.0.3" hooks: - id: prettier ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` docs/source # From https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore # Byte-compiled / optimized / DLL files 
__pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # ruff .ruff_cache # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # Vscode config files .vscode/ # PyCharm .idea/ # LLMs Notes llms/ vault/ .DS_Store /node_modules/ CLAUDE.md lzyank.toml experiment/ alphagenome spike/ ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown # BioMCP: Biomedical Model Context Protocol BioMCP is an open source (MIT License) toolkit that empowers AI assistants and agents with specialized biomedical knowledge. 
Built following the Model Context Protocol (MCP), it connects AI systems to authoritative biomedical data sources, enabling them to answer questions about clinical trials, scientific literature, and genomic variants with precision and depth. [Watch the BioMCP video on YouTube](https://www.youtube.com/watch?v=bKxOWrWUUhM) ## MCPHub Certification BioMCP is certified by [MCPHub](https://mcphub.com/mcp-servers/genomoncology/biomcp). This certification ensures that BioMCP follows best practices for Model Context Protocol implementation and provides reliable biomedical data access. ## Why BioMCP? While Large Language Models have broad general knowledge, they often lack specialized domain-specific information or access to up-to-date resources. BioMCP bridges this gap for biomedicine by: - Providing **structured access** to clinical trials, biomedical literature, and genomic variants - Enabling **natural language queries** to specialized databases without requiring knowledge of their specific syntax - Supporting **biomedical research** workflows through a consistent interface - Functioning as an **MCP server** for AI assistants and agents ## Biomedical Data Sources BioMCP integrates with multiple biomedical data sources: ### Literature Sources - **PubTator3/PubMed** - Peer-reviewed biomedical literature with entity annotations - **bioRxiv/medRxiv** - Preprint servers for biology and health sciences - **Europe PMC** - Open science platform including preprints ### Clinical & Genomic Sources - **ClinicalTrials.gov** - Clinical trial registry and results database - **NCI Clinical Trials Search API** - National Cancer Institute's curated cancer trials database - Advanced search filters (biomarkers, prior therapies, brain metastases) - Organization and intervention databases - Disease vocabulary with synonyms - **BioThings Suite** - Comprehensive biomedical data APIs: - **MyVariant.info** - Consolidated genetic variant annotation - **MyGene.info** - Real-time gene annotations and information -
**MyDisease.info** - Disease ontology and synonym information - **MyChem.info** - Drug/chemical annotations and properties - **TCGA/GDC** - The Cancer Genome Atlas for cancer variant data - **1000 Genomes** - Population frequency data via Ensembl - **cBioPortal** - Cancer genomics portal with mutation occurrence data ### Regulatory & Safety Sources - **OpenFDA** - FDA regulatory and safety data: - **Drug Adverse Events (FAERS)** - Post-market drug safety reports - **Drug Labels (SPL)** - Official prescribing information - **Device Events (MAUDE)** - Medical device adverse events, with genomic device filtering ## Available MCP Tools BioMCP provides 24 specialized tools for biomedical research: ### Core Tools (3) #### 1. Think Tool (ALWAYS USE FIRST!) **CRITICAL**: The `think` tool MUST be your first step for ANY biomedical research task. ```python # Start analysis with sequential thinking think( thought="Breaking down the query about BRAF mutations in melanoma...", thoughtNumber=1, totalThoughts=3, nextThoughtNeeded=True ) ``` The sequential thinking tool helps: - Break down complex biomedical problems systematically - Plan multi-step research approaches - Track reasoning progress - Ensure comprehensive analysis #### 2. 
Search Tool The search tool supports two modes: ##### Unified Query Language (Recommended) Use the `query` parameter with structured field syntax for powerful cross-domain searches: ```python # Simple natural language search(query="BRAF melanoma") # Field-specific search search(query="gene:BRAF AND trials.condition:melanoma") # Complex queries search(query="gene:BRAF AND variants.significance:pathogenic AND articles.date:>2023") # Get searchable fields schema search(get_schema=True) # Explain how a query is parsed search(query="gene:BRAF", explain_query=True) ``` **Supported Fields:** - **Cross-domain**: `gene:`, `variant:`, `disease:` - **Trials**: `trials.condition:`, `trials.phase:`, `trials.status:`, `trials.intervention:` - **Articles**: `articles.author:`, `articles.journal:`, `articles.date:` - **Variants**: `variants.significance:`, `variants.rsid:`, `variants.frequency:` ##### Domain-Based Search Use the `domain` parameter with specific filters: ```python # Search articles (includes automatic cBioPortal integration) search(domain="article", genes=["BRAF"], diseases=["melanoma"]) # Search with mutation-specific cBioPortal data search(domain="article", genes=["BRAF"], keywords=["V600E"]) search(domain="article", genes=["SRSF2"], keywords=["F57*"]) # Wildcard patterns # Search trials search(domain="trial", conditions=["lung cancer"], phase="3") # Search variants search(domain="variant", gene="TP53", significance="pathogenic") ``` **Note**: When searching articles with a gene parameter, cBioPortal data is automatically included: - Gene-level summaries show mutation frequency across cancer studies - Mutation-specific searches (e.g., "V600E") show study-level occurrence data - Cancer types are dynamically resolved from cBioPortal API #### 3. 
Fetch Tool Retrieve full details for a single article, trial, or variant: ```python # Fetch article details (supports both PMID and DOI) fetch(domain="article", id="34567890") # PMID fetch(domain="article", id="10.1101/2024.01.20.23288905") # DOI # Fetch trial with all sections fetch(domain="trial", id="NCT04280705", detail="all") # Fetch variant details fetch(domain="variant", id="rs113488022") ``` **Domain-specific options:** - **Articles**: `detail="full"` retrieves full text if available - **Trials**: `detail` can be "protocol", "locations", "outcomes", "references", or "all" - **Variants**: Always returns full details ### Individual Tools (21) For users who prefer direct access to specific functionality, BioMCP also provides 21 individual tools: #### Article Tools (2) - **article_searcher**: Search PubMed/PubTator3 and preprints - **article_getter**: Fetch detailed article information (supports PMID and DOI) #### Trial Tools (6) - **trial_searcher**: Search ClinicalTrials.gov or NCI CTS API (via source parameter) - **trial_getter**: Fetch all trial details from either source - **trial_protocol_getter**: Fetch protocol information only (ClinicalTrials.gov) - **trial_references_getter**: Fetch trial publications (ClinicalTrials.gov) - **trial_outcomes_getter**: Fetch outcome measures and results (ClinicalTrials.gov) - **trial_locations_getter**: Fetch site locations and contacts (ClinicalTrials.gov) #### Variant Tools (2) - **variant_searcher**: Search MyVariant.info database - **variant_getter**: Fetch comprehensive variant details #### NCI-Specific Tools (6) - **nci_organization_searcher**: Search NCI's organization database - **nci_organization_getter**: Get organization details by ID - **nci_intervention_searcher**: Search NCI's intervention database (drugs, devices, procedures) - **nci_intervention_getter**: Get intervention details by ID - **nci_biomarker_searcher**: Search biomarkers used in trial eligibility criteria - **nci_disease_searcher**: Search
NCI's controlled vocabulary of cancer conditions #### Gene, Disease & Drug Tools (3) - **gene_getter**: Get real-time gene information from MyGene.info - **disease_getter**: Get disease definitions and synonyms from MyDisease.info - **drug_getter**: Get drug/chemical information from MyChem.info **Note**: All individual tools that search by gene automatically include cBioPortal summaries when the `include_cbioportal` parameter is True (default). Trial searches can expand disease conditions with synonyms when `expand_synonyms` is True (default). ## Quick Start ### For Claude Desktop Users 1. **Install `uv`** if you don't have it (recommended): ```bash # macOS brew install uv # Windows/Linux pip install uv ``` 2. **Configure Claude Desktop**: - Open Claude Desktop settings - Navigate to Developer section - Click "Edit Config" and add: ```json { "mcpServers": { "biomcp": { "command": "uv", "args": ["run", "--with", "biomcp-python", "biomcp", "run"] } } } ``` - Restart Claude Desktop and start chatting about biomedical topics!
### Python Package Installation ```bash # Using pip pip install biomcp-python # Using uv (recommended for faster installation) uv pip install biomcp-python # Run directly without installation uv run --with biomcp-python biomcp trial search --condition "lung cancer" ``` ## Configuration ### Environment Variables BioMCP supports optional environment variables for enhanced functionality: ```bash # cBioPortal API authentication (optional) export CBIO_TOKEN="your-api-token" # For authenticated access export CBIO_BASE_URL="https://www.cbioportal.org/api" # Custom API endpoint # Performance tuning export BIOMCP_USE_CONNECTION_POOL="true" # Enable HTTP connection pooling (default: true) export BIOMCP_METRICS_ENABLED="false" # Enable performance metrics (default: false) ``` ## Running BioMCP Server BioMCP supports multiple transport protocols to suit different deployment scenarios: ### Local Development (STDIO) For direct integration with Claude Desktop or local MCP clients: ```bash # Default STDIO mode for local development biomcp run # Or explicitly specify STDIO biomcp run --mode stdio ``` ### HTTP Server Mode BioMCP supports multiple HTTP transport protocols: #### Legacy SSE Transport (Worker Mode) For backward compatibility with existing SSE clients: ```bash biomcp run --mode worker # Server available at http://localhost:8000/sse ``` #### Streamable HTTP Transport (Recommended) The new MCP-compliant Streamable HTTP transport provides optimal performance and standards compliance: ```bash biomcp run --mode streamable_http # Custom host and port biomcp run --mode streamable_http --host 127.0.0.1 --port 8080 ``` Features of Streamable HTTP transport: - Single `/mcp` endpoint for all operations - Dynamic response mode (JSON for quick operations, SSE for long-running) - Session management support (future) - Full MCP specification compliance (2025-03-26) - Better scalability for cloud deployments ### Deployment Options #### Docker ```bash # Build the Docker image locally 
docker build -t biomcp:latest . # Run the container docker run -p 8000:8000 biomcp:latest biomcp run --mode streamable_http ``` #### Cloudflare Workers The worker mode can be deployed to Cloudflare Workers for global edge deployment. Note: Most APIs work without authentication, but tokens may provide higher rate limits; the NCI-specific tools require an NCI API key. ## Command Line Interface BioMCP provides a comprehensive CLI for direct database interaction: ```bash # Get help biomcp --help # Run the MCP server biomcp run # Article search examples biomcp article search --gene BRAF --disease Melanoma # Includes preprints by default biomcp article search --gene BRAF --no-preprints # Exclude preprints biomcp article get 21717063 --full # Clinical trial examples biomcp trial search --condition "Lung Cancer" --phase PHASE3 biomcp trial search --condition melanoma --source nci --api-key YOUR_KEY # Use NCI API biomcp trial get NCT04280705 Protocol biomcp trial get NCT04280705 --source nci --api-key YOUR_KEY # Get from NCI # Variant examples with external annotations biomcp variant search --gene TP53 --significance pathogenic biomcp variant get rs113488022 # Includes TCGA, 1000 Genomes, and cBioPortal data by default biomcp variant get rs113488022 --no-external # Core annotations only # NCI-specific examples (requires NCI API key) biomcp organization search "MD Anderson" --api-key YOUR_KEY biomcp organization get ORG123456 --api-key YOUR_KEY biomcp intervention search pembrolizumab --api-key YOUR_KEY biomcp intervention search --type Device --api-key YOUR_KEY biomcp biomarker search "PD-L1" --api-key YOUR_KEY biomcp disease search melanoma --source nci --api-key YOUR_KEY ``` ## Testing & Verification Test your BioMCP setup with the MCP Inspector: ```bash npx @modelcontextprotocol/inspector uv run --with biomcp-python biomcp run ``` This opens a web interface where you can explore and test all available tools.
## Enterprise Version: OncoMCP OncoMCP extends BioMCP with GenomOncology's enterprise-grade precision oncology platform (POP), providing: - **HIPAA-Compliant Deployment**: Secure on-premise options - **Real-Time Trial Matching**: Up-to-date status and arm-level matching - **Healthcare Integration**: Seamless EHR and data warehouse connectivity - **Curated Knowledge Base**: 15,000+ trials and FDA approvals - **Sophisticated Patient Matching**: Using integrated clinical and molecular profiles - **Advanced NLP**: Structured extraction from unstructured text - **Comprehensive Biomarker Processing**: Mutation and rule processing Learn more: [GenomOncology](https://genomoncology.com/) ## MCP Registries [BioMCP on Smithery](https://smithery.ai/server/@genomoncology/biomcp) <a href="https://glama.ai/mcp/servers/@genomoncology/biomcp"> <img width="380" height="200" src="https://glama.ai/mcp/servers/@genomoncology/biomcp/badge" /> </a> ## Example Use Cases ### Gene Information Retrieval ```python # Get comprehensive gene information gene_getter(gene_id_or_symbol="TP53") # Returns: Official name, summary, aliases, links to databases ``` ### Disease Synonym Expansion ```python # Get disease information with synonyms disease_getter(disease_id_or_name="GIST") # Returns: "gastrointestinal stromal tumor" and other synonyms # Search trials with automatic synonym expansion trial_searcher(conditions=["GIST"], expand_synonyms=True) # Searches for: GIST OR "gastrointestinal stromal tumor" OR "GI stromal tumor" ``` ### Integrated Biomedical Research ```python # 1. Always start with thinking think(thought="Analyzing BRAF V600E in melanoma treatment", thoughtNumber=1) # 2. Get gene context gene_getter("BRAF") # 3. Search for pathogenic variants variant_searcher(gene="BRAF", hgvsp="V600E", significance="pathogenic") # 4.
Find relevant clinical trials with disease expansion trial_searcher(conditions=["melanoma"], interventions=["BRAF inhibitor"]) ``` ## Documentation For comprehensive documentation, visit [https://biomcp.org](https://biomcp.org) ### Developer Guides - [HTTP Client Guide](./docs/developer-guides/06-http-client-and-caching.md) - Using the centralized HTTP client - [Migration Examples](./docs/migration-examples.md) - Migrating from direct HTTP usage - [Error Handling Guide](./docs/developer-guides/05-error-handling.md) - Comprehensive error handling patterns - [Integration Testing Guide](./docs/integration-testing.md) - Best practices for reliable integration tests - [Third-Party Endpoints](./THIRD_PARTY_ENDPOINTS.md) - Complete list of external APIs used - [Testing Guide](./docs/developer-guides/02-contributing-and-testing.md) - Running tests and understanding test categories ## Development ### Running Tests ```bash # Run all tests (including integration tests) make test # Run only unit tests (excluding integration tests) uv run python -m pytest tests -m "not integration" # Run only integration tests uv run python -m pytest tests -m "integration" ``` **Note**: Integration tests make real API calls and may fail due to network issues or rate limiting. In CI/CD, integration tests are run separately and allowed to fail without blocking the build. ## BioMCP Examples Repo Looking to see BioMCP in action? Check out the companion repository: 👉 **[biomcp-examples](https://github.com/genomoncology/biomcp-examples)** It contains real prompts, AI-generated research briefs, and evaluation runs across different models. Use it to explore capabilities, compare outputs, or benchmark your own setup. Have a cool example of your own? **We’d love for you to contribute!** Just fork the repo and submit a PR with your experiment. ## License This project is licensed under the MIT License.
``` -------------------------------------------------------------------------------- /tests/tdd/drugs/__init__.py: -------------------------------------------------------------------------------- ```python """Tests for drug information tools.""" ``` -------------------------------------------------------------------------------- /tests/tdd/openfda/__init__.py: -------------------------------------------------------------------------------- ```python """Test package for OpenFDA integration.""" ``` -------------------------------------------------------------------------------- /tests/tdd/thinking/__init__.py: -------------------------------------------------------------------------------- ```python # Test module for sequential thinking functionality ``` -------------------------------------------------------------------------------- /src/biomcp/thinking/__init__.py: -------------------------------------------------------------------------------- ```python from . import sequential __all__ = [ "sequential", ] ``` -------------------------------------------------------------------------------- /src/biomcp/resources/__init__.py: -------------------------------------------------------------------------------- ```python from .getter import get_instructions __all__ = [ "get_instructions", ] ``` -------------------------------------------------------------------------------- /src/biomcp/cli/__init__.py: -------------------------------------------------------------------------------- ```python """BioMCP Command Line Interface.""" from .main import app __all__ = ["app"] ``` -------------------------------------------------------------------------------- /src/biomcp/genes/__init__.py: -------------------------------------------------------------------------------- ```python """Gene information tools for BioMCP.""" from .getter import get_gene __all__ = ["get_gene"] ``` -------------------------------------------------------------------------------- /glama.json: 
-------------------------------------------------------------------------------- ```json { "$schema": "https://glama.ai/mcp/schemas/server.json", "maintainers": ["imaurer", "jyeakley"] } ``` -------------------------------------------------------------------------------- /src/biomcp/drugs/__init__.py: -------------------------------------------------------------------------------- ```python """Drug information tools using MyChem.info.""" from .getter import get_drug __all__ = ["get_drug"] ``` -------------------------------------------------------------------------------- /src/biomcp/workers/__init__.py: -------------------------------------------------------------------------------- ```python """Cloudflare Workers module for BioMCP.""" from .worker import create_worker_app __all__ = ["create_worker_app"] ``` -------------------------------------------------------------------------------- /src/biomcp/variants/__init__.py: -------------------------------------------------------------------------------- ```python from . import search from . import getter from . 
import external __all__ = [ "external", "getter", "search", ] ``` -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- ```json { "devDependencies": { "wrangler": "^4.13.2" }, "dependencies": { "hono": "^4.7.8", "jose": "^6.0.11" } } ``` -------------------------------------------------------------------------------- /codecov.yaml: -------------------------------------------------------------------------------- ```yaml coverage: range: 90..100 round: down precision: 1 status: project: default: target: 95% threshold: 0.5% ``` -------------------------------------------------------------------------------- /src/biomcp/utils/__init__.py: -------------------------------------------------------------------------------- ```python """Utility modules for BioMCP.""" from .query_utils import parse_or_query, contains_or_operator __all__ = ["contains_or_operator", "parse_or_query"] ``` -------------------------------------------------------------------------------- /src/biomcp/integrations/__init__.py: -------------------------------------------------------------------------------- ```python """BioThings API integrations for BioMCP.""" from .biothings_client import BioThingsClient, DiseaseInfo, GeneInfo __all__ = ["BioThingsClient", "DiseaseInfo", "GeneInfo"] ``` -------------------------------------------------------------------------------- /src/biomcp/variants/constants.py: -------------------------------------------------------------------------------- ```python """Constants for variant modules.""" import os # cBioPortal API endpoints CBIO_BASE_URL = os.getenv("CBIO_BASE_URL", "https://www.cbioportal.org/api") CBIO_TOKEN = os.getenv("CBIO_TOKEN") ``` -------------------------------------------------------------------------------- /src/biomcp/articles/__init__.py: -------------------------------------------------------------------------------- ```python 
from . import autocomplete from . import fetch from . import search from . import preprints from . import unified __all__ = [ "autocomplete", "fetch", "preprints", "search", "unified", ] ``` -------------------------------------------------------------------------------- /lzyank.toml: -------------------------------------------------------------------------------- ```toml [default] exclude = [ "uv.lock", "lzyank.toml", ".github", "*.ini", ".pre-commit-config.yaml", "LICENSE", "codecov.yaml", "mkdocs.yml", "tests/data" ] [actions] include = [".github/"] ``` -------------------------------------------------------------------------------- /src/biomcp/trials/__init__.py: -------------------------------------------------------------------------------- ```python from . import getter from . import nci_getter from . import nci_search from . import search from .search import LineOfTherapy __all__ = [ "LineOfTherapy", "getter", "nci_getter", "nci_search", "search", ] ``` -------------------------------------------------------------------------------- /src/biomcp/diseases/__init__.py: -------------------------------------------------------------------------------- ```python """Disease information tools for BioMCP.""" from .getter import get_disease from .search import search_diseases, get_disease_by_id, search_diseases_with_or __all__ = [ "get_disease", "get_disease_by_id", "search_diseases", "search_diseases_with_or", ] ``` -------------------------------------------------------------------------------- /src/biomcp/interventions/__init__.py: -------------------------------------------------------------------------------- ```python """Interventions module for NCI Clinical Trials API integration.""" from .getter import get_intervention from .search import search_interventions, search_interventions_with_or __all__ = [ "get_intervention", "search_interventions", "search_interventions_with_or", ] ``` -------------------------------------------------------------------------------- 
/src/biomcp/organizations/__init__.py: -------------------------------------------------------------------------------- ```python """Organizations module for NCI Clinical Trials API integration.""" from .getter import get_organization from .search import search_organizations, search_organizations_with_or __all__ = [ "get_organization", "search_organizations", "search_organizations_with_or", ] ``` -------------------------------------------------------------------------------- /docs/robots.txt: -------------------------------------------------------------------------------- ``` # Robots.txt for BioMCP Documentation # https://biomcp.org/ User-agent: * Allow: / # Sitemap location Sitemap: https://biomcp.org/sitemap.xml # Rate limiting for crawlers Crawl-delay: 1 # Block access to build artifacts Disallow: /site/ Disallow: /.git/ Disallow: /node_modules/ ``` -------------------------------------------------------------------------------- /tests/data/pubtator/pubtator_autocomplete.json: -------------------------------------------------------------------------------- ```json [ { "_id": "@GENE_BRAF", "biotype": "gene", "name": "BRAF", "description": "All Species", "match": "Matched on name <m>BRAF</m>" }, { "_id": "@GENE_BRAFP1", "biotype": "gene", "name": "BRAFP1", "description": "All Species", "match": "Matched on name <m>BRAFP1</m>" } ] ``` -------------------------------------------------------------------------------- /src/biomcp/biomarkers/__init__.py: -------------------------------------------------------------------------------- ```python """Biomarkers module for NCI Clinical Trials API integration. Note: CTRP documentation indicates biomarker data may have limited public availability. This module focuses on trial eligibility biomarkers. 
""" from .search import search_biomarkers, search_biomarkers_with_or __all__ = ["search_biomarkers", "search_biomarkers_with_or"] ``` -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- ``` [tox] skipsdist = true envlist = py311, py312, py313 [gh-actions] python = 3.11: py311 3.12: py312 3.13: py313 [testenv] passenv = PYTHON_VERSION allowlist_externals = uv commands = uv sync --python {envpython} uv run python -m pytest --doctest-modules tests --cov --cov-config=pyproject.toml --cov-report=xml mypy ``` -------------------------------------------------------------------------------- /src/biomcp/__main__.py: -------------------------------------------------------------------------------- ```python import sys from dotenv import load_dotenv from .cli import app # Load environment variables from .env file load_dotenv() def main(): try: app(standalone_mode=True) except SystemExit as e: sys.exit(e.code) if __name__ == "__main__": main() # Make main() the callable when importing __main__ __call__ = main ``` -------------------------------------------------------------------------------- /.github/workflows/validate-codecov-config.yml: -------------------------------------------------------------------------------- ```yaml name: validate-codecov-config on: pull_request: paths: [codecov.yaml] push: branches: [main] jobs: validate-codecov-config: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v5 - name: Validate codecov configuration run: curl -sSL --fail-with-body --data-binary @codecov.yaml https://codecov.io/validate ``` -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- ```yaml services: biomcp-server: platform: linux/amd64 build: . 
image: us.gcr.io/graceful-medley-134315/biomcp-server:${TAG} container_name: biomcp-server ports: - "8000:8000" environment: - MCP_MODE=streamable_http # Can be 'stdio', 'worker', 'http', or 'streamable_http' - ALPHAGENOME_API_KEY=${ALPHAGENOME_API_KEY:-} restart: unless-stopped ``` -------------------------------------------------------------------------------- /tests/tdd/variants/constants.py: -------------------------------------------------------------------------------- ```python """Constants for variant tests.""" # API retry settings API_RETRY_DELAY_SECONDS = 1.0 MAX_RETRY_ATTEMPTS = 2 # Test data settings DEFAULT_MAX_STUDIES = 10 # Number of studies to query in integration tests STRUCTURE_CHECK_LIMIT = ( 3 # Number of items to check when verifying data structures ) # Timeout settings INTEGRATION_TEST_TIMEOUT = 30.0 # Maximum time for integration tests ``` -------------------------------------------------------------------------------- /src/biomcp/resources/getter.py: -------------------------------------------------------------------------------- ```python from pathlib import Path from .. 
class DummyCache:
    """Minimal in-memory cache used by the tests in place of a real cache.

    Values live in the plain dict ``store``. Only the methods visible here
    (``set``, ``get``, ``count``, ``close``) are provided.
    """

    def __init__(self):
        # Backing storage: key -> cached value.
        self.store = dict()

    def set(self, key, value, expire=None):
        """Store ``value`` under ``key``.

        ``expire`` is accepted but ignored: entries never time out.
        """
        self.store.update({key: value})

    def get(self, key, default=None):
        """Return the value stored under ``key``, or ``default`` if absent."""
        try:
            return self.store[key]
        except KeyError:
            return default

    @property
    def count(self):
        """Number of entries currently stored."""
        entries = self.store
        return len(entries)

    def close(self):
        """Drop every stored entry."""
        self.store.clear()
"Set up Python environment for the given Python version" inputs: python-version: description: "Python version to use" required: true default: "3.12" uv-version: description: "uv version to use" required: true default: "0.5.20" runs: using: "composite" steps: - uses: actions/setup-python@v5 with: python-version: ${{ inputs.python-version }} - name: Install uv uses: astral-sh/setup-uv@v2 with: version: ${{ inputs.uv-version }} enable-cache: "true" cache-suffix: ${{ matrix.python-version }} - name: Install Python dependencies run: uv sync --frozen shell: bash ``` -------------------------------------------------------------------------------- /src/biomcp/__init__.py: -------------------------------------------------------------------------------- ```python from .core import ensure_list, logger, mcp_app, StrEnum from . import constants from . import http_client from . import render from . import articles from . import trials from . import variants from . import resources from . import thinking from . import query_parser from . import query_router from . import router from . import thinking_tool from . import individual_tools from . import cbioportal_helper __all__ = [ "StrEnum", "articles", "cbioportal_helper", "constants", "ensure_list", "http_client", "individual_tools", "logger", "mcp_app", "query_parser", "query_router", "render", "resources", "router", "thinking", "thinking_tool", "trials", "variants", ] ``` -------------------------------------------------------------------------------- /docs/developer-guides/generate_endpoints.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python3 """ Generate third-party endpoints documentation from the endpoint registry. This script reads the endpoint registry and generates a markdown file documenting all third-party API endpoints used by BioMCP. 
""" import sys from pathlib import Path # Add src to Python path sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) from biomcp.utils.endpoint_registry import EndpointRegistry def main(): """Generate endpoints documentation.""" # Initialize registry registry = EndpointRegistry() # Generate markdown report markdown_content = registry.generate_markdown_report() # Write to file output_path = Path(__file__).parent / "03-third-party-endpoints.md" output_path.write_text(markdown_content) print(f"Generated endpoints documentation: {output_path}") if __name__ == "__main__": main() ``` -------------------------------------------------------------------------------- /tests/tdd/articles/test_fetch.py: -------------------------------------------------------------------------------- ```python import json from biomcp.articles.fetch import fetch_articles pmids = [39293516, 34397683, 37296959] async def test_fetch_full_text(anyio_backend): results = await fetch_articles(pmids, full=True, output_json=True) assert isinstance(results, str) data = json.loads(results) assert len(data) == 3 for item in data: assert item["pmid"] in pmids assert len(item["title"]) > 10 assert len(item["abstract"]) > 100 assert item["full_text"] is not None async def test_fetch_abstracts(anyio_backend): results = await fetch_articles(pmids, full=False, output_json=True) assert isinstance(results, str) data = json.loads(results) assert len(data) == 3 for item in data: assert item["pmid"] in pmids assert len(item["title"]) > 10 assert len(item["abstract"]) > 100 assert "full_text" not in item ``` -------------------------------------------------------------------------------- /src/biomcp/openfda/__init__.py: -------------------------------------------------------------------------------- ```python """ OpenFDA integration for BioMCP. Provides access to FDA drug labels, adverse events, device data, drug approvals, recalls, and shortage information. 
""" from .adverse_events import ( search_adverse_events, get_adverse_event, ) from .drug_labels import ( search_drug_labels, get_drug_label, ) from .device_events import ( search_device_events, get_device_event, ) from .drug_approvals import ( search_drug_approvals, get_drug_approval, ) from .drug_recalls import ( search_drug_recalls, get_drug_recall, ) from .drug_shortages import ( search_drug_shortages, get_drug_shortage, ) __all__ = [ "get_adverse_event", "get_device_event", "get_drug_approval", "get_drug_label", "get_drug_recall", "get_drug_shortage", "search_adverse_events", "search_device_events", "search_drug_approvals", "search_drug_labels", "search_drug_recalls", "search_drug_shortages", ] ``` -------------------------------------------------------------------------------- /.github/workflows/deploy-docs.yml: -------------------------------------------------------------------------------- ```yaml name: Deploy Documentation on: # Allows you to manually trigger this workflow from the Actions tab workflow_dispatch: # Automatically trigger on pushes to main IF docs changed push: branches: - main paths: - "docs/**" - "mkdocs.yml" - ".github/workflows/deploy-docs.yml" jobs: deploy: runs-on: ubuntu-latest permissions: contents: write steps: - name: Check out code uses: actions/checkout@v5 with: fetch-depth: 0 - name: Set up Python environment uses: ./.github/actions/setup-python-env with: python-version: "3.11" uv-version: "0.5.20" - name: Configure Git User run: | git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" - name: Deploy documentation using MkDocs run: | uv run mkdocs gh-deploy --force ``` -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- ```dockerfile # Generated by https://smithery.ai. 
See: https://smithery.ai/docs/config#dockerfile FROM python:3.11-slim # set work directory WORKDIR /app # Install build dependencies and git (needed for AlphaGenome) RUN apt-get update && apt-get install -y --no-install-recommends gcc build-essential git && rm -rf /var/lib/apt/lists/* # Copy requirements (pyproject.toml, etc.) COPY pyproject.toml . COPY README.md . COPY LICENSE . # Copy source code COPY src ./src COPY tests ./tests COPY Makefile . COPY tox.ini . # Install the package with worker dependencies RUN pip install --upgrade pip && pip install .[worker] # Clone and install AlphaGenome RUN git clone https://github.com/google-deepmind/alphagenome.git /tmp/alphagenome && \ pip install /tmp/alphagenome && \ rm -rf /tmp/alphagenome # Expose port for remote MCP connections EXPOSE 8000 # Set default mode to stdio, but allow it to be overridden ENV MCP_MODE=stdio # Run the MCP server with configurable mode CMD ["sh", "-c", "biomcp run --mode ${MCP_MODE}"] ``` -------------------------------------------------------------------------------- /src/biomcp/thinking_tracker.py: -------------------------------------------------------------------------------- ```python """Track thinking tool usage within MCP sessions. This module provides a simple mechanism to track whether the think tool has been used in the current session, encouraging AI clients to follow best practices. 
""" from contextvars import ContextVar # Track if thinking has been used in current context thinking_used: ContextVar[bool] = ContextVar("thinking_used", default=False) def mark_thinking_used() -> None: """Mark that the thinking tool has been used.""" thinking_used.set(True) def has_thinking_been_used() -> bool: """Check if thinking tool has been used in current context.""" return thinking_used.get() def reset_thinking_tracker() -> None: """Reset the thinking tracker (for testing).""" thinking_used.set(False) def get_thinking_reminder() -> str: """Get a reminder message if thinking hasn't been used.""" if not has_thinking_been_used(): return ( "\n\n⚠️ **REMINDER**: You haven't used the 'think' tool yet! " "For optimal results, please use 'think' BEFORE searching to plan " "your research strategy and ensure comprehensive analysis." ) return "" ``` -------------------------------------------------------------------------------- /tests/bdd/cli_help/test_help.py: -------------------------------------------------------------------------------- ```python import shlex from pytest_bdd import given, parsers, scenarios, then from typer.testing import CliRunner from biomcp.cli.main import app # Link to the feature file scenarios("help.feature") runner = CliRunner() @given(parsers.parse('I run "{command}"'), target_fixture="cli_result") def cli_result(command): """ Run the given CLI command and return the result. """ # Remove the initial token ("biomcp") if present args = ( shlex.split(command)[1:] if command.startswith("biomcp") else shlex.split(command) ) result = runner.invoke(app, args) assert result.exit_code == 0, f"CLI command failed: {result.stderr}" return result @then(parsers.parse('the output should contain "{expected}"')) def output_should_contain(cli_result, expected): """ Verify that the output contains the expected text. This helper handles both plain text and rich-formatted text outputs. 
""" # Check if the expected text is in the output, ignoring case assert ( expected.lower() in cli_result.stdout.lower() ), f"Expected output to contain '{expected}', but it did not.\nActual output: {cli_result.stdout}" ``` -------------------------------------------------------------------------------- /tests/tdd/articles/test_autocomplete.py: -------------------------------------------------------------------------------- ```python from biomcp.articles.autocomplete import Entity, EntityRequest, autocomplete async def test_autocomplete(anyio_backend, http_cache): # new cache for each call assert http_cache.count == 0 # gene (compare using entity_id directly) request = EntityRequest(concept="gene", query="her2") entity = await autocomplete(request=request) assert entity.entity_id == "@GENE_ERBB2" # variant request = EntityRequest(concept="variant", query="BRAF V600E") assert await autocomplete(request=request) == Entity( _id="@VARIANT_p.V600E_BRAF_human", biotype="variant", name="p.V600E", ) # disease request = EntityRequest(concept="disease", query="lung adenocarcinoma") assert await autocomplete(request=request) == Entity( _id="@DISEASE_Adenocarcinoma_of_Lung", biotype="disease", name="Adenocarcinoma of Lung", match="Multiple matches", ) assert http_cache.count == 3 # duplicate request uses the cached response request = EntityRequest(concept="gene", query="her2") entity = await autocomplete(request=request) assert entity.entity_id == "@GENE_ERBB2" assert http_cache.count == 3 ``` -------------------------------------------------------------------------------- /scripts/generate_endpoints_doc.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python3 """Generate THIRD_PARTY_ENDPOINTS.md documentation.""" import shutil import subprocess import sys from pathlib import Path # Add src to path sys.path.insert(0, str(Path(__file__).parent.parent / "src")) from biomcp.utils.endpoint_registry import get_registry def 
main(): """Generate the endpoints documentation.""" registry = get_registry() output_path = Path(__file__).parent.parent / "THIRD_PARTY_ENDPOINTS.md" # Generate new content new_content = registry.generate_markdown_report() # Write new content output_path.write_text(new_content) # Run prettier to format the file npx_path = shutil.which("npx") if npx_path: try: # Safe: npx_path from shutil.which, output_path is controlled subprocess.run( # noqa: S603 [npx_path, "prettier", "--write", str(output_path)], check=True, capture_output=True, text=True, ) except subprocess.CalledProcessError as e: print(f"Warning: prettier formatting failed: {e.stderr}") else: print("Warning: npx not found, skipping prettier formatting") print(f"Generated {output_path}") if __name__ == "__main__": main() ``` -------------------------------------------------------------------------------- /tests/data/openfda/drugsfda_search.json: -------------------------------------------------------------------------------- ```json { "meta": { "results": { "skip": 0, "limit": 10, "total": 25 } }, "results": [ { "application_number": "BLA125514", "sponsor_name": "MERCK SHARP DOHME", "openfda": { "application_number": ["BLA125514"], "brand_name": ["KEYTRUDA"], "generic_name": ["PEMBROLIZUMAB"], "manufacturer_name": ["Merck Sharp & Dohme Corp."], "substance_name": ["PEMBROLIZUMAB"] }, "products": [ { "product_number": "001", "reference_drug": "Yes", "brand_name": "KEYTRUDA", "active_ingredients": [ { "name": "PEMBROLIZUMAB", "strength": "100MG/4ML" } ], "reference_standard": "Yes", "dosage_form": "INJECTION, SOLUTION", "route": "INTRAVENOUS", "marketing_status": "Prescription" } ], "submissions": [ { "submission_type": "BLA", "submission_number": "125514", "submission_status": "AP", "submission_status_date": "20140904", "submission_class_code": "BLA", "submission_class_code_description": "Biologic License Application" } ] } ] } ``` 
-------------------------------------------------------------------------------- /tests/tdd/variants/test_filters.py: -------------------------------------------------------------------------------- ```python """Tests for the filters module.""" import json import os from typing import Any import pytest from biomcp.variants.filters import filter_variants @pytest.fixture def braf_v600e_variants() -> list[dict[str, Any]]: """Load BRAF V600E test data.""" test_data_path = os.path.join( os.path.dirname(__file__), "../../data/myvariant/variants_full_braf_v600e.json", ) with open(test_data_path) as f: data = json.load(f) return data.get("hits", []) def test_filter_variants_civic_contributors(braf_v600e_variants): """Test filtering out civic.contributors path.""" # Verify that civic.contributors exists in original data variant = braf_v600e_variants[0] assert "civic" in variant assert "contributors" in variant["civic"] assert variant["civic"]["contributors"] is not None # Filter out civic.contributors filtered = filter_variants(braf_v600e_variants) # Verify civic.contributors is removed but civic section remains filtered_variant = filtered[0] assert "civic" in filtered_variant assert "contributors" not in filtered_variant["civic"] # Verify other civic data is preserved assert "id" in filtered_variant["civic"] assert filtered_variant["civic"]["id"] == variant["civic"]["id"] ``` -------------------------------------------------------------------------------- /tests/bdd/search_articles/test_autocomplete.py: -------------------------------------------------------------------------------- ```python import asyncio from pytest_bdd import given, parsers, scenarios, then, when from biomcp.articles.autocomplete import ( Concept, Entity, EntityRequest, autocomplete, ) scenarios("autocomplete.feature") @given( parsers.parse( 'I have a valid concept "{concept}" and a valid query "{query}"', ), target_fixture="entity_request", ) def entity_request(concept: Concept, query: str): return 
EntityRequest(concept=concept, query=query) @given( parsers.parse( 'I have a valid concept "{concept}" and an invalid query "{query}"', ), target_fixture="entity_request", ) def invalid_query_request(concept: Concept, query: str): return EntityRequest(concept=concept, query=query) @when( "I call the Pubtator Autocomplete API", target_fixture="entity", ) def entity(entity_request) -> Entity | None: return asyncio.run(autocomplete(request=entity_request)) @then(parsers.parse('the response entity_id should be "{expected_id}"')) def check_entity_id(entity, expected_id): assert entity.entity_id == expected_id @then(parsers.parse('the response concept should be "{concept}"')) def check_concept(entity, concept): assert entity.concept == concept @then("the response should be empty") def check_empty_response(entity): assert entity is None ``` -------------------------------------------------------------------------------- /src/biomcp/utils/gene_validator.py: -------------------------------------------------------------------------------- ```python """Gene symbol validation utilities.""" import re # Common gene symbol patterns GENE_SYMBOL_PATTERN = re.compile(r"^[A-Z][A-Z0-9-]*(\.[0-9]+)?$") # Known problematic or invalid gene symbols INVALID_GENES = { "INVALID", "UNKNOWN", "NULL", "NONE", "TEST", "INVALID_GENE_XYZ", } def is_valid_gene_symbol(gene: str | None) -> bool: """Validate if a string is a valid gene symbol. 
@then(parsers.parse('the response should contain the article "{pmid:d}"'))
def step_impl(result: list[dict], pmid: int):
    """Assert that the search results include an article with the given PMID.

    Args:
        result: Articles returned by the search step, each a dict with a
            "pmid" key.
        pmid: The PubMed ID expected to be present.
    """
    pm_ids = [article["pmid"] for article in result]
    # The message must be an f-string; without the prefix the failure output
    # showed the literal text "{pm_ids}" instead of the PMIDs that came back.
    assert pmid in pm_ids, f"pmid {pmid} not found in {pm_ids}"
"{phrase}"'), ) def step_check_abstract(result: list[dict], pmid: int, phrase: str): for r in result: if r["pmid"] == pmid and r.get("abstract"): assert ( phrase in r["abstract"] ), f"Phrase '{phrase}' not found in article {pmid}'s abstract" return raise AssertionError(f"Article {pmid} not found or has no abstract") ``` -------------------------------------------------------------------------------- /src/biomcp/workers/worker.py: -------------------------------------------------------------------------------- ```python """Worker implementation for BioMCP.""" from fastapi import FastAPI, Response from fastapi.middleware.cors import CORSMiddleware from starlette.responses import JSONResponse from starlette.routing import Route from .. import mcp_app app = FastAPI(title="BioMCP Worker", version="0.1.10") # Add CORS middleware app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) streamable_app = mcp_app.streamable_http_app() # Add health endpoint to the streamable app before mounting async def health_check(request): return JSONResponse({"status": "healthy"}) health_route = Route("/health", health_check, methods=["GET"]) streamable_app.routes.append(health_route) app.mount("/", streamable_app) # Health endpoint is now added directly to the streamable_app above # Add OPTIONS endpoint for CORS preflight @app.options("/{path:path}") async def options_handler(path: str): """Handle CORS preflight requests.""" return Response( content="", status_code=204, headers={ "Access-Control-Allow-Origin": "*", "Access-Control-Allow-Methods": "GET, POST, OPTIONS", "Access-Control-Allow-Headers": "*", "Access-Control-Max-Age": "86400", # 24 hours }, ) # Create a stub for create_worker_app to satisfy imports def create_worker_app() -> FastAPI: """Stub for create_worker_app to satisfy import in __init__.py.""" return app ``` -------------------------------------------------------------------------------- 
def pytest_collection_modifyitems(config, items):
    """Mark integration tests as skipped when ``SKIP_INTEGRATION`` is set.

    Every collected item carrying the "integration" keyword receives a skip
    marker; when ``SKIP_INTEGRATION`` is falsy the collection is untouched.
    """
    if not SKIP_INTEGRATION:
        return

    skip_marker = pytest.mark.skip(
        reason="Integration tests disabled via SKIP_INTEGRATION_TESTS env var"
    )
    for test_item in items:
        if "integration" in test_item.keywords:
            test_item.add_marker(skip_marker)
def get_field_value_from_variant(variant, field_path):
    """
    Retrieve a value from a variant dictionary using a simple dot-notation path.
    (This version does not support array indexing.)

    Args:
        variant: The variant dictionary to read from.
        field_path: Dot-separated key path, e.g. "clinvar.rcv.accession".

    Returns:
        The value found at the path, or None when any key along the path is
        missing or when the path runs through a value that is not a dict
        (for example a list or a string).
    """
    value = variant
    for part in field_path.split("."):
        # A list/scalar in the middle of the path cannot be descended into;
        # report "no value" instead of raising AttributeError on `.get`.
        if not isinstance(value, dict):
            return None
        value = value.get(part)
        if value is None:
            return None
    return value
def get_field_value(data, field_path):
    """
    Access a nested value using a dot-notation path.

    Supports array notation like "locations[0]", chained indices like
    "matrix[1][0]", and a bare index segment like "[0]" when the current
    value is itself a list.

    Args:
        data: The parsed JSON structure (dicts and lists) to read from.
        field_path: Dot-separated path; each segment is a key optionally
            followed by one or more "[index]" suffixes.

    Returns:
        The value found at the path.

    Raises:
        KeyError: If a key along the path is missing.
        IndexError: If a list index is out of range.
        ValueError: If an index is not an integer.
    """
    value = data
    for part in field_path.split("."):
        base, bracket, remainder = part.partition("[")
        if bracket and part.endswith("]"):
            # e.g. "locations[0]" -> base "locations", remainder "0]"
            if base:
                value = value[base]
            # "1][0]" -> "1][0" -> ["1", "0"]: apply each index in order.
            for index_str in remainder[:-1].split("]["):
                value = value[int(index_str)]
        else:
            value = value[part]
    return value
dict): # Handle new format with cBioPortal summary hits = data["variants"] if "variants" in data else data.get("hits", []) else: hits = data key_path = field_map.get(key, [key]) # num = variant number for tracking each individual variant for num, hit in enumerate(hits, 1): for value in _recursive_extract(hit, key_path, 0): yield num, value @pytest.fixture(scope="module") def it() -> callable: return iter_value ``` -------------------------------------------------------------------------------- /tests/data/openfda/enforcement_detail.json: -------------------------------------------------------------------------------- ```json { "meta": { "results": { "skip": 0, "limit": 1, "total": 1 } }, "results": [ { "country": "United States", "city": "Princeton", "reason_for_recall": "Presence of N-Nitrosodimethylamine (NDMA) impurity above the acceptable daily intake limit", "address_1": "One Merck Drive", "address_2": "Building 5", "product_quantity": "5,432 bottles", "code_info": "Lot numbers: AB1234 (Exp 12/2024), CD5678 (Exp 01/2025), EF9012 (Exp 02/2025)", "center_classification_date": "20230615", "distribution_pattern": "Nationwide distribution to wholesalers and retail pharmacies in all 50 states", "state": "NJ", "product_description": "Valsartan Tablets USP, 160 mg, 90 count bottles, NDC 0378-5160-90", "report_date": "20230622", "classification": "Class II", "openfda": { "application_number": ["ANDA090802"], "brand_name": ["VALSARTAN"], "generic_name": ["VALSARTAN"], "manufacturer_name": ["Mylan Pharmaceuticals Inc."], "product_ndc": ["0378-5160"], "package_ndc": ["0378-5160-90"], "unii": ["80M03YXJ7I"], "spl_set_id": ["4b5c5f6d-7e8f-9g0h-1i2j-3k4l5m6n7o8p"] }, "more_code_info": "Manufacturing dates: January 2023 - March 2023", "recalling_firm": "Mylan Pharmaceuticals Inc.", "recall_number": "D-0001-2023", "initial_firm_notification": "Letter", "product_type": "Drugs", "event_id": "91234", "termination_date": "", "recall_initiation_date": "20230610", "postal_code": 
"08540-0004", "voluntary_mandated": "Voluntary: Firm Initiated", "status": "Ongoing" } ] } ``` -------------------------------------------------------------------------------- /tests/data/openfda/enforcement_search.json: -------------------------------------------------------------------------------- ```json { "meta": { "results": { "skip": 0, "limit": 10, "total": 45 } }, "results": [ { "country": "United States", "city": "Princeton", "reason_for_recall": "Presence of N-Nitrosodimethylamine (NDMA) impurity", "address_1": "One Merck Drive", "address_2": "", "product_quantity": "5,432 bottles", "code_info": "Lot numbers: AB1234, CD5678, EF9012", "center_classification_date": "20230615", "distribution_pattern": "Nationwide", "state": "NJ", "product_description": "Valsartan Tablets USP, 160 mg, 90 count bottles", "report_date": "20230622", "classification": "Class II", "openfda": { "application_number": ["ANDA090802"], "brand_name": ["VALSARTAN"], "generic_name": ["VALSARTAN"], "manufacturer_name": ["Mylan Pharmaceuticals Inc."] }, "recalling_firm": "Mylan Pharmaceuticals Inc.", "recall_number": "D-0001-2023", "initial_firm_notification": "Letter", "product_type": "Drugs", "event_id": "91234", "recall_initiation_date": "20230610", "postal_code": "08540", "voluntary_mandated": "Voluntary: Firm Initiated", "status": "Ongoing" }, { "country": "United States", "city": "New York", "reason_for_recall": "Contamination with foreign substance", "product_quantity": "10,000 units", "classification": "Class I", "product_description": "Metformin Hydrochloride Extended-Release Tablets, 500 mg", "report_date": "20230515", "recalling_firm": "Generic Pharma Corp", "recall_number": "D-0002-2023", "recall_initiation_date": "20230510", "status": "Completed" } ] } ``` -------------------------------------------------------------------------------- /src/biomcp/logging_filter.py: -------------------------------------------------------------------------------- ```python """Logging filter to 
class ASGIErrorFilter(logging.Filter):
    """Logging filter that drops a few known, non-critical ASGI error records."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Decide whether *record* should be emitted.

        Only records whose level name is ``ERROR`` are ever suppressed.
        An ERROR record is dropped (``False``) when its message:

        * contains "Exception in ASGI application", or
        * contains both "AssertionError" and "http.response.body", or
        * contains "unhandled errors in a TaskGroup" and the attached
          exception type mentions "AssertionError".

        Everything else is allowed through (``True``).
        """
        if record.levelname != "ERROR":
            return True

        text = str(record.getMessage())

        if "Exception in ASGI application" in text:
            return False

        if "http.response.body" in text and "AssertionError" in text:
            return False

        if "unhandled errors in a TaskGroup" not in text:
            return True

        # TaskGroup failures are only suppressed when exception info is
        # attached and it points at an AssertionError.
        if not (hasattr(record, "exc_info") and record.exc_info):
            return True

        exc_class, _exc_value, _traceback = record.exc_info
        return not (exc_class and "AssertionError" in str(exc_class))
def format_shortage_status(shortage: dict[str, Any]) -> list[str]:
    """Format status information for shortage detail.

    Args:
        shortage: Shortage record; only the "status" key is read.

    Returns:
        A one-element list holding the markdown status line. The line starts
        with a red marker when the status text contains "current"
        (case-insensitive) and a green marker otherwise. A missing, None or
        empty status is reported as "Unknown".
    """
    # `.get("status", "Unknown")` still yields None when the key is present
    # with a null value, which then crashed on `.lower()`.
    status = shortage.get("status") or "Unknown"
    status_emoji = "🔴" if "current" in str(status).lower() else "🟢"
    return [f"{status_emoji} **Status**: {status}"]
class OpenFDAInvalidParameterError(OpenFDAError):
    """Error for a request parameter whose value was rejected.

    Attributes set on the instance: ``parameter``, ``value``, ``reason`` and
    the formatted ``message`` (also passed to the base exception).
    """

    def __init__(self, parameter: str, value: str, reason: str):
        self.parameter, self.value, self.reason = parameter, value, reason
        self.message = (
            f"Invalid parameter '{parameter}' with value '{value}': {reason}"
        )
        super().__init__(self.message)
@then("the first article's abstract should be populated")
def check_abstract_populated(cli_result):
    """Verify the first returned article carries a non-blank abstract."""
    first_abstract = cli_result[0].get("abstract")
    # Two separate checks so the failure message says which case occurred.
    assert first_abstract is not None, "Abstract field is missing"
    assert first_abstract.strip() != "", "Abstract field is empty"
def main():
    """Check that every Markdown file under docs/ is referenced in mkdocs.yml.

    Paths are resolved relative to the repository root (the parent of this
    script's directory). File names are compared as POSIX-style relative
    paths.

    Returns:
        0 when every documentation file is referenced in the ``nav`` section,
        1 otherwise (after printing the unreferenced files).
    """
    repo_root = Path(__file__).parent.parent
    docs_dir = repo_root / "docs"
    mkdocs_path = repo_root / "mkdocs.yml"

    # Load mkdocs.yml. The encoding is pinned so the result does not depend on
    # the platform default, and an empty file (parsed as None) is treated as
    # an empty configuration instead of crashing on `.get`.
    with open(mkdocs_path, encoding="utf-8") as f:
        mkdocs_config = yaml.safe_load(f) or {}

    # Extract all referenced files from nav
    referenced_files = set()

    def extract_files(nav_item):
        """Recursively collect ``*.md`` paths from the nav structure."""
        if isinstance(nav_item, dict):
            for value in nav_item.values():
                extract_files(value)
        elif isinstance(nav_item, list):
            for item in nav_item:
                extract_files(item)
        elif isinstance(nav_item, str) and nav_item.endswith(".md"):
            referenced_files.add(nav_item)

    extract_files(mkdocs_config.get("nav", []))

    # Find all markdown files in docs/, relative to docs/. `as_posix()` keeps
    # forward slashes on every OS so the names are comparable with nav entries
    # written as "dir/file.md" (str(path) would use backslashes on Windows).
    all_md_files = {
        md_file.relative_to(docs_dir).as_posix()
        for md_file in docs_dir.rglob("*.md")
    }

    # Find unreferenced files
    unreferenced = all_md_files - referenced_files

    # Exclude some files that shouldn't be in nav (substring match on the path)
    exclude_patterns = {
        "CNAME",  # GitHub pages config
        "README.md",  # If exists
    }

    unreferenced = {
        f
        for f in unreferenced
        if not any(pattern in f for pattern in exclude_patterns)
    }

    if unreferenced:
        print(
            "The following documentation files are not referenced in mkdocs.yml:"
        )
        for file in sorted(unreferenced):
            print(f"  - {file}")
        print("\nPlease add them to the appropriate section in mkdocs.yml")
        return 1

    print("All documentation files are referenced in mkdocs.yml ✓")
    return 0
Args: genes: List of gene symbols to get summaries for request_params: Optional additional parameters for the request Returns: Formatted cBioPortal summary or None if unavailable """ if not genes: return None try: from biomcp.articles.search import PubmedRequest from biomcp.articles.unified import _get_cbioportal_summary # Create a request object for cBioPortal summary request = PubmedRequest(genes=genes) # Add any additional parameters if provided if request_params: for key, value in request_params.items(): if hasattr(request, key): setattr(request, key, value) cbioportal_summary = await _get_cbioportal_summary(request) return cbioportal_summary except Exception as e: logger.warning(f"Failed to get cBioPortal summary: {e}") return None async def get_variant_cbioportal_summary(gene: str | None) -> str | None: """Get cBioPortal summary for variant searches. Args: gene: Gene symbol to get summary for Returns: Formatted cBioPortal summary or None if unavailable """ if not gene: return None try: from biomcp.variants.cbioportal_search import ( CBioPortalSearchClient, format_cbioportal_search_summary, ) client = CBioPortalSearchClient() summary = await client.get_gene_search_summary(gene) if summary: return format_cbioportal_search_summary(summary) return None except Exception as e: logger.warning( f"Failed to get cBioPortal summary for variant search: {e}" ) return None ``` -------------------------------------------------------------------------------- /src/biomcp/utils/rate_limiter.py: -------------------------------------------------------------------------------- ```python """Simple rate limiting utilities for API calls.""" import asyncio import time from collections import defaultdict class RateLimiter: """Simple token bucket rate limiter.""" def __init__(self, rate: int = 10, per_seconds: int = 1): """Initialize rate limiter. 
Args: rate: Number of allowed requests per_seconds: Time window in seconds """ self.rate = rate self.per_seconds = per_seconds self.allowance: dict[str, float] = defaultdict(lambda: float(rate)) self.last_check: dict[str, float] = defaultdict(float) self._lock = asyncio.Lock() async def check_rate_limit( self, key: str = "default" ) -> tuple[bool, float | None]: """Check if request is allowed under rate limit. Args: key: Identifier for rate limit bucket Returns: Tuple of (allowed, wait_time_if_not_allowed) """ async with self._lock: current = time.time() time_passed = current - self.last_check[key] self.last_check[key] = current # Replenish tokens self.allowance[key] += time_passed * (self.rate / self.per_seconds) # Cap at maximum rate if self.allowance[key] > self.rate: self.allowance[key] = float(self.rate) # Check if request allowed if self.allowance[key] >= 1.0: self.allowance[key] -= 1.0 return True, None else: # Calculate wait time wait_time = (1.0 - self.allowance[key]) * ( self.per_seconds / self.rate ) return False, wait_time async def wait_if_needed(self, key: str = "default") -> None: """Wait if rate limited before allowing request.""" allowed, wait_time = await self.check_rate_limit(key) if not allowed and wait_time: await asyncio.sleep(wait_time) # Global rate limiter for cBioPortal API # Conservative: 5 requests per second cbioportal_limiter = RateLimiter(rate=5, per_seconds=1) ``` -------------------------------------------------------------------------------- /src/biomcp/articles/autocomplete.py: -------------------------------------------------------------------------------- ```python """Find entities for a given concept using the PUBTATOR API. Example URL: https://www.ncbi.nlm.nih.gov/research/pubtator3-api/entity/autocomplete/?query=BRAF """ from typing import Literal from pydantic import BaseModel, Field, RootModel from .. 
class Entity(BaseModel):
    """A single entity returned by the PubTator3 autocomplete endpoint.

    Two entities compare equal when their ``entity_id`` values match; the
    other fields are ignored for equality.
    """

    entity_id: str = Field(
        alias="_id",
        examples=["@GENE_BRAF"],
        description="Text-based entity following @<biotype>_<n> format.",
    )
    concept: Concept = Field(
        ...,
        alias="biotype",
        description="Entity label or concept type.",
    )
    name: str = Field(
        ...,
        description="Preferred term of entity concept.",
        examples=[
            "BRAF",
            "Adenocarcinoma of Lung",
            "Osimertinib",
            "EGFR L858R",
        ],
    )
    match: str | None = Field(
        default=None,
        description="Reason for the entity match.",
        examples=["Multiple matches", "Matched on name <m>NAME</m>"],
    )

    def __eq__(self, other: object) -> bool:
        """Compare by ``entity_id``; defer to Python for non-Entity operands."""
        # Returning NotImplemented (instead of touching other.entity_id) makes
        # `entity == None` or `entity == "x"` evaluate to False rather than
        # raising AttributeError.
        if not isinstance(other, Entity):
            return NotImplemented
        return self.entity_id == other.entity_id
User Interface Layer - **biomcp CLI**: Command-line interface for direct usage - **Claude Desktop**: AI assistant integration via MCP - **Python SDK**: Programmatic access for custom applications ### 2. BioMCP Core Layer - **MCP Server**: Handles Model Context Protocol communication - **Cache System**: Smart caching for API responses - **Router**: Unified query routing across data sources ### 3. Data Source Layer - **PubMed/PubTator3**: Biomedical literature and annotations - **ClinicalTrials.gov**: Clinical trial registry - **MyVariant.info**: Genetic variant database - **cBioPortal**: Cancer genomics data - **NCI CTS API**: National Cancer Institute trial data - **BioThings APIs**: Gene, drug, and disease information ## Data Flow 1. **Request Processing**: - User sends query via CLI, Claude, or SDK - BioMCP server receives and validates request - Router determines appropriate data source(s) 2. **Caching Strategy**: - Check cache for existing results - If cache miss, fetch from external API - Store results with appropriate TTL - Return formatted results to user 3. 
**Response Formatting**: - Raw API data is normalized - Domain-specific enrichment applied - Results formatted for consumption ## Architecture References - [Detailed Architecture Diagrams](architecture-diagrams.md) - [Quick Architecture Reference](quick-architecture.md) ## Key Architecture Patterns ### Domain Separation Each data source has its own module with dedicated: - Search functions - Result parsers - Error handlers - Cache strategies ### Unified Interface All domains expose consistent methods: - `search()`: Query for multiple results - `fetch()`: Get detailed record by ID - Common parameter names across domains ### Smart Caching - API responses cached 15-30 minutes - Cache keys include query parameters - Automatic cache invalidation on errors - Per-domain cache configuration ### Error Resilience - Graceful degradation when APIs unavailable - Specific error messages for troubleshooting - Automatic retries with exponential backoff - Fallback to cached data when possible ``` -------------------------------------------------------------------------------- /docs/faq-condensed.md: -------------------------------------------------------------------------------- ```markdown # FAQ - Quick Answers ## Getting Started **Q: What is BioMCP?** A: A unified interface to biomedical databases (PubMed, ClinicalTrials.gov, MyVariant, etc.) for researchers and AI assistants. **Q: Do I need API keys?** A: No for basic use. Yes for: NCI trials (cancer-specific), AlphaGenome (variant predictions), enhanced cBioPortal features. **Q: How do I install it?** A: `uv tool install biomcp` (recommended) or `pip install biomcp-python` ## Common Issues **Q: "Command not found" after installation** A: Restart terminal, or use full path: `~/.local/bin/biomcp` **Q: No results for gene search** A: Use official symbols (ERBB2 not HER2). 
Check at [genenames.org](https://www.genenames.org) **Q: Location search not working** A: Must provide coordinates: `--latitude 42.3601 --longitude -71.0589` **Q: Why does the AI use 'think' first?** A: Required for systematic analysis. Improves search quality and prevents missed connections. ## Search Tips **Q: How to search variant notations?** A: Use OR syntax: `--keyword "V600E|p.V600E|c.1799T>A"` **Q: Include/exclude preprints?** A: Included by default. Use `--no-preprints` to exclude. **Q: Search multiple databases?** A: Use unified search: `search(query="gene:BRAF AND disease:melanoma")` ## Data Questions **Q: How current is the data?** A: Daily updates for PubMed/trials, weekly for BioThings, varies for cBioPortal. **Q: ClinicalTrials.gov vs NCI?** A: CT.gov = comprehensive, NCI = cancer-focused with biomarker filters (needs API key). **Q: What's MSI/TMB/VAF?** A: MSI = Microsatellite Instability, TMB = Tumor Mutational Burden, VAF = Variant Allele Frequency ## Technical **Q: Rate limits?** A: ~3 req/sec without keys, higher with keys. NCI = 1000/day with key. **Q: Cache issues?** A: Clear with: `rm -rf ~/.biomcp/cache` **Q: Which Python version?** A: 3.10+ required ## Quick References **Common Gene Aliases:** - HER2 → ERBB2 - PD-L1 → CD274 - c-MET → MET **City Coordinates:** - NYC: 40.7128, -74.0060 - Boston: 42.3601, -71.0589 - LA: 34.0522, -118.2437 **Trial Status:** - RECRUITING = Currently enrolling - ACTIVE_NOT_RECRUITING = Ongoing - COMPLETED = Finished ## Getting Help 1. Check this FAQ 2. Read [Troubleshooting](troubleshooting.md) 3. Search [GitHub Issues](https://github.com/genomoncology/biomcp/issues) 4. 
def _get_nested_value(data: dict[str, Any], path: str) -> Any:
    """Get a nested value from a dictionary using dot notation path.

    Args:
        data: The dictionary to read from.
        path: Dot-separated key path, e.g. "civic.contributors".

    Returns:
        The value stored at ``path``, or None when any key along the path is
        missing or an intermediate value is not a dictionary.
    """
    current: Any = data
    # Walk every key, including the last one; stopping at keys[:-1] returned
    # the parent container rather than the requested value.
    for key in path.split("."):
        if not isinstance(current, dict) or key not in current:
            return None
        current = current[key]
    return current
Args: variants: List of variant dictionaries from MyVariant.info API Returns: List of variant dictionaries with specified paths removed """ # Create a deep copy to avoid modifying the input filtered_variants = [] for variant in variants: # Create a deep copy of the variant filtered_variant = _deep_copy_dict(variant) # Remove specified paths for path in PATH_FILTERS: _delete_nested_path(filtered_variant, path) filtered_variants.append(filtered_variant) return filtered_variants PATH_FILTERS = [ "civic.contributors", "civic.molecularProfiles", "dbsnp.gene.rnas", "dbnsfp.clinvar", # duplicate of root-level clinvar "civic.lastAcceptedRevisionEvent", "civic.lastSubmittedRevisionEvent", "civic.creationActivity", ] ``` -------------------------------------------------------------------------------- /.github/workflows/on-release-main.yml: -------------------------------------------------------------------------------- ```yaml name: release-main on: release: types: [published] branches: [main] jobs: set-version: runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v5 - name: Export tag id: vars run: echo tag=${GITHUB_REF#refs/*/} >> $GITHUB_OUTPUT if: ${{ github.event_name == 'release' }} - name: Update project version run: | sed -i "s/^version = \".*\"/version = \"$RELEASE_VERSION\"/" pyproject.toml env: RELEASE_VERSION: ${{ steps.vars.outputs.tag }} if: ${{ github.event_name == 'release' }} - name: Upload updated pyproject.toml uses: actions/upload-artifact@v4 with: name: pyproject-toml path: pyproject.toml test: runs-on: ubuntu-latest needs: [set-version] steps: - name: Check out uses: actions/checkout@v5 - name: Download updated pyproject.toml uses: actions/download-artifact@v5 with: name: pyproject-toml - name: Set up Python uses: actions/setup-python@v6 with: python-version: "3.12" - name: Install uv uses: astral-sh/setup-uv@v7 with: version: "0.4.29" - name: Install dependencies run: uv sync --group dev - name: Run tests run: uv run python -m pytest tests --cov 
--cov-config=pyproject.toml --cov-report=xml publish: runs-on: ubuntu-latest needs: [set-version, test] permissions: id-token: write environment: release steps: - name: Check out uses: actions/checkout@v5 - name: Set up the environment uses: ./.github/actions/setup-python-env - name: Download updated pyproject.toml uses: actions/download-artifact@v5 with: name: pyproject-toml - name: Build package run: uvx --from build pyproject-build --installer uv - name: Check package run: uvx twine check dist/* - name: Publish package uses: pypa/gh-action-pypi-publish@release/v1 with: verbose: true deploy-docs: needs: publish runs-on: ubuntu-latest steps: - name: Check out uses: actions/checkout@v5 - name: Set up the environment uses: ./.github/actions/setup-python-env - name: Deploy documentation run: uv run mkdocs gh-deploy --force ``` -------------------------------------------------------------------------------- /tests/data/openfda/drugsfda_detail.json: -------------------------------------------------------------------------------- ```json { "meta": { "results": { "skip": 0, "limit": 1, "total": 1 } }, "results": [ { "application_number": "BLA125514", "sponsor_name": "MERCK SHARP DOHME", "openfda": { "application_number": ["BLA125514"], "brand_name": ["KEYTRUDA"], "generic_name": ["PEMBROLIZUMAB"], "manufacturer_name": ["Merck Sharp & Dohme Corp."], "substance_name": ["PEMBROLIZUMAB"], "product_ndc": ["0006-3026-02", "0006-3029-02"], "spl_set_id": ["c0e2de11-29e0-48a1-92f0-d9cb4dd56b15"], "unii": ["DPT0O3T46P"] }, "products": [ { "product_number": "001", "reference_drug": "Yes", "brand_name": "KEYTRUDA", "active_ingredients": [ { "name": "PEMBROLIZUMAB", "strength": "100MG/4ML" } ], "reference_standard": "Yes", "dosage_form": "INJECTION, SOLUTION", "route": "INTRAVENOUS", "marketing_status": "Prescription" }, { "product_number": "002", "reference_drug": "Yes", "brand_name": "KEYTRUDA", "active_ingredients": [ { "name": "PEMBROLIZUMAB", "strength": "50MG/VIAL" } ], 
"reference_standard": "Yes", "dosage_form": "INJECTION, POWDER, LYOPHILIZED, FOR SOLUTION", "route": "INTRAVENOUS", "marketing_status": "Prescription" } ], "submissions": [ { "submission_type": "BLA", "submission_number": "125514", "submission_status": "AP", "submission_status_date": "20140904", "review_priority": "P", "submission_class_code": "BLA", "submission_class_code_description": "Biologic License Application", "application_docs": [ { "id": "52674", "url": "https://www.accessdata.fda.gov/drugsatfda_docs/label/2014/125514lbl.pdf", "date": "20140905", "type": "Label" } ] }, { "submission_type": "SUPPL", "submission_number": "109", "submission_status": "AP", "submission_status_date": "20230316", "submission_class_code": "SUPPL", "submission_class_code_description": "Supplement" } ] } ] } ``` -------------------------------------------------------------------------------- /src/biomcp/exceptions.py: -------------------------------------------------------------------------------- ```python """Custom exceptions for BioMCP.""" from typing import Any class BioMCPError(Exception): """Base exception for all BioMCP errors.""" def __init__(self, message: str, details: dict[str, Any] | None = None): super().__init__(message) self.message = message self.details = details or {} class BioMCPSearchError(BioMCPError): """Base exception for search-related errors.""" pass class InvalidDomainError(BioMCPSearchError): """Raised when an invalid domain is specified.""" def __init__(self, domain: str, valid_domains: list[str]): message = f"Unknown domain: {domain}. Valid domains are: {', '.join(valid_domains)}" super().__init__( message, {"domain": domain, "valid_domains": valid_domains} ) class InvalidParameterError(BioMCPSearchError): """Raised when invalid parameters are provided.""" def __init__(self, parameter: str, value: Any, expected: str): message = f"Invalid value for parameter '{parameter}': {value}. 
Expected: {expected}" super().__init__( message, {"parameter": parameter, "value": value, "expected": expected}, ) class SearchExecutionError(BioMCPSearchError): """Raised when a search fails to execute.""" def __init__(self, domain: str, error: Exception): message = f"Failed to execute search for domain '{domain}': {error!s}" super().__init__( message, {"domain": domain, "original_error": str(error)} ) class ResultParsingError(BioMCPSearchError): """Raised when results cannot be parsed.""" def __init__(self, domain: str, error: Exception): message = f"Failed to parse results for domain '{domain}': {error!s}" super().__init__( message, {"domain": domain, "original_error": str(error)} ) class QueryParsingError(BioMCPError): """Raised when a query cannot be parsed.""" def __init__(self, query: str, error: Exception): message = f"Failed to parse query '{query}': {error!s}" super().__init__( message, {"query": query, "original_error": str(error)} ) class ThinkingError(BioMCPError): """Raised when sequential thinking encounters an error.""" def __init__(self, thought_number: int, error: str): message = f"Error in thought {thought_number}: {error}" super().__init__( message, {"thought_number": thought_number, "error": error} ) ``` -------------------------------------------------------------------------------- /docs/stylesheets/announcement.css: -------------------------------------------------------------------------------- ```css /* Announcement Banner Styles */ .announcement-banner { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 12px; padding: 2rem; margin: 2rem 0; box-shadow: 0 10px 30px rgba(0, 0, 0, 0.15); position: relative; overflow: hidden; } .announcement-banner::before { content: ""; position: absolute; top: -50%; right: -50%; width: 200%; height: 200%; background: radial-gradient( circle, rgba(255, 255, 255, 0.1) 0%, transparent 70% ); animation: shimmer 3s infinite; } @keyframes shimmer { 0% { transform: rotate(0deg); } 100% { 
transform: rotate(360deg); } } .announcement-content { position: relative; z-index: 1; } .announcement-banner h2 { color: white !important; margin-top: 0 !important; font-size: 1.8rem; display: flex; align-items: center; gap: 0.5rem; } .announcement-banner .badge-new { background: #ff6b6b; color: white; padding: 0.2rem 0.6rem; border-radius: 20px; font-size: 0.8rem; font-weight: bold; animation: pulse 2s infinite; } @keyframes pulse { 0%, 100% { transform: scale(1); } 50% { transform: scale(1.05); } } .announcement-banner p { color: rgba(255, 255, 255, 0.95) !important; font-size: 1.1rem; margin: 1rem 0; } .announcement-banner .announcement-features { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin: 1.5rem 0; } .announcement-banner .feature-item { background: rgba(255, 255, 255, 0.1); padding: 0.8rem; border-radius: 8px; backdrop-filter: blur(10px); border: 1px solid rgba(255, 255, 255, 0.2); } .announcement-banner .feature-item strong { color: white; display: block; margin-bottom: 0.3rem; } .announcement-banner .feature-item span { color: rgba(255, 255, 255, 0.85); font-size: 0.9rem; } .announcement-banner .cta-button { display: inline-block; background: white; color: #667eea !important; padding: 0.8rem 2rem; border-radius: 50px; text-decoration: none !important; font-weight: bold; margin-top: 1rem; transition: all 0.3s ease; box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2); } .announcement-banner .cta-button:hover { transform: translateY(-2px); box-shadow: 0 6px 20px rgba(0, 0, 0, 0.25); background: #f8f9fa; } .announcement-banner .cta-button::after { content: " →"; font-size: 1.2rem; transition: transform 0.3s ease; display: inline-block; } .announcement-banner .cta-button:hover::after { transform: translateX(5px); } ``` -------------------------------------------------------------------------------- /tests/integration/test_simple.py: -------------------------------------------------------------------------------- ```python 
"""Simple test to verify APIs work without Mastermind key."""

import asyncio
import sys

from biomcp.articles.preprints import EuropePMCClient
from biomcp.variants.external import ExternalVariantAggregator


async def test_preprints():
    """Test that preprint search works.

    Returns:
        True when the Europe PMC search for "cancer" yields at least one
        result, False otherwise.
    """
    print("Testing Europe PMC preprint search...")

    client = EuropePMCClient()

    # Search for a common term
    results = await client.search("cancer")

    if results:
        print(f"✓ Found {len(results)} preprints")
        print(f" First: {results[0].title[:60]}...")
        return True
    else:
        print("✗ No results found")
        return False


async def test_variants_without_mastermind():
    """Test variant aggregator without Mastermind API key.

    Returns:
        True once the aggregator call has completed. Any exception raised by
        the aggregator propagates to the caller.
    """
    print("\nTesting variant aggregator without Mastermind key...")

    # Create aggregator
    aggregator = ExternalVariantAggregator()

    # Test with a variant - even if individual sources fail,
    # the aggregator should handle it gracefully
    result = await aggregator.get_enhanced_annotations(
        "BRAF V600E", include_tcga=True, include_1000g=True
    )

    print("✓ Aggregator completed without errors")
    print(f" Variant ID: {result.variant_id}")
    print(f" TCGA data: {'Found' if result.tcga else 'Not found'}")
    print(
        f" 1000G data: {'Found' if result.thousand_genomes else 'Not found'}"
    )
    print(
        f" Errors: {result.error_sources if result.error_sources else 'None'}"
    )

    # Key test: reaching this line means the aggregator completed
    # successfully. The former ``if True: ... else: ...`` wrapper had an
    # unreachable failure branch, so it is dropped.
    print("✓ Mastermind correctly skipped without API key")
    return True


async def main():
    """Run all tests.

    Returns:
        Process exit code: 0 when both checks pass, 1 otherwise.
    """
    print("=" * 60)
    print("Testing BioMCP features without external API keys")
    print("=" * 60)

    # Test preprints
    preprint_ok = await test_preprints()

    # Test variants
    variant_ok = await test_variants_without_mastermind()

    print("\n" + "=" * 60)
    print("Summary:")
    print(f" Preprint search: {'✓ PASS' if preprint_ok else '✗ FAIL'}")
    print(f" Variant aggregator: {'✓ PASS' if variant_ok else '✗ FAIL'}")
    print("=" * 60)

    if preprint_ok and variant_ok:
        print("\n✓ All features work without external API keys!")
        return 0
    else:
        print("\n✗ Some features failed")
        return 1


if __name__ == "__main__":
    # sys.exit is the supported way to exit a program; the bare ``exit``
    # builtin is injected by the ``site`` module for interactive use only.
    sys.exit(asyncio.run(main()))
inject_links([]) == [] # Test insertion (no REF) insertion = { "chrom": "7", "vcf": {"position": "123", "alt": "A"}, "dbnsfp": {"genename": "GENE1"}, } result = inject_links([insertion])[0] assert ( result["url"]["ucsc_genome_browser"] == "https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position=chr7:123-124" ) # Test deletion (no ALT) deletion = { "chrom": "7", "vcf": {"position": "123", "ref": "AAA"}, "dbnsfp": {"genename": "GENE1"}, } result = inject_links([deletion])[0] assert ( result["url"]["ucsc_genome_browser"] == "https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position=chr7:123-126" ) ``` -------------------------------------------------------------------------------- /docs/genomoncology.md: -------------------------------------------------------------------------------- ```markdown # **GenomOncology: Powering the Future of Precision Medicine** ## **Who We Are** GenomOncology is a leading healthcare technology company dedicated to transforming precision medicine through innovative genomic analysis solutions. We connect complex genomic data and actionable clinical insights, enabling healthcare providers to deliver personalized treatment strategies for cancer patients. ## **Our Commitment to Open Healthcare** We believe in the power of open source systems to further the impact of precision medicine. Through the BioMCP initiative we hope to engage the healthcare community in this open-access ecosystem designed to accelerate innovation in precision medicine. By evolving this open framework, we're moving to create a more collaborative, efficient, and transparent healthcare environment. ## **Our Precision Oncology Platform** Today, our proprietary knowledge management system, known as the Precision Oncology Platform (POP), serves as the backbone of our solutions, continuously aggregating and curating the latest genomic research, clinical trials, and treatment guidelines. 
This system: - Processes and harmonizes data from 40+ scientific and clinical sources - Updates weekly to incorporate the newest research findings - Utilizes advanced NLP to extract meaningful insights from unstructured text - Maintains a comprehensive database of 25,000+ variant-drug associations ## **Real-World Impact** Our technology currently powers precision medicine programs at: - 120+ hospitals and cancer centers - 15 academic medical centers - 8 commercial reference laboratories - 10+ pharmaceutical research programs Processing over 100,000 genomic profiles monthly, our solutions have helped match thousands of patients to targeted therapies and clinical trials, significantly improving outcomes. ## **Join Us In The Next Phase of Transforming Healthcare** By contributing to the BioMCP ecosystem, we're inviting developers to collaborate with us in creating the next generation of precision medicine tools. Whether you're looking to build applications that leverage genomic data, create integrations with existing healthcare systems, or explore novel approaches to biomarker analysis, GenomOncology provides the technological foundation to bring your ideas to life. ## **Get Started** Ready to explore what's possible with GenomOncology and BioMCP? - Clone our repositories on GitHub - Register for API access - Join our developer community Together, we can accelerate precision medicine through open collaboration and innovation. 
"""CLI commands for biomarker search."""

import asyncio
from typing import Annotated

import typer

from ..biomarkers import search_biomarkers
from ..biomarkers.search import format_biomarker_results
from ..integrations.cts_api import CTSAPIError, get_api_key_instructions

biomarker_app = typer.Typer(
    no_args_is_help=True,
    help="Search biomarkers used in clinical trial eligibility criteria",
)


@biomarker_app.command("search")
def search_biomarkers_cli(
    name: Annotated[
        str | None,
        typer.Argument(
            help="Biomarker name to search for (e.g., 'PD-L1', 'EGFR mutation')"
        ),
    ] = None,
    biomarker_type: Annotated[
        str | None,
        typer.Option(
            "--type",
            help="Type of biomarker ('reference_gene' or 'branch')",
        ),
    ] = None,
    page_size: Annotated[
        int,
        typer.Option(
            "--page-size",
            help="Number of results per page",
            min=1,
            max=100,
        ),
    ] = 20,
    page: Annotated[
        int,
        typer.Option(
            "--page",
            help="Page number",
            min=1,
        ),
    ] = 1,
    api_key: Annotated[
        str | None,
        typer.Option(
            "--api-key",
            help="NCI API key (overrides NCI_API_KEY env var)",
            envvar="NCI_API_KEY",
        ),
    ] = None,
) -> None:
    """
    Search for biomarkers used in clinical trial eligibility criteria.

    Note: Biomarker data availability may be limited in CTRP.
    Results focus on biomarkers referenced in trial eligibility criteria.
    For detailed variant annotations, use 'biomcp variant search' with MyVariant.info.

    Examples:
        # Search by biomarker name
        biomcp biomarker search "PD-L1"

        # Search by type
        biomcp biomarker search --type reference_gene

        # Search for specific biomarker
        biomcp biomarker search "EGFR mutation"
    """
    # NOTE: the docstring above is kept verbatim; presumably Typer shows it
    # as the command's help text.
    try:
        # Build the search call, run it to completion, then print the
        # formatted result.
        pending_search = search_biomarkers(
            name=name,
            biomarker_type=biomarker_type,
            page_size=page_size,
            page=page,
            api_key=api_key,
        )
        found = asyncio.run(pending_search)
        typer.echo(format_biomarker_results(found))
    except CTSAPIError as api_error:
        # A missing key gets setup instructions on stdout; every other API
        # failure is reported on stderr. Both end with exit status 1.
        if "API key required" not in str(api_error):
            typer.echo(f"Error: {api_error}", err=True)
        else:
            typer.echo(get_api_key_instructions())
        raise typer.Exit(1) from api_error
    except Exception as unexpected:
        typer.echo(f"Unexpected error: {unexpected}", err=True)
        raise typer.Exit(1) from unexpected
Sequencer "OEO", # Hereditary or Somatic Variant Detection System "QIN", # Tumor Profiling Test "QDI", # Companion Diagnostic "PTA", # Cancer Predisposition Risk Assessment System ] # Common adverse event search fields ADVERSE_EVENT_FIELDS = [ "patient.drug.medicinalproduct", "patient.drug.openfda.brand_name", "patient.drug.openfda.generic_name", "patient.drug.drugindication", "patient.reaction.reactionmeddrapt", ] # Label search fields LABEL_FIELDS = [ "openfda.brand_name", "openfda.generic_name", "indications_and_usage", "boxed_warning", "warnings_and_precautions", "adverse_reactions", "drug_interactions", ] # Device event search fields DEVICE_FIELDS = [ "device.brand_name", "device.generic_name", "device.manufacturer_d_name", "device.openfda.device_name", "device.openfda.medical_specialty_description", ] # Disclaimer text OPENFDA_DISCLAIMER = ( "⚠️ **FDA Data Notice**: Information from openFDA API. " "Not for clinical decision-making. Adverse events don't prove causation. " "Data may be incomplete or delayed. Consult healthcare professionals and " "official FDA sources at fda.gov for medical decisions." ) OPENFDA_SHORTAGE_DISCLAIMER = ( "🚨 **Critical Warning**: Drug shortage information is time-sensitive. " "Always verify current availability with FDA Drug Shortages Database at " "https://www.accessdata.fda.gov/scripts/drugshortages/ before making " "supply chain or treatment decisions." 
) ``` -------------------------------------------------------------------------------- /tests/tdd/utils/test_rate_limiter.py: -------------------------------------------------------------------------------- ```python """Tests for rate limiting utilities.""" import asyncio import time import pytest from biomcp.utils.rate_limiter import RateLimiter class TestRateLimiter: """Test rate limiting functionality.""" @pytest.mark.asyncio async def test_basic_rate_limiting(self): """Test basic rate limiting behavior.""" # Create limiter with 2 requests per second limiter = RateLimiter(rate=2, per_seconds=1) # First two requests should be allowed allowed1, wait1 = await limiter.check_rate_limit() assert allowed1 is True assert wait1 is None allowed2, wait2 = await limiter.check_rate_limit() assert allowed2 is True assert wait2 is None # Third request should be denied with wait time allowed3, wait3 = await limiter.check_rate_limit() assert allowed3 is False assert wait3 is not None assert wait3 > 0 @pytest.mark.asyncio async def test_rate_limit_replenishment(self): """Test that tokens replenish over time.""" # Create limiter with 1 request per second limiter = RateLimiter(rate=1, per_seconds=1) # Use the token allowed1, _ = await limiter.check_rate_limit() assert allowed1 is True # Should be denied immediately allowed2, wait2 = await limiter.check_rate_limit() assert allowed2 is False # Wait for replenishment await asyncio.sleep(1.1) # Should be allowed now allowed3, _ = await limiter.check_rate_limit() assert allowed3 is True @pytest.mark.asyncio async def test_multiple_keys(self): """Test rate limiting with different keys.""" limiter = RateLimiter(rate=1, per_seconds=1) # Use token for key1 allowed1, _ = await limiter.check_rate_limit("key1") assert allowed1 is True # key2 should still have tokens allowed2, _ = await limiter.check_rate_limit("key2") assert allowed2 is True # key1 should be limited allowed3, wait3 = await limiter.check_rate_limit("key1") assert allowed3 is 
"""Metrics and monitoring utilities."""

import inspect
import logging
import time
from collections.abc import Callable
from functools import wraps
from typing import Any, TypeVar, cast

logger = logging.getLogger(__name__)

T = TypeVar("T")


def _log_success(api_name: str, started: float) -> None:
    """Log a successful call with its elapsed time in seconds."""
    logger.info(
        f"{api_name} call succeeded",
        extra={
            "api": api_name,
            "duration": time.perf_counter() - started,
            "status": "success",
        },
    )


def _log_failure(api_name: str, started: float, error: Exception) -> None:
    """Log a failed call with its elapsed time and the exception type."""
    logger.error(
        f"{api_name} call failed: {error}",
        extra={
            "api": api_name,
            "duration": time.perf_counter() - started,
            "status": "error",
            "error_type": type(error).__name__,
        },
    )


def track_api_call(api_name: str):
    """Track API call metrics.

    The returned decorator works on both plain and ``async def`` functions.
    Each call is timed and logged through this module's logger: a success
    at INFO level, an exception at ERROR level (the exception is then
    re-raised unchanged). The log record carries ``api``, ``duration`` and
    ``status`` as extra fields, plus ``error_type`` on failure.

    Args:
        api_name: Name of the API being called

    Returns:
        Decorator function
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        # Pick the wrapper that matches the wrapped function so coroutine
        # functions stay awaitable.
        if inspect.iscoroutinefunction(func):

            @wraps(func)
            async def async_wrapper(*args: Any, **kwargs: Any) -> T:
                # perf_counter is monotonic, so the measured duration is
                # not affected by wall-clock adjustments.
                started = time.perf_counter()
                try:
                    result = await func(*args, **kwargs)  # type: ignore[misc]
                except Exception as e:
                    _log_failure(api_name, started, e)
                    raise
                _log_success(api_name, started)
                return result

            return cast(Callable[..., T], async_wrapper)

        @wraps(func)
        def sync_wrapper(*args: Any, **kwargs: Any) -> T:
            started = time.perf_counter()
            try:
                result = func(*args, **kwargs)
            except Exception as e:
                _log_failure(api_name, started, e)
                raise
            _log_success(api_name, started)
            return result

        return cast(Callable[..., T], sync_wrapper)

    return decorator
protocol module contains critical sections protocol_sections = modules[Module.PROTOCOL] assert "IdentificationModule" in protocol_sections assert "StatusModule" in protocol_sections assert "DescriptionModule" in protocol_sections async def test_cli_default_module_functionality(anyio_backend): # Test directly with both explicit Protocol and None (which should use Protocol) markdown_with_protocol = await get_trial("NCT04280705", Module.PROTOCOL) assert len(markdown_with_protocol) > 10000 # In a real CLI context, the default would be set at the CLI level # This test ensures the Protocol module is valid for that purpose assert "Protocol Section" in markdown_with_protocol async def test_json_output(anyio_backend): # Test JSON output format json_output = await get_trial( "NCT04280705", Module.PROTOCOL, output_json=True ) assert json_output.startswith("{") assert "URL" in json_output assert "NCT04280705" in json_output async def test_error_handling_json_output(anyio_backend): # Test error handling with JSON output json_output = await get_trial( "NCT99999999", Module.PROTOCOL, output_json=True ) assert "error" in json_output assert "NCT99999999" in json_output ``` -------------------------------------------------------------------------------- /wrangler.toml: -------------------------------------------------------------------------------- ```toml name = "biomcp-worker" main = "src/biomcp/workers/worker_entry_stytch.js" compatibility_date = "2025-04-28" [vars] # Environment variables for the worker # These can be overridden in several ways: # 1. In the Cloudflare dashboard under Workers & Pages > your-worker > Settings > Variables # 2. Using wrangler CLI: wrangler secret put REMOTE_MCP_SERVER_URL # 3. During local development: wrangler dev --var REMOTE_MCP_SERVER_URL="http://localhost:8000" # 4. 
In your CI/CD pipeline using environment variables with the format CF_REMOTE_MCP_SERVER_URL REMOTE_MCP_SERVER_URL = "http://localhost:8000" # Replace with your MCP server URL in production # Stytch OAuth Configuration # Replace these placeholder values with your actual Stytch credentials # For development, use test credentials from https://stytch.com/dashboard # For production, use production credentials and api.stytch.com instead of test.stytch.com STYTCH_PROJECT_ID = "project-test-xxxxxxxxxxxx" # Replace with your Stytch Project ID STYTCH_SECRET = "secret-test-xxxxxxxxxxxx" # Replace with your Stytch Secret (use wrangler secret for production) STYTCH_PUBLIC_TOKEN = "public-token-test-xxxxxxxxxxxx" # Replace with your Stytch Public Token STYTCH_API_URL = "https://test.stytch.com/v1" # Use https://api.stytch.com/v1 for production STYTCH_OAUTH_URL = "https://test.stytch.com/v1/public/oauth/google/start" # Update for production # Debug mode - set to true for development, false for production DEBUG = false # JWT Secret for signing tokens - use a strong, unique secret in production # For production, set this as a secret: wrangler secret put JWT_SECRET JWT_SECRET = "replace-with-a-strong-secret-key" # BigQuery variables # For production, set these as secrets or environment variables: # wrangler secret put BQ_PROJECT_ID # wrangler secret put BQ_DATASET # wrangler secret put BQ_SA_KEY_JSON BQ_PROJECT_ID = "your-project-id" # Replace with your actual project ID in production BQ_DATASET = "your_dataset_name" # Replace with your actual dataset in production BQ_TABLE="worker_logs" # Sensitive variables should be stored in the Cloudflare dashboard under Workers & Pages > your-worker > Settings > Secrets # OR you can declare them using npx wrangler secret put BQ_SA_KEY_JSON # Potential secrets: # BQ_SA_KEY_JSON # STYTCH_SECRET # Note: The ability to allow plaintext connections is now configured in the Cloudflare dashboard # under Security settings for your Worker [build] 
command = "" [triggers] crons = [] [observability.logs] enabled = true # KV namespace for storing OAuth tokens and state # Create your KV namespace with: wrangler kv:namespace create OAUTH_KV # Then replace the ID below with your namespace ID [[kv_namespaces]] binding = "OAUTH_KV" id = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" # Replace with your KV namespace ID ``` -------------------------------------------------------------------------------- /docs/concepts/01-what-is-biomcp.md: -------------------------------------------------------------------------------- ```markdown # What is BioMCP? BioMCP is an open-source implementation of the Model Context Protocol (MCP) designed for biomedical research. It connects AI assistants to specialized biomedical databases, enabling natural language access to complex scientific data. [](https://www.youtube.com/watch?v=bKxOWrWUUhM) ## The Bridge to Biomedical Data BioMCP provides AI assistants with direct access to specialized biomedical databases that aren't available through general web search. Built on Anthropic's Model Context Protocol standard, it creates a toolbox that enables natural language queries across multiple scientific data sources. ## Connected Data Sources - **PubMed/PubTator3**: 30M+ research articles with entity recognition for genes, diseases, drugs, and variants - **ClinicalTrials.gov**: 400K+ clinical trials searchable by condition, location, phase, and eligibility - **MyVariant.info**: Comprehensive variant annotations with clinical significance - **cBioPortal**: Cancer genomics data automatically integrated with searches - **BioThings APIs**: Real-time gene, drug, and disease information - **NCI CTS API**: Enhanced cancer trial search with biomarker filtering - **AlphaGenome**: Variant effect predictions using Google DeepMind's AI ## How Does It Transform Research? What makes BioMCP particularly powerful is its conversational nature. 
A researcher might begin with a simple question about a disease, then naturally progress to exploring related clinical trials, and finally investigate genetic variants that affect treatment efficacy—all within a single, flowing conversation. The system remembers context throughout the interaction, allowing for natural follow-up questions and a research experience that mirrors how scientists actually work. Instead of requiring researchers to master complex query languages for each database, BioMCP translates natural language into the precise syntax each system requires.

## Why This Matters

BioMCP represents a significant advancement in making specialized biomedical knowledge accessible. For researchers and clinicians, it means spending less time wrestling with complex database interfaces and more time advancing their work. For the broader field of AI in healthcare, it demonstrates how specialized knowledge domains can be made accessible through conversation. As both AI assistants (synchronous conversation partners) and AI agents (autonomous systems working toward goals over time) continue to evolve, tools like BioMCP will be essential in connecting these systems to the specialized knowledge they need to deliver meaningful insights in complex domains. By open-sourcing BioMCP, we're inviting the community to build upon this foundation, creating more powerful and accessible tools for biomedical research and ultimately accelerating the pace of scientific discovery.
``` -------------------------------------------------------------------------------- /tests/tdd/variants/test_alphagenome.py: -------------------------------------------------------------------------------- ```python """Tests for AlphaGenome integration.""" from unittest.mock import patch import pytest from biomcp.variants.alphagenome import predict_variant_effects @pytest.mark.asyncio async def test_predict_variant_effects_no_api_key(): """Test that missing API key returns helpful error message.""" with patch.dict("os.environ", {}, clear=True): result = await predict_variant_effects( chromosome="chr7", position=140753336, reference="A", alternate="T", ) assert "AlphaGenome API key required" in result assert "https://deepmind.google.com/science/alphagenome" in result assert "ALPHAGENOME_API_KEY" in result @pytest.mark.asyncio async def test_predict_variant_effects_not_installed(): """Test that missing AlphaGenome package returns installation instructions or API error.""" # Since AlphaGenome might be installed in test environments, we need to test both cases # We'll set a dummy API key and check what error we get import os original_key = os.environ.get("ALPHAGENOME_API_KEY") try: os.environ["ALPHAGENOME_API_KEY"] = "test-key" result = await predict_variant_effects( chromosome="chr7", position=140753336, reference="A", alternate="T", skip_cache=True, # Skip cache to ensure fresh results ) # The function should either: # 1. Handle ImportError if AlphaGenome is not installed # 2. Return API error if AlphaGenome is installed but API key is invalid # 3. 
Return a prediction failure for other errors assert any([ "AlphaGenome not installed" in result, "AlphaGenome prediction failed" in result, "API key not valid" in result, # This can happen with invalid test keys ]) if "AlphaGenome not installed" in result: assert "git clone" in result assert "pip install" in result finally: # Restore original key if original_key is None: os.environ.pop("ALPHAGENOME_API_KEY", None) else: os.environ["ALPHAGENOME_API_KEY"] = original_key @pytest.mark.asyncio async def test_predict_variant_effects_basic_parameters(): """Test that function accepts the expected parameters.""" # This tests the function interface without requiring AlphaGenome with patch.dict("os.environ", {}, clear=True): # Test with all parameters result = await predict_variant_effects( chromosome="chrX", position=12345, reference="G", alternate="C", interval_size=500_000, tissue_types=["UBERON:0002367", "UBERON:0001157"], ) # Should get API key error (not import error), proving parameters were accepted assert "AlphaGenome API key required" in result ``` -------------------------------------------------------------------------------- /example_scripts/mcp_integration.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env -S uv --quiet run --script # /// script # requires-python = ">=3.11" # dependencies = [ # "mcp", # ] # /// # Scripts to reproduce this page: # https://biomcp.org/mcp_integration/ import asyncio from mcp.client.session import ClientSession from mcp.client.stdio import StdioServerParameters, stdio_client from mcp.types import TextContent async def check_server(): # Run with pypi package using `uv` not `uvx` server_params = StdioServerParameters( command="uv", args=["run", "--with", "biomcp-python", "biomcp", "run"], ) # # Run with local code # server_params = StdioServerParameters( # command="python", # args=["-m", "biomcp", "run"], # ) async with ( stdio_client(server_params) as (read, write), 
ClientSession(read, write) as session, ): await session.initialize() # list prompts prompts = await session.list_prompts() print("Available prompts:", prompts) # list resources resources = await session.list_resources() print("Available resources:", resources) # list tools tool_result = await session.list_tools() tools = tool_result.tools print("Available tools:", tools) assert len(tools) == 13 # 3 core tools + 10 individual tools # IMPORTANT: Always use think tool first! think_result = await session.call_tool( "think", { "thought": "Planning to analyze variant rs113488022 for BRAF gene...", "thoughtNumber": 1, "totalThoughts": 2, "nextThoughtNeeded": True, }, ) assert ( think_result.isError is False ), f"Think error: {think_result.content}" # Now fetch variant details using unified fetch tool tool_name = "fetch" tool_args = {"domain": "variant", "id_": "rs113488022"} result = await session.call_tool(tool_name, tool_args) assert result.isError is False, f"Error: {result.content}" # --- Assertions --- # 1. Check the call was successful (not an error) assert ( result.isError is False ), f"Tool call resulted in error: {result.content}" # 2. Check there is content assert result.content is not None assert len(result.content) >= 1 # 3. 
"""Cancer type configuration for gene-specific studies."""

# Gene to cancer type keyword mapping
# These keywords are used to filter relevant studies from cBioPortal
GENE_CANCER_KEYWORDS = {
    "BRAF": [
        "skcm",  # melanoma
        "thca",  # thyroid
        "coad",  # colorectal
        "lung",
        "glioma",  # brain
        "hairy_cell",  # hairy cell leukemia
    ],
    "KRAS": [
        "coad",  # colorectal
        "paad",  # pancreatic
        "lung",
        "stad",  # stomach
        "coadread",  # colorectal adenocarcinoma
        "ampca",  # ampullary carcinoma
    ],
    "TP53": [
        "brca",  # breast
        "ov",  # ovarian
        "lung",
        "hnsc",  # head/neck
        "lgg",  # lower grade glioma
        "gbm",  # glioblastoma
        "blca",  # bladder
        "lihc",  # liver
    ],
    "EGFR": [
        "lung",
        "nsclc",  # non-small cell lung cancer
        "gbm",  # glioblastoma
        "hnsc",  # head/neck
    ],
    "PIK3CA": [
        "brca",  # breast
        "hnsc",  # head/neck
        "coad",  # colorectal
        "ucec",  # endometrial
    ],
    "PTEN": [
        "prad",  # prostate
        "gbm",  # glioblastoma
        "ucec",  # endometrial
        "brca",  # breast
    ],
    "APC": [
        "coad",  # colorectal
        "coadread",
        "stad",  # stomach
    ],
    "VHL": [
        "rcc",  # renal cell carcinoma
        "ccrcc",  # clear cell RCC
        "kirc",  # kidney clear cell
    ],
    "RB1": [
        "rbl",  # retinoblastoma
        "sclc",  # small cell lung cancer
        "blca",  # bladder
    ],
    "BRCA1": [
        "brca",  # breast
        "ov",  # ovarian
        "prad",  # prostate
        "paad",  # pancreatic
    ],
    "BRCA2": [
        "brca",  # breast
        "ov",  # ovarian
        "prad",  # prostate
        "paad",  # pancreatic
    ],
    "ALK": [
        "lung",
        "nsclc",  # non-small cell lung cancer
        "alcl",  # anaplastic large cell lymphoma
        "nbl",  # neuroblastoma
    ],
    "MYC": [
        "burkitt",  # Burkitt lymphoma
        "dlbcl",  # diffuse large B-cell lymphoma
        "mm",  # multiple myeloma
        "nbl",  # neuroblastoma
    ],
    "NRAS": [
        "mel",  # melanoma
        "skcm",
        "thca",  # thyroid
        "aml",  # acute myeloid leukemia
    ],
    "KIT": [
        "gist",  # gastrointestinal stromal tumor
        "mel",  # melanoma
        "aml",  # acute myeloid leukemia
    ],
}

# Default keywords for genes not in the mapping
DEFAULT_CANCER_KEYWORDS = ["msk", "tcga", "metabric", "dfci", "broad"]

# Maximum number of studies to query per gene
MAX_STUDIES_PER_GENE = 20

# Maximum mutations to process per study
MAX_MUTATIONS_PER_STUDY = 5000


def get_cancer_keywords(gene: str) -> list[str]:
    """Get cancer type keywords for a given gene.

    The lookup is case-insensitive and ignores surrounding whitespace.

    Args:
        gene: Gene symbol (e.g., "BRAF")

    Returns:
        A new list of cancer type keywords to search for. Genes without a
        dedicated entry get a copy of ``DEFAULT_CANCER_KEYWORDS``.
    """
    symbol = gene.strip().upper()
    # Return a copy: handing out the module-level list would let a caller's
    # in-place edit change the configuration seen by every later lookup.
    return list(GENE_CANCER_KEYWORDS.get(symbol, DEFAULT_CANCER_KEYWORDS))
# --- Main Typer App --- app = typer.Typer( help="BioMCP: Biomedical Model Context Protocol", no_args_is_help=True, # Add a callback to handle top-level options like --version # This callback itself doesn't do much, but allows defining eager options callback=lambda: None, ) app.add_typer( trial_app, name="trial", no_args_is_help=True, ) app.add_typer( article_app, name="article", no_args_is_help=True, ) app.add_typer( variant_app, name="variant", no_args_is_help=True, ) app.add_typer( health_app, name="health", no_args_is_help=True, ) app.add_typer( organization_app, name="organization", no_args_is_help=True, ) app.add_typer( intervention_app, name="intervention", no_args_is_help=True, ) app.add_typer( biomarker_app, name="biomarker", no_args_is_help=True, ) app.add_typer( disease_app, name="disease", no_args_is_help=True, ) app.add_typer( openfda_app, name="openfda", no_args_is_help=True, ) # --- Add --version Option using Annotation --- # We add this directly to the app's callback invocation signature via annotation # Note: This relies on Typer magic linking Annotated options in the callback signature # This approach is cleaner than adding it to every subcommand. @app.callback() def main_callback( version: Annotated[ bool | None, # Allows the option to not be present typer.Option( "--version", # The flag name callback=version_callback, # Function to call when flag is used is_eager=True, # Process this option before any commands help="Show the application's version and exit.", ), ] = None, # Default value ): """ BioMCP main application callback. Handles global options like --version. """ # The actual logic is in version_callback due to is_eager=True pass # --- Add Explicit 'version' Command --- @app.command() def version(): """ Display the installed biomcp version. 
""" typer.echo(f"biomcp version: {__version__}") # Directly expose run_server as the 'run' command with all its options app.command("run")(run_server) if __name__ == "__main__": app() ``` -------------------------------------------------------------------------------- /src/biomcp/openfda/drug_shortages_helpers.py: -------------------------------------------------------------------------------- ```python """ Helper functions for drug shortage search to reduce complexity. """ from datetime import datetime from typing import Any def matches_drug_filter(shortage: dict[str, Any], drug: str | None) -> bool: """Check if shortage matches drug name filter.""" if not drug: return True drug_lower = drug.lower() generic = shortage.get("generic_name", "").lower() brands = [b.lower() for b in shortage.get("brand_names", [])] return drug_lower in generic or any(drug_lower in b for b in brands) def matches_status_filter( shortage: dict[str, Any], status: str | None ) -> bool: """Check if shortage matches status filter.""" if not status: return True status_lower = status.lower() shortage_status = shortage.get("status", "").lower() if status_lower == "current": return "current" in shortage_status elif status_lower == "resolved": return "resolved" in shortage_status return False def matches_category_filter( shortage: dict[str, Any], therapeutic_category: str | None ) -> bool: """Check if shortage matches therapeutic category filter.""" if not therapeutic_category: return True cat_lower = therapeutic_category.lower() shortage_cat = shortage.get("therapeutic_category", "").lower() return cat_lower in shortage_cat def filter_shortages( shortages: list[dict[str, Any]], drug: str | None, status: str | None, therapeutic_category: str | None, ) -> list[dict[str, Any]]: """Filter shortage list based on criteria.""" filtered = [] for shortage in shortages: if not matches_drug_filter(shortage, drug): continue if not matches_status_filter(shortage, status): continue if not 
matches_category_filter(shortage, therapeutic_category): continue filtered.append(shortage) return filtered def format_shortage_search_header( drug: str | None, status: str | None, therapeutic_category: str | None, last_updated: str | None, ) -> list[str]: """Format header for shortage search results.""" output = [] # Add last updated time if last_updated: try: updated_dt = datetime.fromisoformat(last_updated) output.append( f"*Last Updated: {updated_dt.strftime('%Y-%m-%d %H:%M')}*\n" ) except (ValueError, TypeError): pass if drug: output.append(f"**Drug**: {drug}") if status: output.append(f"**Status Filter**: {status}") if therapeutic_category: output.append(f"**Category**: {therapeutic_category}") return output def format_cache_timestamp(data: dict[str, Any]) -> str | None: """Format cache timestamp from data.""" last_updated = data.get("last_updated") or data.get("_fetched_at") if not last_updated: return None try: updated_dt = datetime.fromisoformat(last_updated) return f"*Data Updated: {updated_dt.strftime('%Y-%m-%d %H:%M')}*\n" except (ValueError, TypeError): return None ``` -------------------------------------------------------------------------------- /src/biomcp/core.py: -------------------------------------------------------------------------------- ```python """Core module for BioMCP containing shared resources.""" from contextlib import asynccontextmanager from enum import Enum from typing import Any from mcp.server.fastmcp import FastMCP from mcp.server.fastmcp.utilities.logging import get_logger from .logging_filter import setup_logging_filters # Set up logger first logger = get_logger(__name__) # Set up logging filters to suppress non-critical ASGI errors setup_logging_filters() # Define a lifespan function for startup tasks @asynccontextmanager async def lifespan(mcp): """Lifespan context manager for startup/shutdown tasks.""" # Startup try: from .prefetch import start_prefetching await start_prefetching() except Exception as e: # Don't fail 
class StrEnum(str, Enum):
    """String-valued enum whose members print as their value.

    Value lookup is forgiving: it ignores case and treats spaces and
    underscores as the same character.
    """

    def __str__(self):
        return self.value

    @classmethod
    def _missing_(cls, value):
        """Resolve a value that did not match any member exactly.

        Returns the first member whose value equals ``value`` after both are
        lower-cased and have spaces replaced by underscores, or None (which
        makes the enum raise ``ValueError``) when nothing matches or
        ``value`` is not a string.
        """
        if not isinstance(value, str):
            return None

        wanted = value.lower().replace(" ", "_")
        # A plain case-insensitive match is a special case of this normalised
        # comparison, so one test per member is enough.
        return next(
            (
                member
                for member in cls
                if member.lower().replace(" ", "_") == wanted
            ),
            None,
        )


class PublicationState(StrEnum):
    """Publication state of an article."""

    PREPRINT = "preprint"
    PEER_REVIEWED = "peer_reviewed"
    UNKNOWN = "unknown"
Returns: A list containing the value(s) - If value is None, returns an empty list - If value is a string and split_strings is True, splits by comma and strips whitespace - If value is a string and split_strings is False, wraps it in a list - If value is already a list, returns it unchanged - For other types, wraps them in a list """ if value is None: return [] if isinstance(value, str) and split_strings: # Split by comma and strip whitespace return [item.strip() for item in value.split(",")] if isinstance(value, list): return value # For any other type, wrap it in a list return [value] # Set httpx logger to warn level only httpx_logger = get_logger("httpx") httpx_logger.setLevel("WARN") # Set main logger level logger.setLevel("INFO") ``` -------------------------------------------------------------------------------- /src/biomcp/utils/mutation_filter.py: -------------------------------------------------------------------------------- ```python """Mutation filtering utilities.""" import re from collections.abc import Sequence from typing import Protocol class MutationHitProtocol(Protocol): """Protocol for mutation hit objects.""" protein_change: str class MutationFilter: """Filter mutations based on specific mutation or pattern.""" def __init__( self, specific_mutation: str | None = None, pattern: str | None = None ): """Initialize the filter. Args: specific_mutation: Exact mutation to match (e.g., "V600E") pattern: Pattern to match (e.g., "V600*" for any V600 mutation) """ self.specific_mutation = specific_mutation self.pattern = pattern def matches(self, protein_change: str) -> bool: """Check if a protein change matches the filter criteria. 
Args: protein_change: The protein change to check Returns: True if matches, False otherwise """ if not protein_change: return False if self.specific_mutation: return protein_change == self.specific_mutation if self.pattern: return self._matches_pattern(protein_change) # No filter specified, match all return True def _matches_pattern(self, protein_change: str) -> bool: """Check if protein change matches pattern. Args: protein_change: The protein change to check Returns: True if matches pattern, False otherwise """ if not self.pattern: return False if self.pattern.endswith("*"): # Wildcard pattern (e.g., "V600*" matches "V600E", "V600K", etc.) prefix = self.pattern[:-1] return protein_change.startswith(prefix) # Try regex match try: # Escape special regex characters except * escaped_pattern = re.escape(self.pattern).replace(r"\*", ".*") return bool(re.match(f"^{escaped_pattern}$", protein_change)) except re.error: # Fallback to simple prefix match return protein_change.startswith(self.pattern) def filter_mutations( self, mutations: Sequence[MutationHitProtocol] ) -> list[MutationHitProtocol]: """Filter a list of mutations. 
Args: mutations: List of mutation objects with protein_change attribute Returns: Filtered list of mutations """ if not self.specific_mutation and not self.pattern: return list(mutations) return [mut for mut in mutations if self.matches(mut.protein_change)] def __str__(self) -> str: """String representation of the filter.""" if self.specific_mutation: return f"MutationFilter(specific={self.specific_mutation})" elif self.pattern: return f"MutationFilter(pattern={self.pattern})" else: return "MutationFilter(no_filter)" def __repr__(self) -> str: """Detailed representation of the filter.""" return f"MutationFilter(specific_mutation={self.specific_mutation!r}, pattern={self.pattern!r})" ``` -------------------------------------------------------------------------------- /docs/apis/overview.md: -------------------------------------------------------------------------------- ```markdown # API Reference Overview BioMCP provides multiple interfaces for programmatic access to biomedical data. This reference covers the Python SDK, MCP protocol implementation, and HTTP API endpoints. ## Available APIs ### 1. Python SDK The Python SDK provides async/await interfaces for all BioMCP functionality: - **Client API**: High-level client for all domains - **Domain-specific APIs**: Specialized interfaces for articles, trials, variants - **Streaming API**: For real-time data processing - **Batch API**: For bulk operations See [Python SDK Reference](python-sdk.md) for detailed documentation. ### 2. MCP Protocol BioMCP implements the Model Context Protocol for AI assistant integration: - **24 specialized tools** for biomedical research - **Unified search** across all domains - **Sequential thinking** for complex queries - **Streaming responses** for large datasets See [MCP Tools Reference](../user-guides/02-mcp-tools-reference.md) for implementation details. ### 3. 
HTTP REST API When running in HTTP mode, BioMCP exposes RESTful endpoints: - **Search endpoints** for each domain - **Fetch endpoints** for detailed records - **Health monitoring** endpoints - **WebSocket support** for streaming See [Transport Protocol Guide](../developer-guides/04-transport-protocol.md) for endpoint documentation. ## Common Patterns ### Authentication Most endpoints work without authentication. API keys enable enhanced features: ```python # Python SDK client = BioMCPClient( nci_api_key="your-key", alphagenome_api_key="your-key" ) # HTTP API headers = { "X-NCI-API-Key": "your-key", "X-AlphaGenome-API-Key": "your-key" } ``` ### Error Handling All APIs use consistent error codes: | Code | Meaning | Action | | ---- | ------------ | ------------------ | | 400 | Bad Request | Check parameters | | 401 | Unauthorized | Check API key | | 404 | Not Found | Verify ID exists | | 429 | Rate Limited | Retry with backoff | | 500 | Server Error | Retry later | ### Pagination Standard pagination across all APIs: ```python # Python SDK results = await client.search( domain="article", page=1, page_size=20 ) # HTTP API GET /api/articles?page=1&page_size=20 ``` ### Response Formats All APIs support multiple response formats: - **JSON**: Default, structured data - **JSONL**: Streaming line-delimited JSON - **Markdown**: Human-readable formatting - **CSV**: Tabular data export ## Rate Limits | API | Without Key | With Key | | ------------------ | ----------- | ------------ | | PubMed/PubTator3 | 3 req/sec | 10 req/sec | | ClinicalTrials.gov | 50 req/min | 50 req/min | | BioThings | 3 req/sec | 10 req/sec | | NCI | N/A | 1000 req/day | | AlphaGenome | N/A | 100 req/day | ## Next Steps - [Python SDK Reference](python-sdk.md) - Detailed Python API documentation - [MCP Tools Reference](../user-guides/02-mcp-tools-reference.md) - MCP implementation details - [Transport Protocol Guide](../developer-guides/04-transport-protocol.md) - REST endpoint documentation - [Error Codes 
Reference](error-codes.md) - Complete error code listing ``` -------------------------------------------------------------------------------- /example_scripts/python_sdk.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env -S uv --quiet run --script # /// script # requires-python = ">=3.11" # dependencies = [ # "biomcp-python", # ] # /// # Scripts to reproduce this page: # https://biomcp.org/python_sdk/ import asyncio import json from biomcp.trials.search import ( RecruitingStatus, TrialPhase, TrialQuery, search_trials, ) from biomcp.variants.getter import get_variant from biomcp.variants.search import VariantQuery, search_variants async def find_pathogenic_tp53(): # noinspection PyTypeChecker query = VariantQuery(gene="TP53", significance="pathogenic", size=5) # Get results as Markdown (default) json_output_str = await search_variants(query, output_json=True) data = json.loads(json_output_str) assert len(data) == 5 for item in data: clinvar = item.get("clinvar") for rcv in clinvar.get("rcv", []): assert "pathogenic" in rcv["clinical_significance"].lower() async def get_braf_v600e_details(): variant_id = "chr7:g.140453136A>T" # BRAF V600E variant # Get results as JSON string json_output_str = await get_variant(variant_id, output_json=True) data = json.loads(json_output_str) # Process the variant data assert data, "No data returned for BRAF V600E variant" variant = data[0] clinvar = variant.get("clinvar", {}) cosmic = variant.get("cosmic", {}) docm = variant.get("docm", {}) # Verify key variant details assert clinvar.get("gene", {}).get("symbol") == "BRAF" assert clinvar.get("chrom") == "7" assert clinvar.get("cytogenic") == "7q34" assert cosmic.get("cosmic_id") == "COSM476" assert docm.get("aa_change") == "p.V600E" # Verify HGVS coding variants hgvs_coding = clinvar.get("hgvs", {}).get("coding", []) assert len(hgvs_coding) >= 13 assert "NM_004333.6:c.1799T>A" in hgvs_coding async def find_melanoma_trials(): 
query = TrialQuery( conditions=["Melanoma"], interventions=["Pembrolizumab"], recruiting_status=RecruitingStatus.OPEN, phase=TrialPhase.PHASE3, ) # Get results as JSON string json_output_str = await search_trials(query, output_json=True) data = json.loads(json_output_str) # Verify we got results assert data, "No trials found" assert len(data) >= 2, "Expected at least 2 melanoma trials" # Verify first trial details (NCT05727904) trial1 = data[0] assert trial1["NCT Number"] == "NCT05727904" assert "lifileucel" in trial1["Study Title"].lower() assert trial1["Study Status"] == "RECRUITING" assert trial1["Phases"] == "PHASE3" assert int(trial1["Enrollment"]) == 670 assert "Melanoma" in trial1["Conditions"] assert "Pembrolizumab" in trial1["Interventions"] # Verify second trial details (NCT06697301) trial2 = data[1] assert trial2["NCT Number"] == "NCT06697301" assert "EIK1001" in trial2["Study Title"] assert trial2["Study Status"] == "RECRUITING" assert "PHASE3" in trial2["Phases"] assert int(trial2["Enrollment"]) == 740 assert trial2["Conditions"] == "Advanced Melanoma" def run(): asyncio.run(find_pathogenic_tp53()) asyncio.run(get_braf_v600e_details()) asyncio.run(find_melanoma_trials()) if __name__ == "__main__": run() ``` -------------------------------------------------------------------------------- /src/biomcp/genes/getter.py: -------------------------------------------------------------------------------- ```python """Gene information retrieval from MyGene.info.""" import json import logging from typing import Annotated from pydantic import Field from ..integrations import BioThingsClient from ..render import to_markdown logger = logging.getLogger(__name__) async def get_gene( gene_id_or_symbol: str, output_json: bool = False, ) -> str: """ Get gene information from MyGene.info. 
Args: gene_id_or_symbol: Gene ID (Entrez, Ensembl) or symbol (e.g., "TP53", "7157") output_json: Return as JSON instead of markdown Returns: Gene information as markdown or JSON string """ client = BioThingsClient() try: gene_info = await client.get_gene_info(gene_id_or_symbol) if not gene_info: error_data = { "error": f"Gene '{gene_id_or_symbol}' not found", "suggestion": "Please check the gene symbol or ID", } return ( json.dumps(error_data, indent=2) if output_json else to_markdown([error_data]) ) # Convert to dict for rendering result = gene_info.model_dump(exclude_none=True) # Add helpful links if gene_info.entrezgene: result["_links"] = { "NCBI Gene": f"https://www.ncbi.nlm.nih.gov/gene/{gene_info.entrezgene}", "PubMed": f"https://pubmed.ncbi.nlm.nih.gov/?term={gene_info.symbol}", } # Format aliases nicely if gene_info.alias: result["alias"] = ", ".join( gene_info.alias[:10] ) # Limit to first 10 if len(gene_info.alias) > 10: result["alias"] += f" (and {len(gene_info.alias) - 10} more)" if output_json: return json.dumps(result, indent=2) else: return to_markdown([result]) except Exception as e: logger.error(f"Error fetching gene info for {gene_id_or_symbol}: {e}") error_data = { "error": "Failed to retrieve gene information", "details": str(e), } return ( json.dumps(error_data, indent=2) if output_json else to_markdown([error_data]) ) async def _gene_details( call_benefit: Annotated[ str, "Define and summarize why this function is being called and the intended benefit", ], gene_id_or_symbol: Annotated[ str, Field(description="Gene symbol (e.g., TP53, BRAF) or ID (e.g., 7157)"), ], ) -> str: """ Retrieves detailed information for a single gene from MyGene.info. This tool provides real-time gene annotations including: - Official gene name and symbol - Gene summary/description - Aliases and alternative names - Gene type (protein-coding, etc.) 
- Links to external databases Parameters: - call_benefit: Define why this function is being called - gene_id_or_symbol: Gene symbol (e.g., "TP53") or Entrez ID (e.g., "7157") Process: Queries MyGene.info API for up-to-date gene annotations Output: Markdown formatted gene information with description and metadata Note: For variant information, use variant_searcher. For articles about genes, use article_searcher. """ return await get_gene(gene_id_or_symbol, output_json=False) ``` -------------------------------------------------------------------------------- /src/biomcp/openfda/drug_recalls_helpers.py: -------------------------------------------------------------------------------- ```python """ Helper functions for drug recall search to reduce complexity. """ def build_drug_search_query(drug: str) -> str: """Build search query for drug name.""" return ( f'(openfda.brand_name:"{drug}" OR ' f'openfda.generic_name:"{drug}" OR ' f'product_description:"{drug}")' ) def build_class_search_query(recall_class: str) -> str | None: """Build search query for recall classification.""" # Handle various input formats recall_class = recall_class.strip() # If already in "Class X" format, use it directly if recall_class.upper().startswith("CLASS "): return f'classification:"{recall_class.title()}"' # Map single digits/numerals to Class format class_map = { "1": "Class I", "I": "Class I", "2": "Class II", "II": "Class II", "3": "Class III", "III": "Class III", } if mapped_class := class_map.get(recall_class.upper()): return f'classification:"{mapped_class}"' return None def build_status_search_query(status: str) -> str | None: """Build search query for recall status.""" status_lower = status.lower() if status_lower in ["ongoing", "completed", "terminated"]: return f'status:"{status_lower.capitalize()}"' return None def build_date_search_query(since_date: str) -> str | None: """Build search query for date range.""" if len(since_date) == 8: formatted_date = 
f"{since_date[:4]}-{since_date[4:6]}-{since_date[6:]}" return f"recall_initiation_date:[{formatted_date} TO *]" return None def format_recall_search_header( drug: str | None, recall_class: str | None, status: str | None, since_date: str | None, total: int, ) -> list[str]: """Format header for recall search results.""" output = [] if drug: output.append(f"**Drug**: {drug}") if recall_class: output.append(f"**Classification**: Class {recall_class}") if status: output.append(f"**Status**: {status}") if since_date: output.append(f"**Since**: {since_date}") return output def build_recall_search_params( drug: str | None, recall_class: str | None, status: str | None, reason: str | None, since_date: str | None, limit: int, skip: int, ) -> dict: """Build search parameters for recall API.""" # Build search query search_parts = [] # Default to human drugs only (exclude veterinary) search_parts.append('product_type:"Human"') if drug: search_parts.append(build_drug_search_query(drug)) if recall_class and ( class_query := build_class_search_query(recall_class) ): search_parts.append(class_query) if status and (status_query := build_status_search_query(status)): search_parts.append(status_query) if reason: search_parts.append(f'reason_for_recall:"{reason}"') if since_date and (date_query := build_date_search_query(since_date)): search_parts.append(date_query) # Combine search parts search_params = {} if search_parts: search_params["search"] = " AND ".join(search_parts) # Add pagination search_params["limit"] = str(min(limit, 100)) search_params["skip"] = str(skip) # Sort by recall date (most recent first) search_params["sort"] = "recall_initiation_date:desc" return search_params ``` -------------------------------------------------------------------------------- /src/biomcp/shared_context.py: -------------------------------------------------------------------------------- ```python """Shared context for search operations to avoid redundant validations. 
This module provides a context manager that maintains validated entities (genes, diseases, chemicals) across multiple search operations to improve performance by eliminating redundant API calls. Example: ```python from biomcp.shared_context import SearchContextManager with SearchContextManager() as context: # First validation hits the API is_valid = await context.validate_gene("BRAF") # Subsequent validation uses cache is_valid_again = await context.validate_gene("BRAF") ``` """ from typing import Any class SearchContext: """Shared context to avoid redundant operations across searches. This class maintains a cache of validated entities to prevent redundant API calls during a search session. Attributes: validated_genes: Cache of gene validation results validated_cache: General validation cache for other entities """ def __init__(self): self.validated_genes: dict[str, bool] = {} self.gene_summaries: dict[str, Any] = {} self.cancer_types: dict[str, Any] | None = None self._validation_cache: dict[str, Any] = {} async def validate_gene(self, gene: str) -> bool: """Validate gene symbol with caching.""" if gene in self.validated_genes: return self.validated_genes[gene] # Import here to avoid circular imports from .utils.gene_validator import is_valid_gene_symbol is_valid = is_valid_gene_symbol(gene) self.validated_genes[gene] = is_valid return is_valid def get_gene_summary(self, gene: str) -> Any | None: """Get cached gene summary if available.""" return self.gene_summaries.get(gene) def set_gene_summary(self, gene: str, summary: Any): """Cache gene summary.""" self.gene_summaries[gene] = summary def cache_validation(self, key: str, value: Any): """Cache arbitrary validation results.""" self._validation_cache[key] = value def get_cached_validation(self, key: str) -> Any | None: """Get cached validation result.""" return self._validation_cache.get(key) # Thread-local context for current search operation _search_context: SearchContext | None = None def get_search_context() 
-> SearchContext | None: """Get the current search context.""" return _search_context def set_search_context(context: SearchContext | None): """Set the current search context.""" global _search_context _search_context = context class SearchContextManager: """Context manager for search operations.""" _instance = None def __init__(self): self.context = None self.previous_context = None def __enter__(self): # Use singleton pattern within context if SearchContextManager._instance is None: SearchContextManager._instance = SearchContext() self.context = SearchContextManager._instance self.previous_context = get_search_context() set_search_context(self.context) return self.context def __exit__(self, exc_type, exc_val, exc_tb): set_search_context(self.previous_context) # Clear singleton when last context exits if self.previous_context is None: SearchContextManager._instance = None return False ``` -------------------------------------------------------------------------------- /src/biomcp/utils/request_cache.py: -------------------------------------------------------------------------------- ```python """Simple request-level caching for API calls.""" import asyncio import time from collections import OrderedDict from collections.abc import Awaitable, Callable from functools import wraps from typing import Any, TypeVar # LRU cache with size limit class LRUCache: """Simple LRU cache with TTL support.""" def __init__(self, max_size: int = 1000): self.cache: OrderedDict[str, tuple[Any, float]] = OrderedDict() self.max_size = max_size self._lock = asyncio.Lock() async def get(self, key: str) -> Any | None: """Get item from cache if not expired.""" async with self._lock: if key not in self.cache: return None value, expiry = self.cache[key] if time.time() > expiry: del self.cache[key] return None # Move to end (most recently used) self.cache.move_to_end(key) return value async def set(self, key: str, value: Any, ttl: float): """Set item in cache with TTL.""" async with 
self._lock: # Remove oldest items if at capacity while len(self.cache) >= self.max_size: self.cache.popitem(last=False) expiry = time.time() + ttl self.cache[key] = (value, expiry) # Global LRU cache instance _cache = LRUCache(max_size=1000) # Default TTL in seconds (15 minutes) DEFAULT_TTL = 900 # Named caches for different purposes _named_caches: dict[str, LRUCache] = {} def get_cache( name: str, ttl_seconds: int = 300, max_size: int = 100 ) -> LRUCache: """Get or create a named cache.""" if name not in _named_caches: _named_caches[name] = LRUCache(max_size=max_size) return _named_caches[name] T = TypeVar("T") def cache_key(*args, **kwargs) -> str: """Generate a cache key from function arguments.""" key_parts = [str(arg) for arg in args] key_parts.extend(f"{k}={v}" for k, v in sorted(kwargs.items())) return ":".join(key_parts) async def get_cached(key: str) -> Any | None: """Get a value from cache if not expired.""" return await _cache.get(key) async def set_cached(key: str, value: Any, ttl: int = DEFAULT_TTL) -> None: """Set a value in cache with TTL.""" await _cache.set(key, value, ttl) def request_cache(ttl: int = DEFAULT_TTL) -> Callable: """Decorator for caching async function results. 
Args: ttl: Time to live in seconds Returns: Decorated function with caching """ def decorator( func: Callable[..., Awaitable[T]], ) -> Callable[..., Awaitable[T]]: @wraps(func) async def wrapper(*args, **kwargs) -> T: # Skip caching if explicitly disabled if kwargs.pop("skip_cache", False): return await func(*args, **kwargs) # Generate cache key key = f"{func.__module__}.{func.__name__}:{cache_key(*args, **kwargs)}" # Check cache cached_value = await get_cached(key) if cached_value is not None: return cached_value # Call function and cache result result = await func(*args, **kwargs) if result is not None: # Only cache non-None results await set_cached(key, result, ttl) return result return wrapper return decorator async def clear_cache() -> None: """Clear all cached entries.""" # Use the LRU cache's clear method _cache.cache.clear() ``` -------------------------------------------------------------------------------- /src/biomcp/utils/cbio_http_adapter.py: -------------------------------------------------------------------------------- ```python """Adapter for using centralized HTTP client with cBioPortal API. This module provides a thin wrapper around the centralized HTTP client specifically for cBioPortal API calls. 
class CBioHTTPAdapter:
    """Adapter for cBioPortal API calls using centralized HTTP client.

    URLs are built by appending the given path to ``CBIO_BASE_URL`` and the
    call is delegated to ``request_api``.  When ``CBIO_TOKEN`` is set, an
    ``Authorization`` header is passed along as a JSON-encoded ``_headers``
    entry in the request payload.
    """

    def __init__(self):
        self.base_url = CBIO_BASE_URL
        self.headers = self._build_headers()

    def _build_headers(self) -> dict[str, str]:
        """Build authorization headers if token is available."""
        headers: dict[str, str] = {}

        if CBIO_TOKEN:
            # Accept tokens given with or without the "Bearer " scheme prefix
            if not CBIO_TOKEN.startswith("Bearer "):
                headers["Authorization"] = f"Bearer {CBIO_TOKEN}"
            else:
                headers["Authorization"] = CBIO_TOKEN

        return headers

    def _prepare_request(
        self, payload: dict[str, Any] | None
    ) -> dict[str, Any]:
        """Return a copy of *payload* with the auth headers attached.

        A shallow copy is taken so that the ``_headers`` entry is never
        written into the dictionary owned by the caller.
        """
        request = dict(payload) if payload else {}
        if self.headers:
            # Need to pass headers through params for centralized client
            request["_headers"] = json.dumps(self.headers)
        return request

    async def get(
        self,
        path: str,
        params: dict[str, Any] | None = None,
        endpoint_key: str = "cbioportal_api",
        cache_ttl: int = 900,  # 15 minutes default
    ) -> tuple[dict[str, Any] | None, RequestError | None]:
        """Make a GET request to cBioPortal API.

        Args:
            path: API path (e.g., "/genes/BRAF")
            params: Query parameters (not modified)
            endpoint_key: Registry key for endpoint tracking
            cache_ttl: Cache time-to-live in seconds

        Returns:
            Tuple of (response_data, error)
        """
        url = f"{self.base_url}{path}"

        result, error = await request_api(
            url=url,
            request=self._prepare_request(params),
            method="GET",
            domain="cbioportal",  # For rate limiting
            endpoint_key=endpoint_key,
            cache_ttl=cache_ttl,
            enable_retry=True,
        )

        return result, error

    async def post(
        self,
        path: str,
        data: dict[str, Any],
        endpoint_key: str = "cbioportal_api",
        cache_ttl: int = 0,  # No caching for POST by default
    ) -> tuple[dict[str, Any] | None, RequestError | None]:
        """Make a POST request to cBioPortal API.

        Args:
            path: API path
            data: Request body data (not modified)
            endpoint_key: Registry key for endpoint tracking
            cache_ttl: Cache time-to-live in seconds

        Returns:
            Tuple of (response_data, error)
        """
        url = f"{self.base_url}{path}"

        result, error = await request_api(
            url=url,
            request=self._prepare_request(data),
            method="POST",
            domain="cbioportal",
            endpoint_key=endpoint_key,
            cache_ttl=cache_ttl,
            enable_retry=True,
        )

        return result, error
"MSH2", "MSH6", "PMS2", "ATM", "CHEK2", "PALB2", "RAD51C", "RAD51D", "BRIP1", "CDH1", "STK11", "MUTYH", "BMPR1A", "SMAD4", "ALK", "ROS1", "RET", "MET", "HER2", "FGFR1", "FGFR2", "FGFR3", "FGFR4", "IDH1", "IDH2", "TERT", "ATRX", "H3F3A", "HIST1H3B", "BRAFV600E", # With mutation "KRASG12D", # With mutation "EGFRL858R", # With mutation ] for gene in valid_genes: assert is_valid_gene_symbol( gene ), f"Should accept valid gene: {gene}" def test_invalid_gene_symbols(self): """Test that invalid gene symbols are rejected.""" invalid_genes = [ None, "", " ", " ", "123", # Starts with number "A", # Too short "INVALID_GENE_XYZ", # Known invalid "TEST", "NULL", "NONE", "UNKNOWN", "gene", # Lowercase "Braf", # Mixed case "GENE-WITH-SPECIAL-CHARS!", "GENE WITH SPACES", "GENE/WITH/SLASHES", "GENE.WITH.DOTS", "VERYLONGGENENAMETHATEXCEEDSLIMIT", # Too long "_GENE", # Starts with underscore "-GENE", # Starts with hyphen ] for gene in invalid_genes: assert not is_valid_gene_symbol( gene ), f"Should reject invalid gene: {gene}" def test_gene_symbols_with_version(self): """Test gene symbols with version suffixes.""" versioned_genes = [ "MT-CO1", "MT-CO2", "MT-CO3", "HLA-A", "HLA-B", "HLA-C", "HLA-DRB1", "HLA-DQB1", "HLA-DPB1", ] for gene in versioned_genes: assert is_valid_gene_symbol( gene ), f"Should accept versioned gene: {gene}" def test_sanitize_gene_symbol(self): """Test gene symbol sanitization.""" # Test uppercase conversion assert sanitize_gene_symbol("braf") == "BRAF" assert sanitize_gene_symbol("Tp53") == "TP53" assert sanitize_gene_symbol("kRaS") == "KRAS" # Test whitespace stripping assert sanitize_gene_symbol(" BRAF ") == "BRAF" assert sanitize_gene_symbol("\tTP53\n") == "TP53" assert sanitize_gene_symbol(" KRAS ") == "KRAS" # Test combination assert sanitize_gene_symbol(" braf ") == "BRAF" assert sanitize_gene_symbol("\ttp53\n") == "TP53" ``` -------------------------------------------------------------------------------- /src/biomcp/cli/server.py: 
def run_http_server(host: str, port: int, mode: ServerMode):
    """Run server in HTTP-based mode (worker or streamable_http).

    Args:
        host: Interface to bind to.
        port: TCP port to bind to.
        mode: ``ServerMode.WORKER`` serves the app from ``..workers.worker``;
            any other value serves FastMCP's streamable HTTP app with an
            added ``GET /health`` route.

    Raises:
        typer.Exit: With code 1 when required dependencies cannot be
            imported or the server fails with an unexpected error.
    """
    try:
        from typing import Any

        import uvicorn

        app: Any  # Type will be either FastAPI or Starlette

        if mode == ServerMode.WORKER:
            logger.info("Starting MCP server with Worker/SSE transport")
            try:
                from ..workers.worker import app
            except ImportError as e:
                logger.error(
                    f"Failed to import worker mode dependencies: {e}\n"
                    "Please install with: pip install biomcp-python[worker]"
                )
                raise typer.Exit(1) from e
        else:  # STREAMABLE_HTTP
            logger.info(
                f"Starting MCP server with Streamable HTTP transport on {host}:{port}"
            )
            logger.info(f"Endpoint: http://{host}:{port}/mcp")
            logger.info("Using FastMCP's native Streamable HTTP support")

            try:
                from starlette.responses import JSONResponse
                from starlette.routing import Route
            except ImportError as e:
                logger.error(
                    f"Failed to import Starlette dependencies: {e}\n"
                    "Please install with: pip install biomcp-python[worker]"
                )
                raise typer.Exit(1) from e

            from .. import mcp_app

            # Get FastMCP's streamable_http_app
            app = mcp_app.streamable_http_app()

            # Add health endpoint to the Starlette app
            async def health_check(request):
                return JSONResponse({"status": "healthy"})

            health_route = Route("/health", health_check, methods=["GET"])
            app.routes.append(health_route)

        uvicorn.run(
            app,
            host=host,
            port=port,
            log_level="info",
        )
    except typer.Exit:
        # Raised above after the cause was already logged.  typer.Exit is an
        # ordinary Exception subclass, so without this clause it would fall
        # into the generic handler below and be logged again as an
        # "unexpected error" with a traceback.
        raise
    except ImportError as e:
        logger.error(f"Failed to start {mode.value} mode: {e}")
        raise typer.Exit(1) from e
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}", exc_info=True)
        raise typer.Exit(1) from e
int, "Best estimate of total thoughts (adjust as needed)" ], isRevision: Annotated[ bool, "True when correcting/improving a previous thought" ] = False, revisesThought: Annotated[ int | None, "The thought number being revised" ] = None, branchFromThought: Annotated[ int | None, "Create alternative path from this thought number" ] = None, needsMoreThoughts: Annotated[ bool | None, "True when problem is significantly larger than initially estimated", ] = None, ) -> str: """ ALWAYS use this tool for complex reasoning, analysis, or problem-solving. This facilitates a detailed, step-by-step thinking process that helps break down problems systematically. Use this tool when: - Analyzing complex problems or questions - Planning multi-step solutions - Breaking down tasks into components - Reasoning through uncertainties - Exploring alternative approaches Start with thoughtNumber=1 and totalThoughts as your best estimate. Set nextThoughtNeeded=true to continue thinking, or false when done. You can revise earlier thoughts or branch into alternative paths as needed. This is your primary reasoning tool - USE IT LIBERALLY for any non-trivial thinking task. 
""" # Validate inputs if thoughtNumber < 1: return "Error: thoughtNumber must be >= 1" if totalThoughts < 1: return "Error: totalThoughts must be >= 1" if isRevision and not revisesThought: return "Error: revisesThought must be specified when isRevision=True" # Get or create session session = _session_manager.get_or_create_session() # Create thought entry branch_id = f"branch_{branchFromThought}" if branchFromThought else None entry = ThoughtEntry( thought=thought, thought_number=thoughtNumber, total_thoughts=totalThoughts, next_thought_needed=nextThoughtNeeded, is_revision=isRevision, revises_thought=revisesThought, branch_from_thought=branchFromThought, branch_id=branch_id, metadata={"needsMoreThoughts": needsMoreThoughts} if needsMoreThoughts else {}, ) # Add thought to session session.add_thought(entry) # Generate status message if branchFromThought: status_msg = f"Added thought {thoughtNumber} to branch '{branch_id}'" elif isRevision and revisesThought: status_msg = ( f"Revised thought {revisesThought} (now thought {thoughtNumber})" ) else: status_msg = f"Added thought {thoughtNumber} to main sequence" # Generate progress information progress_msg = f"Progress: {thoughtNumber}/{totalThoughts} thoughts" next_msg = ( "Next thought needed" if nextThoughtNeeded else "Thinking sequence complete" ) return f"{status_msg}. {progress_msg}. {next_msg}." 
``` -------------------------------------------------------------------------------- /docs/stylesheets/extra.css: -------------------------------------------------------------------------------- ```css /* Custom styles for BioMCP documentation */ /* Style for main navigation tabs */ .md-tabs__link { font-weight: 600; text-transform: uppercase; letter-spacing: 0.03em; } /* Bold section headers in sidebar */ .md-nav__item--section > .md-nav__link { font-weight: 700 !important; font-size: 0.9rem !important; margin-top: 0.8rem; margin-bottom: 0.4rem; padding-bottom: 0.4rem; border-bottom: 1px solid var(--md-default-fg-color--lightest); display: block; } /* Nested section headers - slightly smaller */ .md-nav__item--section .md-nav__item--section > .md-nav__link { font-weight: 600 !important; font-size: 0.85rem !important; margin-top: 0.4rem; margin-bottom: 0.2rem; } /* Regular navigation links */ .md-nav__link { font-weight: 400; } /* Active/current page link */ .md-nav__link--active { font-weight: 600 !important; color: var(--md-accent-fg-color) !important; } /* Table of contents header - make it lighter */ .md-nav--secondary > .md-nav__title { font-weight: 600 !important; font-size: 0.8rem !important; text-transform: none !important; letter-spacing: normal !important; color: var(--md-default-fg-color--light) !important; background-color: transparent !important; box-shadow: none !important; border-bottom: 1px solid var(--md-default-fg-color--lightest); padding-bottom: 0.4rem; } /* Add visual separation between major sections */ .md-nav--primary > .md-nav__list > .md-nav__item { margin-bottom: 0.5rem; } /* Improve readability of code blocks */ .highlight pre { line-height: 1.5; overflow-x: auto; white-space: pre; } /* Fix code blocks in grid cards */ .md-typeset .grid.cards code, .md-typeset .grid.cards pre { word-break: break-word; white-space: pre-wrap; overflow-wrap: break-word; } /* Specific fix for grid card code blocks */ .md-typeset .grid.cards .highlight { 
margin: 0.5em 0; } .md-typeset .grid.cards .highlight pre { padding: 0.5em; font-size: 0.8em; } /* Prevent horizontal scroll for inline code */ .md-typeset code { word-break: break-word; } /* Better spacing for admonitions */ .admonition { margin: 1.5rem 0; } /* Improve table readability */ .md-typeset table { font-size: 0.85rem; } /* Make external links more visible */ .md-content a[href^="http"]:not(.md-button)::after { content: " ↗"; font-size: 0.75em; vertical-align: super; opacity: 0.7; } /* Better spacing for navigation expansion arrows */ .md-nav__icon { margin-left: 0.2rem; } /* Accessibility improvements */ /* Ensure focus indicators are visible */ a:focus, button:focus, input:focus, select:focus, textarea:focus { outline: 2px solid var(--md-accent-fg-color); outline-offset: 2px; } /* Skip to main content link */ .md-skip { position: fixed; top: -40px; left: 0; background: var(--md-primary-fg-color); color: var(--md-primary-bg-color); padding: 8px; z-index: 100; text-decoration: none; } .md-skip:focus { top: 0; } /* Improve readability with better line height */ .md-typeset { line-height: 1.6; } /* Ensure code blocks have sufficient contrast */ .highlight pre code { font-size: 0.85rem; line-height: 1.5; } /* Make interactive elements more obvious */ .md-typeset .tabbed-set > input:checked + label { border-bottom: 2px solid var(--md-accent-fg-color); } /* Improve form accessibility */ .md-search__input { font-size: 1rem; } /* Screen reader only text utility */ .sr-only { position: absolute; width: 1px; height: 1px; padding: 0; margin: -1px; overflow: hidden; clip: rect(0, 0, 0, 0); white-space: nowrap; border: 0; } ``` -------------------------------------------------------------------------------- /tests/tdd/test_error_scenarios.py: -------------------------------------------------------------------------------- ```python """Tests for error scenarios and edge cases - fixed version.""" import asyncio from unittest.mock import MagicMock, patch import pytest 
@pytest.mark.asyncio
async def test_concurrent_operations():
    """Test system behavior under concurrent load.

    Records 100 metrics concurrently and checks the aggregated summary:
    every tenth operation is recorded as a failure and durations cycle
    through 0.0, 0.1, 0.2, 0.3 and 0.4 seconds.
    """
    # Clear metrics
    from biomcp.metrics import (
        _metrics_collector,
        get_metric_summary,
        record_metric,
    )

    await _metrics_collector.clear()

    # Simulate concurrent metric recording
    async def record_operation(i):
        await record_metric(
            "concurrent_test",
            duration=0.1 * (i % 5),
            success=i % 10 != 0,  # 10% failure rate
        )

    # Run 100 concurrent operations
    tasks = [record_operation(i) for i in range(100)]
    await asyncio.gather(*tasks)

    # Check metrics
    summary = await get_metric_summary("concurrent_test")
    assert summary is not None
    assert summary.count == 100
    # Compare floats with a tolerance rather than exact equality
    assert summary.error_rate == pytest.approx(0.1)  # 10% errors
    assert (
        0.18 <= summary.avg_duration <= 0.22
    )  # Mean of 0.0, 0.1, 0.2, 0.3, 0.4 is 0.2