This is page 4 of 15. Use http://codebase.md/genomoncology/biomcp?lines=false&page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── actions │ │ └── setup-python-env │ │ └── action.yml │ ├── dependabot.yml │ └── workflows │ ├── ci.yml │ ├── deploy-docs.yml │ ├── main.yml.disabled │ ├── on-release-main.yml │ └── validate-codecov-config.yml ├── .gitignore ├── .pre-commit-config.yaml ├── BIOMCP_DATA_FLOW.md ├── CHANGELOG.md ├── CNAME ├── codecov.yaml ├── docker-compose.yml ├── Dockerfile ├── docs │ ├── apis │ │ ├── error-codes.md │ │ ├── overview.md │ │ └── python-sdk.md │ ├── assets │ │ ├── biomcp-cursor-locations.png │ │ ├── favicon.ico │ │ ├── icon.png │ │ ├── logo.png │ │ ├── mcp_architecture.txt │ │ └── remote-connection │ │ ├── 00_connectors.png │ │ ├── 01_add_custom_connector.png │ │ ├── 02_connector_enabled.png │ │ ├── 03_connect_to_biomcp.png │ │ ├── 04_select_google_oauth.png │ │ └── 05_success_connect.png │ ├── backend-services-reference │ │ ├── 01-overview.md │ │ ├── 02-biothings-suite.md │ │ ├── 03-cbioportal.md │ │ ├── 04-clinicaltrials-gov.md │ │ ├── 05-nci-cts-api.md │ │ ├── 06-pubtator3.md │ │ └── 07-alphagenome.md │ ├── blog │ │ ├── ai-assisted-clinical-trial-search-analysis.md │ │ ├── images │ │ │ ├── deep-researcher-video.png │ │ │ ├── researcher-announce.png │ │ │ ├── researcher-drop-down.png │ │ │ ├── researcher-prompt.png │ │ │ ├── trial-search-assistant.png │ │ │ └── what_is_biomcp_thumbnail.png │ │ └── researcher-persona-resource.md │ ├── changelog.md │ ├── CNAME │ ├── concepts │ │ ├── 01-what-is-biomcp.md │ │ ├── 02-the-deep-researcher-persona.md │ │ └── 03-sequential-thinking-with-the-think-tool.md │ ├── developer-guides │ │ ├── 01-server-deployment.md │ │ ├── 02-contributing-and-testing.md │ │ ├── 03-third-party-endpoints.md │ │ ├── 04-transport-protocol.md │ │ ├── 05-error-handling.md │ │ ├── 06-http-client-and-caching.md │ │ ├── 07-performance-optimizations.md │ │ └── generate_endpoints.py │ ├── faq-condensed.md │ 
├── FDA_SECURITY.md │ ├── genomoncology.md │ ├── getting-started │ │ ├── 01-quickstart-cli.md │ │ ├── 02-claude-desktop-integration.md │ │ └── 03-authentication-and-api-keys.md │ ├── how-to-guides │ │ ├── 01-find-articles-and-cbioportal-data.md │ │ ├── 02-find-trials-with-nci-and-biothings.md │ │ ├── 03-get-comprehensive-variant-annotations.md │ │ ├── 04-predict-variant-effects-with-alphagenome.md │ │ ├── 05-logging-and-monitoring-with-bigquery.md │ │ └── 06-search-nci-organizations-and-interventions.md │ ├── index.md │ ├── policies.md │ ├── reference │ │ ├── architecture-diagrams.md │ │ ├── quick-architecture.md │ │ ├── quick-reference.md │ │ └── visual-architecture.md │ ├── robots.txt │ ├── stylesheets │ │ ├── announcement.css │ │ └── extra.css │ ├── troubleshooting.md │ ├── tutorials │ │ ├── biothings-prompts.md │ │ ├── claude-code-biomcp-alphagenome.md │ │ ├── nci-prompts.md │ │ ├── openfda-integration.md │ │ ├── openfda-prompts.md │ │ ├── pydantic-ai-integration.md │ │ └── remote-connection.md │ ├── user-guides │ │ ├── 01-command-line-interface.md │ │ ├── 02-mcp-tools-reference.md │ │ └── 03-integrating-with-ides-and-clients.md │ └── workflows │ └── all-workflows.md ├── example_scripts │ ├── mcp_integration.py │ └── python_sdk.py ├── glama.json ├── LICENSE ├── lzyank.toml ├── Makefile ├── mkdocs.yml ├── package-lock.json ├── package.json ├── pyproject.toml ├── README.md ├── scripts │ ├── check_docs_in_mkdocs.py │ ├── check_http_imports.py │ └── generate_endpoints_doc.py ├── smithery.yaml ├── src │ └── biomcp │ ├── __init__.py │ ├── __main__.py │ ├── articles │ │ ├── __init__.py │ │ ├── autocomplete.py │ │ ├── fetch.py │ │ ├── preprints.py │ │ ├── search_optimized.py │ │ ├── search.py │ │ └── unified.py │ ├── biomarkers │ │ ├── __init__.py │ │ └── search.py │ ├── cbioportal_helper.py │ ├── circuit_breaker.py │ ├── cli │ │ ├── __init__.py │ │ ├── articles.py │ │ ├── biomarkers.py │ │ ├── diseases.py │ │ ├── health.py │ │ ├── interventions.py │ │ ├── main.py │ │ 
├── openfda.py │ │ ├── organizations.py │ │ ├── server.py │ │ ├── trials.py │ │ └── variants.py │ ├── connection_pool.py │ ├── constants.py │ ├── core.py │ ├── diseases │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── domain_handlers.py │ ├── drugs │ │ ├── __init__.py │ │ └── getter.py │ ├── exceptions.py │ ├── genes │ │ ├── __init__.py │ │ └── getter.py │ ├── http_client_simple.py │ ├── http_client.py │ ├── individual_tools.py │ ├── integrations │ │ ├── __init__.py │ │ ├── biothings_client.py │ │ └── cts_api.py │ ├── interventions │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── logging_filter.py │ ├── metrics_handler.py │ ├── metrics.py │ ├── openfda │ │ ├── __init__.py │ │ ├── adverse_events_helpers.py │ │ ├── adverse_events.py │ │ ├── cache.py │ │ ├── constants.py │ │ ├── device_events_helpers.py │ │ ├── device_events.py │ │ ├── drug_approvals.py │ │ ├── drug_labels_helpers.py │ │ ├── drug_labels.py │ │ ├── drug_recalls_helpers.py │ │ ├── drug_recalls.py │ │ ├── drug_shortages_detail_helpers.py │ │ ├── drug_shortages_helpers.py │ │ ├── drug_shortages.py │ │ ├── exceptions.py │ │ ├── input_validation.py │ │ ├── rate_limiter.py │ │ ├── utils.py │ │ └── validation.py │ ├── organizations │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── parameter_parser.py │ ├── prefetch.py │ ├── query_parser.py │ ├── query_router.py │ ├── rate_limiter.py │ ├── render.py │ ├── request_batcher.py │ ├── resources │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── instructions.md │ │ └── researcher.md │ ├── retry.py │ ├── router_handlers.py │ ├── router.py │ ├── shared_context.py │ ├── thinking │ │ ├── __init__.py │ │ ├── sequential.py │ │ └── session.py │ ├── thinking_tool.py │ ├── thinking_tracker.py │ ├── trials │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── nci_getter.py │ │ ├── nci_search.py │ │ └── search.py │ ├── utils │ │ ├── __init__.py │ │ ├── cancer_types_api.py │ │ ├── cbio_http_adapter.py │ │ ├── endpoint_registry.py │ │ ├── 
gene_validator.py │ │ ├── metrics.py │ │ ├── mutation_filter.py │ │ ├── query_utils.py │ │ ├── rate_limiter.py │ │ └── request_cache.py │ ├── variants │ │ ├── __init__.py │ │ ├── alphagenome.py │ │ ├── cancer_types.py │ │ ├── cbio_external_client.py │ │ ├── cbioportal_mutations.py │ │ ├── cbioportal_search_helpers.py │ │ ├── cbioportal_search.py │ │ ├── constants.py │ │ ├── external.py │ │ ├── filters.py │ │ ├── getter.py │ │ ├── links.py │ │ └── search.py │ └── workers │ ├── __init__.py │ ├── worker_entry_stytch.js │ ├── worker_entry.js │ └── worker.py ├── tests │ ├── bdd │ │ ├── cli_help │ │ │ ├── help.feature │ │ │ └── test_help.py │ │ ├── conftest.py │ │ ├── features │ │ │ └── alphagenome_integration.feature │ │ ├── fetch_articles │ │ │ ├── fetch.feature │ │ │ └── test_fetch.py │ │ ├── get_trials │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── get_variants │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── search_articles │ │ │ ├── autocomplete.feature │ │ │ ├── search.feature │ │ │ ├── test_autocomplete.py │ │ │ └── test_search.py │ │ ├── search_trials │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ ├── search_variants │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ └── steps │ │ └── test_alphagenome_steps.py │ ├── config │ │ └── test_smithery_config.py │ ├── conftest.py │ ├── data │ │ ├── ct_gov │ │ │ ├── clinical_trials_api_v2.yaml │ │ │ ├── trials_NCT04280705.json │ │ │ └── trials_NCT04280705.txt │ │ ├── myvariant │ │ │ ├── myvariant_api.yaml │ │ │ ├── myvariant_field_descriptions.csv │ │ │ ├── variants_full_braf_v600e.json │ │ │ ├── variants_full_braf_v600e.txt │ │ │ └── variants_part_braf_v600_multiple.json │ │ ├── openfda │ │ │ ├── drugsfda_detail.json │ │ │ ├── drugsfda_search.json │ │ │ ├── enforcement_detail.json │ │ │ └── enforcement_search.json │ │ └── pubtator │ │ ├── pubtator_autocomplete.json │ │ └── pubtator3_paper.txt │ ├── integration │ │ ├── test_openfda_integration.py │ │ ├── test_preprints_integration.py │ │ ├── 
test_simple.py │ │ └── test_variants_integration.py │ ├── tdd │ │ ├── articles │ │ │ ├── test_autocomplete.py │ │ │ ├── test_cbioportal_integration.py │ │ │ ├── test_fetch.py │ │ │ ├── test_preprints.py │ │ │ ├── test_search.py │ │ │ └── test_unified.py │ │ ├── conftest.py │ │ ├── drugs │ │ │ ├── __init__.py │ │ │ └── test_drug_getter.py │ │ ├── openfda │ │ │ ├── __init__.py │ │ │ ├── test_adverse_events.py │ │ │ ├── test_device_events.py │ │ │ ├── test_drug_approvals.py │ │ │ ├── test_drug_labels.py │ │ │ ├── test_drug_recalls.py │ │ │ ├── test_drug_shortages.py │ │ │ └── test_security.py │ │ ├── test_biothings_integration_real.py │ │ ├── test_biothings_integration.py │ │ ├── test_circuit_breaker.py │ │ ├── test_concurrent_requests.py │ │ ├── test_connection_pool.py │ │ ├── test_domain_handlers.py │ │ ├── test_drug_approvals.py │ │ ├── test_drug_recalls.py │ │ ├── test_drug_shortages.py │ │ ├── test_endpoint_documentation.py │ │ ├── test_error_scenarios.py │ │ ├── test_europe_pmc_fetch.py │ │ ├── test_mcp_integration.py │ │ ├── test_mcp_tools.py │ │ ├── test_metrics.py │ │ ├── test_nci_integration.py │ │ ├── test_nci_mcp_tools.py │ │ ├── test_network_policies.py │ │ ├── test_offline_mode.py │ │ ├── test_openfda_unified.py │ │ ├── test_pten_r173_search.py │ │ ├── test_render.py │ │ ├── test_request_batcher.py.disabled │ │ ├── test_retry.py │ │ ├── test_router.py │ │ ├── test_shared_context.py.disabled │ │ ├── test_unified_biothings.py │ │ ├── thinking │ │ │ ├── __init__.py │ │ │ └── test_sequential.py │ │ ├── trials │ │ │ ├── test_backward_compatibility.py │ │ │ ├── test_getter.py │ │ │ └── test_search.py │ │ ├── utils │ │ │ ├── test_gene_validator.py │ │ │ ├── test_mutation_filter.py │ │ │ ├── test_rate_limiter.py │ │ │ └── test_request_cache.py │ │ ├── variants │ │ │ ├── constants.py │ │ │ ├── test_alphagenome_api_key.py │ │ │ ├── test_alphagenome_comprehensive.py │ │ │ ├── test_alphagenome.py │ │ │ ├── test_cbioportal_mutations.py │ │ │ ├── 
test_cbioportal_search.py │ │ │ ├── test_external_integration.py │ │ │ ├── test_external.py │ │ │ ├── test_extract_gene_aa_change.py │ │ │ ├── test_filters.py │ │ │ ├── test_getter.py │ │ │ ├── test_links.py │ │ │ └── test_search.py │ │ └── workers │ │ └── test_worker_sanitization.js │ └── test_pydantic_ai_integration.py ├── THIRD_PARTY_ENDPOINTS.md ├── tox.ini ├── uv.lock └── wrangler.toml ``` # Files -------------------------------------------------------------------------------- /docs/getting-started/02-claude-desktop-integration.md: -------------------------------------------------------------------------------- ```markdown # Claude Desktop Integration This guide covers how to integrate BioMCP with Claude Desktop, enabling AI-powered biomedical research directly in your Claude conversations. ## Prerequisites - [Claude Desktop](https://claude.ai/download) application - One of the following: - **Option A**: Python 3.10+ and [uv](https://docs.astral.sh/uv/) (recommended) - **Option B**: [Docker](https://www.docker.com/products/docker-desktop/) ## Installation Methods ### Option A: Using uv (Recommended) This method is fastest and easiest for most users. #### 1. Install uv ```bash # macOS/Linux curl -LsSf https://astral.sh/uv/install.sh | sh # Windows powershell -c "irm https://astral.sh/uv/install.ps1 | iex" ``` #### 2. Configure Claude Desktop Add BioMCP to your Claude Desktop configuration file: **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json` **Windows**: `%APPDATA%\Claude\claude_desktop_config.json` ```json { "mcpServers": { "biomcp": { "command": "uv", "args": ["run", "--with", "biomcp-python", "biomcp", "run"], "env": { "NCI_API_KEY": "your-nci-api-key-here", "ALPHAGENOME_API_KEY": "your-alphagenome-key-here", "CBIO_TOKEN": "your-cbioportal-token-here" } } } } ``` ### Option B: Using Docker This method provides better isolation and consistency across systems. #### 1. 
Create a Dockerfile Create a file named `Dockerfile`: ```dockerfile FROM python:3.11-slim # Install BioMCP RUN pip install biomcp-python # Set the entrypoint ENTRYPOINT ["biomcp", "run"] ``` #### 2. Build the Docker Image ```bash docker build -t biomcp:latest . ``` #### 3. Configure Claude Desktop Add BioMCP to your configuration file: ```json { "mcpServers": { "biomcp": { "command": "docker", "args": ["run", "-i", "--rm", "biomcp:latest"], "env": { "NCI_API_KEY": "your-nci-api-key-here", "ALPHAGENOME_API_KEY": "your-alphagenome-key-here", "CBIO_TOKEN": "your-cbioportal-token-here" } } } } ``` ## Verification 1. Restart Claude Desktop after updating the configuration 2. Start a new conversation 3. Look for the 🔌 icon indicating MCP is connected 4. Test with: "Can you search for articles about BRAF mutations in melanoma?" ## Setting Up API Keys While BioMCP works without API keys, some features require them for full functionality: ### NCI API Key (Optional) Enables access to NCI's clinical trials database with advanced filters: - Get your key from [NCI API Portal](https://api.cancer.gov) - Add to configuration as `NCI_API_KEY` ### AlphaGenome API Key (Optional) Enables variant effect predictions using Google DeepMind's AlphaGenome: - Register at [AlphaGenome Portal](https://alphagenome.google.com) - Add to configuration as `ALPHAGENOME_API_KEY` ### cBioPortal Token (Optional) Enables enhanced cancer genomics queries: - Get token from [cBioPortal](https://www.cbioportal.org/webAPI) - Add to configuration as `CBIO_TOKEN` ## Usage Examples Once configured, you can ask Claude to perform various biomedical research tasks: ### Literature Search ``` "Find recent articles about CAR-T therapy for B-cell lymphomas" ``` ### Clinical Trials ``` "Search for actively recruiting trials for EGFR-mutant lung cancer" ``` ### Variant Analysis ``` "What is known about the pathogenicity of BRCA1 c.5266dupC?" 
``` ### Drug Information ``` "Tell me about the mechanism of action and indications for pembrolizumab" ``` ### Complex Research ``` "I need a comprehensive analysis of treatment options for a patient with BRAF V600E melanoma who has progressed on dabrafenib/trametinib" ``` ## The Deep Researcher Persona BioMCP includes a specialized "Deep Researcher" persona that enhances Claude's biomedical research capabilities: - **Sequential Thinking**: Automatically uses the `think` tool for systematic analysis - **Comprehensive Coverage**: Searches multiple databases and synthesizes findings - **Evidence-Based**: Provides citations and links to primary sources - **Clinical Focus**: Understands medical context and terminology To activate, simply ask biomedical questions naturally. The persona automatically engages for research tasks. ## Troubleshooting ### "MCP Connection Failed" 1. Verify the configuration file path is correct 2. Check JSON syntax (no trailing commas) 3. Ensure Claude Desktop has been restarted 4. 
Check that uv or Docker is properly installed ### "Command Not Found" **For uv**: ```bash # Verify uv installation uv --version # Ensure PATH includes uv echo $PATH | grep -q "\.local/bin" || echo "PATH needs updating" ``` **For Docker**: ```bash # Verify Docker is running docker ps # Test BioMCP container docker run -it --rm biomcp:latest --help ``` ### "No Results Found" - Check your internet connection - Verify API keys are correctly set (if using optional features) - Try simpler queries first - Use official gene symbols (e.g., "TP53" not "p53") ### Performance Issues **For uv**: - First run may be slow due to package downloads - Subsequent runs use cached environments **For Docker**: - Ensure Docker has sufficient memory allocated - Consider building with `--platform` flag for Apple Silicon ## Advanced Configuration ### Custom Environment Variables Add any additional environment variables your research requires: ```json { "mcpServers": { "biomcp": { "command": "uv", "args": ["run", "--with", "biomcp-python", "biomcp", "run"], "env": { "BIOMCP_LOG_LEVEL": "DEBUG", "BIOMCP_CACHE_DIR": "/path/to/cache", "HTTP_PROXY": "http://your-proxy:8080" } } } } ``` ### Multiple Configurations You can run multiple BioMCP instances with different settings: ```json { "mcpServers": { "biomcp-prod": { "command": "uv", "args": ["run", "--with", "biomcp-python", "biomcp", "run"], "env": { "BIOMCP_ENV": "production" } }, "biomcp-dev": { "command": "uv", "args": ["run", "--with", "biomcp-python@latest", "biomcp", "run"], "env": { "BIOMCP_ENV": "development", "BIOMCP_LOG_LEVEL": "DEBUG" } } } } ``` ## Best Practices 1. **Start Simple**: Test with basic queries before complex research tasks 2. **Be Specific**: Use official gene symbols and disease names 3. **Iterate**: Refine queries based on initial results 4. **Verify Sources**: Always check the provided citations 5. 
**Save Important Findings**: Export conversation or copy key results ## Getting Help - **Documentation**: [BioMCP Docs](https://github.com/genomoncology/biomcp) - **Issues**: [GitHub Issues](https://github.com/genomoncology/biomcp/issues) - **Community**: [Discussions](https://github.com/genomoncology/biomcp/discussions) ## Next Steps Now that BioMCP is integrated with Claude Desktop: 1. Try the [example queries](#usage-examples) above 2. Explore [How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md) for specific research workflows 3. Learn about [Sequential Thinking](../concepts/03-sequential-thinking-with-the-think-tool.md) for complex analyses 4. Set up [additional API keys](03-authentication-and-api-keys.md) for enhanced features ``` -------------------------------------------------------------------------------- /src/biomcp/articles/unified.py: -------------------------------------------------------------------------------- ```python """Unified article search combining PubMed and preprint sources.""" import asyncio import json import logging from collections.abc import Coroutine from typing import Any from .. 
import render from .preprints import search_preprints from .search import PubmedRequest, search_articles logger = logging.getLogger(__name__) def _deduplicate_articles(articles: list[dict]) -> list[dict]: """Remove duplicate articles based on DOI.""" seen_dois = set() unique_articles = [] for article in articles: doi = article.get("doi") if doi and doi in seen_dois: continue if doi: seen_dois.add(doi) unique_articles.append(article) return unique_articles def _parse_search_results(results: list) -> list[dict]: """Parse search results from JSON strings.""" all_articles = [] for result in results: if isinstance(result, str): try: articles = json.loads(result) if isinstance(articles, list): all_articles.extend(articles) except json.JSONDecodeError: continue return all_articles async def _extract_mutation_pattern( keywords: list[str], ) -> tuple[str | None, str | None]: """Extract mutation pattern from keywords asynchronously.""" if not keywords: return None, None # Use asyncio.to_thread for CPU-bound regex operations import re def _extract_sync(): for keyword in keywords: # Check for specific mutations (e.g., F57Y, V600E) if re.match(r"^[A-Z]\d+[A-Z*]$", keyword): if keyword.endswith("*"): return keyword, None # mutation_pattern else: return None, keyword # specific_mutation return None, None # Run CPU-bound operation in thread pool return await asyncio.to_thread(_extract_sync) async def _get_mutation_summary( gene: str, mutation: str | None, pattern: str | None ) -> str | None: """Get mutation-specific cBioPortal summary.""" from ..variants.cbioportal_mutations import ( CBioPortalMutationClient, format_mutation_search_result, ) mutation_client = CBioPortalMutationClient() if mutation: logger.info(f"Searching for specific mutation {gene} {mutation}") result = await mutation_client.search_specific_mutation( gene=gene, mutation=mutation, max_studies=20 ) else: logger.info(f"Searching for mutation pattern {gene} {pattern}") result = await 
mutation_client.search_specific_mutation( gene=gene, pattern=pattern, max_studies=20 ) return format_mutation_search_result(result) if result else None async def _get_gene_summary(gene: str) -> str | None: """Get regular gene cBioPortal summary.""" from ..variants.cbioportal_search import ( CBioPortalSearchClient, format_cbioportal_search_summary, ) client = CBioPortalSearchClient() summary = await client.get_gene_search_summary(gene, max_studies=5) return format_cbioportal_search_summary(summary) if summary else None async def _get_cbioportal_summary(request: PubmedRequest) -> str | None: """Get cBioPortal summary for the search request.""" if not request.genes: return None try: gene = request.genes[0] mutation_pattern, specific_mutation = await _extract_mutation_pattern( request.keywords ) if specific_mutation or mutation_pattern: return await _get_mutation_summary( gene, specific_mutation, mutation_pattern ) else: return await _get_gene_summary(gene) except Exception as e: logger.warning( f"Failed to get cBioPortal summary for gene search: {e}" ) return None async def search_articles_unified( # noqa: C901 request: PubmedRequest, include_pubmed: bool = True, include_preprints: bool = False, include_cbioportal: bool = True, output_json: bool = False, ) -> str: """Search for articles across PubMed and preprint sources.""" # Import here to avoid circular imports from ..shared_context import SearchContextManager # Use shared context to avoid redundant validations with SearchContextManager() as context: # Pre-validate genes once if request.genes: valid_genes = [] for gene in request.genes: if await context.validate_gene(gene): valid_genes.append(gene) request.genes = valid_genes tasks: list[Coroutine[Any, Any, Any]] = [] task_labels = [] if include_pubmed: tasks.append(search_articles(request, output_json=True)) task_labels.append("pubmed") if include_preprints: tasks.append(search_preprints(request, output_json=True)) task_labels.append("preprints") # Add cBioPortal 
to parallel execution if include_cbioportal and request.genes: tasks.append(_get_cbioportal_summary(request)) task_labels.append("cbioportal") if not tasks: return json.dumps([]) if output_json else render.to_markdown([]) # Run all operations in parallel results = await asyncio.gather(*tasks, return_exceptions=True) # Create result map for easier processing result_map = dict(zip(task_labels, results, strict=False)) # Extract cBioPortal summary if it was included cbioportal_summary: str | None = None if "cbioportal" in result_map: result = result_map["cbioportal"] if not isinstance(result, Exception) and isinstance(result, str): cbioportal_summary = result # Parse article search results article_results = [] for label, result in result_map.items(): if label != "cbioportal" and not isinstance(result, Exception): article_results.append(result) # Parse and deduplicate results all_articles = _parse_search_results(article_results) unique_articles = _deduplicate_articles(all_articles) # Sort by publication state (peer-reviewed first) and then by date unique_articles.sort( key=lambda x: ( 0 if x.get("publication_state", "peer_reviewed") == "peer_reviewed" else 1, x.get("date", "0000-00-00"), ), reverse=True, ) if unique_articles and not output_json: result = render.to_markdown(unique_articles) if cbioportal_summary and isinstance(cbioportal_summary, str): # Add cBioPortal summary at the beginning result = cbioportal_summary + "\n\n---\n\n" + result return result else: if cbioportal_summary: return json.dumps( { "cbioportal_summary": cbioportal_summary, "articles": unique_articles, }, indent=2, ) return json.dumps(unique_articles, indent=2) ``` -------------------------------------------------------------------------------- /src/biomcp/openfda/adverse_events.py: -------------------------------------------------------------------------------- ```python """ OpenFDA Drug Adverse Events (FAERS) integration. 
""" import logging from .adverse_events_helpers import ( format_drug_details, format_reaction_details, format_report_metadata, format_report_summary, format_search_summary, format_top_reactions, ) from .constants import ( OPENFDA_DEFAULT_LIMIT, OPENFDA_DISCLAIMER, OPENFDA_DRUG_EVENTS_URL, OPENFDA_MAX_LIMIT, ) from .exceptions import ( OpenFDAConnectionError, OpenFDARateLimitError, OpenFDATimeoutError, ) from .input_validation import sanitize_input from .utils import clean_text, make_openfda_request logger = logging.getLogger(__name__) def _build_search_query( drug: str | None, reaction: str | None, serious: bool | None ) -> str: """Build the search query for adverse events.""" search_parts = [] if drug: # Sanitize drug input to prevent injection drug = sanitize_input(drug, max_length=100) if drug: drug_query = ( f'(patient.drug.medicinalproduct:"{drug}" OR ' f'patient.drug.openfda.brand_name:"{drug}" OR ' f'patient.drug.openfda.generic_name:"{drug}")' ) search_parts.append(drug_query) if reaction: # Sanitize reaction input reaction = sanitize_input(reaction, max_length=200) if reaction: search_parts.append( f'patient.reaction.reactionmeddrapt:"{reaction}"' ) if serious is not None: serious_value = "1" if serious else "2" search_parts.append(f"serious:{serious_value}") return " AND ".join(search_parts) async def search_adverse_events( # noqa: C901 drug: str | None = None, reaction: str | None = None, serious: bool | None = None, limit: int = OPENFDA_DEFAULT_LIMIT, skip: int = 0, api_key: str | None = None, ) -> str: """ Search FDA adverse event reports (FAERS). 
Args: drug: Drug name to search for reaction: Adverse reaction term to search for serious: Filter for serious events only limit: Maximum number of results skip: Number of results to skip api_key: Optional OpenFDA API key (overrides OPENFDA_API_KEY env var) Returns: Formatted string with adverse event information """ if not drug and not reaction: return ( "⚠️ Please specify either a drug name or reaction term to search " "adverse events.\n\n" "Examples:\n" "- Search by drug: --drug 'imatinib'\n" "- Search by reaction: --reaction 'nausea'\n" "- Both: --drug 'imatinib' --reaction 'nausea'" ) # Build and execute search search_query = _build_search_query(drug, reaction, serious) params = { "search": search_query, "limit": min(limit, OPENFDA_MAX_LIMIT), "skip": skip, } try: response, error = await make_openfda_request( OPENFDA_DRUG_EVENTS_URL, params, "openfda_adverse_events", api_key ) except OpenFDARateLimitError: return ( "⚠️ **FDA API Rate Limit Exceeded**\n\n" "You've exceeded the FDA's rate limit. Options:\n" "• Wait a moment and try again\n" "• Provide an FDA API key for higher limits (240/min vs 40/min)\n" "• Get a free key at: https://open.fda.gov/apis/authentication/" ) except OpenFDATimeoutError: return ( "⏱️ **Request Timeout**\n\n" "The FDA API is taking too long to respond. This may be due to:\n" "• High server load\n" "• Complex query\n" "• Network issues\n\n" "Please try again in a moment." ) except OpenFDAConnectionError as e: return ( "🔌 **Connection Error**\n\n" f"Unable to connect to FDA API: {e}\n\n" "Please check your internet connection and try again." ) if error: return f"⚠️ Error searching adverse events: {error}" if not response or not response.get("results"): search_desc = [] if drug: search_desc.append(f"drug '{drug}'") if reaction: search_desc.append(f"reaction '{reaction}'") return ( f"No adverse event reports found for {' and '.join(search_desc)}." 
) results = response["results"] total = ( response.get("meta", {}).get("results", {}).get("total", len(results)) ) # Build output output = ["## FDA Adverse Event Reports\n"] output.extend(format_search_summary(drug, reaction, serious, total)) # Add top reactions if searching by drug if drug and not reaction: output.extend(format_top_reactions(results)) # Add sample reports output.append( f"### Sample Reports (showing {min(len(results), 3)} of {total}):\n" ) for i, result in enumerate(results[:3], 1): output.extend(format_report_summary(result, i)) output.append(f"\n{OPENFDA_DISCLAIMER}") return "\n".join(output) async def get_adverse_event(report_id: str, api_key: str | None = None) -> str: """ Get detailed information for a specific adverse event report. Args: report_id: Safety report ID api_key: Optional OpenFDA API key (overrides OPENFDA_API_KEY env var) Returns: Formatted string with detailed report information """ params = { "search": f'safetyreportid:"{report_id}"', "limit": 1, } response, error = await make_openfda_request( OPENFDA_DRUG_EVENTS_URL, params, "openfda_adverse_event_detail", api_key, ) if error: return f"⚠️ Error retrieving adverse event report: {error}" if not response or not response.get("results"): return f"Adverse event report '{report_id}' not found." 
result = response["results"][0] patient = result.get("patient", {}) # Build detailed output output = [f"## Adverse Event Report: {report_id}\n"] # Patient Information output.extend(_format_patient_info(patient)) # Drug Information if drugs := patient.get("drug", []): output.extend(format_drug_details(drugs)) # Reactions if reactions := patient.get("reaction", []): output.extend(format_reaction_details(reactions)) # Event Summary if summary := patient.get("summary", {}).get("narrativeincludeclinical"): output.append("### Event Narrative") output.append(clean_text(summary)) output.append("") # Report metadata output.extend(format_report_metadata(result)) output.append(f"\n{OPENFDA_DISCLAIMER}") return "\n".join(output) def _format_patient_info(patient: dict) -> list[str]: """Format patient information section.""" output = ["### Patient Information"] if age := patient.get("patientonsetage"): output.append(f"- **Age**: {age} years") sex_map = {0: "Unknown", 1: "Male", 2: "Female"} sex_code = patient.get("patientsex") sex = ( sex_map.get(sex_code, "Unknown") if sex_code is not None else "Unknown" ) output.append(f"- **Sex**: {sex}") if weight := patient.get("patientweight"): output.append(f"- **Weight**: {weight} kg") output.append("") return output ``` -------------------------------------------------------------------------------- /docs/how-to-guides/01-find-articles-and-cbioportal-data.md: -------------------------------------------------------------------------------- ```markdown # How to Find Articles and cBioPortal Data This guide walks you through searching biomedical literature with automatic cancer genomics integration from cBioPortal. 
## Overview When searching for articles about genes, BioMCP automatically enriches your results with: - **cBioPortal Summary**: Mutation frequencies, hotspots, and cancer type distribution ([API Reference](../backend-services-reference/03-cbioportal.md)) - **PubMed Articles**: Peer-reviewed research with entity annotations ([PubTator3 Reference](../backend-services-reference/06-pubtator3.md)) - **Preprints**: Latest findings from bioRxiv and medRxiv ## Basic Article Search ### Search by Gene Find articles about a specific gene: ```bash # CLI biomcp article search --gene BRAF --limit 5 # Python articles = await client.articles.search(genes=["BRAF"], limit=5) # MCP Tool article_searcher(genes=["BRAF"], limit=5) ``` This automatically includes: 1. cBioPortal summary showing BRAF mutation frequency across cancers 2. Top mutation hotspots (e.g., V600E) 3. Recent articles mentioning BRAF ### Search by Disease Find articles about a specific disease: ```bash # CLI biomcp article search --disease melanoma --limit 10 # Python articles = await client.articles.search(diseases=["melanoma"]) # MCP Tool article_searcher(diseases=["melanoma"]) ``` ## Advanced Search Techniques ### Combining Multiple Filters Search for articles at the intersection of genes, diseases, and chemicals: ```bash # CLI - EGFR mutations in lung cancer treated with erlotinib biomcp article search \ --gene EGFR \ --disease "lung cancer" \ --chemical erlotinib \ --limit 20 # Python articles = await client.articles.search( genes=["EGFR"], diseases=["lung cancer"], chemicals=["erlotinib"] ) ``` ### Using OR Logic in Keywords Find articles mentioning different notations of the same variant: ```bash # CLI - Find any notation of BRAF V600E biomcp article search \ --gene BRAF \ --keyword "V600E|p.V600E|c.1799T>A" # Python - Different names for same concept articles = await client.articles.search( diseases=["NSCLC|non-small cell lung cancer"], chemicals=["pembrolizumab|Keytruda|anti-PD-1"] ) ``` ### Excluding 
Preprints For peer-reviewed articles only: ```bash # CLI biomcp article search --gene TP53 --no-preprints # Python articles = await client.articles.search( genes=["TP53"], include_preprints=False ) ``` ## Understanding cBioPortal Integration ### What cBioPortal Provides When you search for a gene, the first result includes: ```markdown ### cBioPortal Summary for BRAF - **Mutation Frequency**: 76.7% (368 mutations in 480 samples) - **Studies**: 1 of 5 studies have mutations **Top Hotspots:** 1. V600E: 310 mutations (84.2%) 2. V600K: 23 mutations (6.3%) 3. V600M: 12 mutations (3.3%) **Cancer Type Distribution:** - Skin Cancer, Non-Melanoma: 156 mutations - Melanoma: 91 mutations - Thyroid Cancer: 87 mutations ``` ### Mutation-Specific Searches Search for articles about specific mutations: ```python # Search for BRAF V600E specifically articles = await client.articles.search( genes=["BRAF"], keywords=["V600E"], include_cbioportal=True # Default ) ``` The cBioPortal summary will highlight the specific mutation if found. 
### Disabling cBioPortal If you don't need cancer genomics data: ```bash # CLI biomcp article search --gene BRCA1 --no-cbioportal # Python articles = await client.articles.search( genes=["BRCA1"], include_cbioportal=False ) ``` ## Practical Examples ### Example 1: Resistance Mechanism Research Find articles about EGFR T790M resistance: ```python # Using think tool first (for MCP) think( thought="Researching EGFR T790M resistance mechanisms in lung cancer", thoughtNumber=1 ) # Search with multiple relevant terms articles = await article_searcher( genes=["EGFR"], diseases=["lung cancer|NSCLC"], keywords=["T790M|p.T790M|resistance|resistant"], chemicals=["osimertinib|gefitinib|erlotinib"] ) ``` ### Example 2: Combination Therapy Research Research BRAF/MEK combination therapy: ```bash # CLI approach biomcp article search \ --gene BRAF --gene MEK1 --gene MEK2 \ --disease melanoma \ --chemical dabrafenib --chemical trametinib \ --keyword "combination therapy|combined treatment" ``` ### Example 3: Biomarker Discovery Find articles about potential biomarkers: ```python # Search for PD-L1 as a biomarker articles = await client.articles.search( genes=["CD274"], # PD-L1 gene symbol keywords=["biomarker|predictive|prognostic"], diseases=["cancer"], limit=50 ) # Filter results programmatically biomarker_articles = [ a for a in articles if "biomarker" in a.title.lower() or "predictive" in a.abstract.lower() ] ``` ## Working with Results ### Extracting Key Information ```python # Process article results for article in articles: print(f"Title: {article.title}") print(f"PMID: {article.pmid}") print(f"URL: {article.url}") # Extract annotated entities genes = article.metadata.get("genes", []) diseases = article.metadata.get("diseases", []) chemicals = article.metadata.get("chemicals", []) print(f"Genes mentioned: {', '.join(genes)}") print(f"Diseases: {', '.join(diseases)}") print(f"Chemicals: {', '.join(chemicals)}") ``` ### Fetching Full Article Details Get complete article 
information: ```python # Get article by PMID full_article = await client.articles.get("38768446") # Access full abstract print(full_article.abstract) # Check for full text availability if full_article.full_text_url: print(f"Full text: {full_article.full_text_url}") ``` ## Tips for Effective Searches ### 1. Use Official Gene Symbols ```python # ✅ Correct - Official HGNC symbol articles = await search(genes=["ERBB2"]) # ❌ Avoid - Common name articles = await search(genes=["HER2"]) # May miss results ``` ### 2. Include Synonyms for Diseases ```python # Cover all variations articles = await search( diseases=["GIST|gastrointestinal stromal tumor|gastrointestinal stromal tumour"] ) ``` ### 3. Leverage PubTator Annotations PubTator automatically annotates articles with: - Gene mentions (normalized to official symbols) - Disease concepts (mapped to MeSH terms) - Chemical/drug entities - Genetic variants - Species ### 4. Combine with Other Tools ```python # 1. Find articles about a gene articles = await article_searcher(genes=["ALK"]) # 2. Get gene details for context gene_info = await gene_getter("ALK") # 3. Find relevant trials trials = await trial_searcher( other_terms=["ALK positive", "ALK rearrangement"] ) ``` ## Troubleshooting ### No Results Found 1. **Check gene symbols**: Use [genenames.org](https://www.genenames.org) 2. **Broaden search**: Remove filters one by one 3. 
class TestEndpointRegistry:
    """Exercise the lookup and reporting API of ``EndpointRegistry``."""

    def test_registry_initialization(self):
        """A fresh registry is pre-populated with the known endpoint keys."""
        known = EndpointRegistry().get_all_endpoints()

        # The registry must not be empty ...
        assert len(known) > 0

        # ... and must contain one representative key per backend.
        for key in (
            "pubtator3_search",
            "clinicaltrials_search",
            "myvariant_query",
            "cbioportal_api",
        ):
            assert key in known

    def test_get_endpoints_by_category(self):
        """Category filtering returns only endpoints of that category."""
        registry = EndpointRegistry()

        # Literature first, then clinical trials.
        for category in (
            EndpointCategory.BIOMEDICAL_LITERATURE,
            EndpointCategory.CLINICAL_TRIALS,
        ):
            matches = registry.get_endpoints_by_category(category)
            assert len(matches) > 0
            assert all(info.category == category for info in matches.values())

    def test_get_unique_domains(self):
        """The domain listing includes every backend host."""
        hosts = EndpointRegistry().get_unique_domains()

        assert len(hosts) > 0
        for host in (
            "www.ncbi.nlm.nih.gov",
            "clinicaltrials.gov",
            "myvariant.info",
            "www.cbioportal.org",
        ):
            assert host in hosts

    def test_endpoint_info_properties(self):
        """``EndpointInfo`` exposes the domain, category and data types."""
        fields = {
            "url": "https://api.example.com/test",
            "category": EndpointCategory.BIOMEDICAL_LITERATURE,
            "data_types": [DataType.RESEARCH_ARTICLES],
            "description": "Test endpoint",
            "compliance_notes": "Test compliance",
            "rate_limit": "10 requests/second",
            "authentication": "API key required",
        }
        info = EndpointInfo(**fields)

        assert info.domain == "api.example.com"
        assert info.category == EndpointCategory.BIOMEDICAL_LITERATURE
        assert DataType.RESEARCH_ARTICLES in info.data_types

    def test_markdown_report_generation(self):
        """The generated report has all sections and names real endpoints."""
        markdown = EndpointRegistry().generate_markdown_report()

        expected_fragments = (
            # Section headings
            "# Third-Party Endpoints Used by BioMCP",
            "## Overview",
            "## Endpoints by Category",
            "## Domain Summary",
            "## Compliance and Privacy",
            "## Network Control",
            # Offline-mode switch
            "BIOMCP_OFFLINE",
            # Actual endpoints
            "pubtator3",
            "clinicaltrials.gov",
            "myvariant.info",
        )
        for fragment in expected_fragments:
            assert fragment in markdown

    def test_save_markdown_report(self, tmp_path):
        """Saving writes the report to the requested path and returns it."""
        target = tmp_path / "test_endpoints.md"

        written = EndpointRegistry().save_markdown_report(target)

        assert written == target
        assert target.exists()
        # The file on disk must contain the report title.
        text = target.read_text()
        assert "Third-Party Endpoints Used by BioMCP" in text


class TestEndpointTracking:
    """Check how ``request_api`` validates the ``endpoint_key`` argument."""

    @pytest.mark.asyncio
    async def test_valid_endpoint_key(self):
        """A registered endpoint key is accepted without error."""
        with patch("biomcp.http_client.call_http") as fake_http:
            fake_http.return_value = (200, '{"data": "test"}')

            payload, failure = await request_api(
                url="https://www.ncbi.nlm.nih.gov/research/pubtator3-api/search/",
                request={"text": "BRAF"},
                endpoint_key="pubtator3_search",
                cache_ttl=0,
            )

            assert payload == {"data": "test"}
            assert failure is None

    @pytest.mark.asyncio
    async def test_invalid_endpoint_key_raises_error(self):
        """An unregistered endpoint key is rejected with ``ValueError``."""
        with pytest.raises(ValueError, match="Unknown endpoint key"):
            await request_api(
                url="https://api.example.com/test",
                request={"test": "data"},
                endpoint_key="invalid_endpoint_key",
                cache_ttl=0,
            )

    @pytest.mark.asyncio
    async def test_no_endpoint_key_allowed(self):
        """Omitting the endpoint key entirely is still permitted."""
        with patch("biomcp.http_client.call_http") as fake_http:
            fake_http.return_value = (200, '{"data": "test"}')

            payload, failure = await request_api(
                url="https://api.example.com/test",
                request={"test": "data"},
                cache_ttl=0,
            )

            assert payload == {"data": "test"}
            assert failure is None


class TestHTTPImportChecks:
    """Sanity checks for the ``scripts/check_http_imports.py`` helper."""

    def test_check_script_exists(self):
        """The import-checking script is present in the repository."""
        repo_root = Path(__file__).parent.parent.parent
        checker = repo_root / "scripts" / "check_http_imports.py"
        assert checker.exists()

    def test_allowed_files_configured(self):
        """The script's allow-list and library list contain the essentials."""
        import sys

        scripts_dir = Path(__file__).parent.parent.parent / "scripts"

        # Make the script importable as a module for the duration of the test.
        sys.path.insert(0, str(scripts_dir))
        try:
            from check_http_imports import ALLOWED_FILES, HTTP_LIBRARIES

            # Files that are allowed to import HTTP libraries directly.
            for filename in ("http_client.py", "http_client_simple.py"):
                assert filename in ALLOWED_FILES

            # Libraries the script is expected to police.
            for library in ("httpx", "aiohttp", "requests"):
                assert library in HTTP_LIBRARIES
        finally:
            sys.path.pop(0)


class TestGlobalRegistry:
    """Behaviour of the module-level registry accessor."""

    def test_get_registry_returns_same_instance(self):
        """Repeated calls hand back the very same object."""
        first = get_registry()
        second = get_registry()
        assert first is second

    def test_global_registry_has_endpoints(self):
        """The shared registry is populated."""
        assert len(get_registry().get_all_endpoints()) > 0
</h2> <p>Connect to BioMCP instantly through Claude - no installation required!</p> <div class="announcement-features"> <div class="feature-item"> <strong>🚀 Instant Access</strong> <span>Start using BioMCP in under 2 minutes</span> </div> <div class="feature-item"> <strong>☁️ Cloud-Powered</strong> <span>Always up-to-date with latest features</span> </div> <div class="feature-item"> <strong>🔒 Secure Auth</strong> <span>Google OAuth authentication</span> </div> <div class="feature-item"> <strong>🛠️ 23+ Tools</strong> <span>Full suite of biomedical research tools</span> </div> </div> <a href="tutorials/remote-connection/" class="cta-button"> Connect to Remote BioMCP Now </a> </div> </div> ## What Can You Do with BioMCP? ### Search Research Literature Find articles about genes, variants, diseases, and drugs with automatic cancer genomics data from cBioPortal ```bash biomcp article search --gene BRAF --disease melanoma ``` ### Discover Clinical Trials Search active trials by condition, location, phase, and eligibility criteria including genetic biomarkers ```bash biomcp trial search --condition "lung cancer" --status RECRUITING ``` ### Analyze Genetic Variants Query variant databases, predict effects, and understand clinical significance ```bash biomcp variant search --gene TP53 --significance pathogenic ``` ### AI-Powered Analysis Use with Claude Desktop for conversational biomedical research with sequential thinking ```python # Claude automatically uses BioMCP tools "What BRAF mutations are found in melanoma?" ``` ## 5-Minute Quick Start ### Choose Your Interface === "Claude Desktop (Recommended)" **Best for**: Conversational research, complex queries, AI-assisted analysis 1. **Install Claude Desktop** from [claude.ai/desktop](https://claude.ai/desktop) 2. **Configure BioMCP**: ```json { "mcpServers": { "biomcp": { "command": "uv", "args": [ "run", "--with", "biomcp-python", "biomcp", "run" ] } } } ``` 3. **Start researching**: Ask Claude about any biomedical topic! 
[Full Claude Desktop Guide →](getting-started/02-claude-desktop-integration.md) === "Command Line" **Best for**: Direct queries, scripting, automation 1. **Install BioMCP**: ```bash # Using uv (recommended) uv tool install biomcp-python # Or using pip pip install biomcp-python ``` 2. **Run your first search**: ```bash biomcp article search \ --gene BRAF --disease melanoma \ --limit 5 ``` [CLI Reference →](user-guides/01-command-line-interface.md) === "Python SDK" **Best for**: Integration, custom applications, bulk operations 1. **Install the package**: ```bash pip install biomcp-python ``` 2. **Use in your code**: ```python from biomcp import BioMCPClient async with BioMCPClient() as client: articles = await client.articles.search( genes=["BRAF"], diseases=["melanoma"] ) ``` [Python SDK Docs →](apis/python-sdk.md) ## Key Features ### Unified Search Across Databases - **PubMed/PubTator3**: 30M+ research articles with entity recognition - **ClinicalTrials.gov**: 400K+ clinical trials worldwide - **MyVariant.info**: Comprehensive variant annotations - **cBioPortal**: Automatic cancer genomics integration ### Intelligent Query Processing - Natural language to structured queries - Automatic synonym expansion - OR logic support for flexible matching - Cross-domain relationship discovery ### Built for AI Integration - 24 specialized MCP tools - Sequential thinking for complex analysis - Streaming responses for real-time updates - Context preservation across queries [Explore All Features →](concepts/01-what-is-biomcp.md) ## Learn by Example ### Find Articles About a Specific Mutation ```bash # Search for BRAF V600E mutations biomcp article search --gene BRAF \ --keyword "V600E|p.V600E|c.1799T>A" ``` ### Discover Trials Near You ```bash # Find cancer trials in Boston area biomcp trial search --condition cancer \ --latitude 42.3601 --longitude -71.0589 \ --distance 50 ``` ### Get Gene Information ```bash # Get comprehensive gene data biomcp gene get TP53 ``` [More Examples
→](tutorials/biothings-prompts.md) ## Popular Workflows ### Literature Review Systematic search across papers, preprints, and clinical trials [Workflow Guide →](workflows/all-workflows.md#1-literature-review-workflow) ### Variant Interpretation From variant ID to clinical significance and treatment implications [Workflow Guide →](workflows/all-workflows.md#3-variant-interpretation-workflow) ### Trial Matching Find eligible trials based on patient criteria and biomarkers [Workflow Guide →](workflows/all-workflows.md#2-clinical-trial-matching-workflow) ### Drug Research Connect drugs to targets, trials, and research literature [Workflow Guide →](workflows/all-workflows.md) ## Advanced Features - **[NCI Integration](getting-started/03-authentication-and-api-keys.md#nci-clinical-trials-api)**: Enhanced cancer trial search with biomarker filtering - **[AlphaGenome](how-to-guides/04-predict-variant-effects-with-alphagenome.md)**: Predict variant effects on gene regulation - **[BigQuery Logging](how-to-guides/05-logging-and-monitoring-with-bigquery.md)**: Monitor usage and performance - **[HTTP Server Mode](developer-guides/01-server-deployment.md)**: Deploy as a service ## Documentation - **[Getting Started](getting-started/01-quickstart-cli.md)** - Installation and first steps - **[User Guides](user-guides/01-command-line-interface.md)** - Detailed usage instructions - **[API Reference](apis/overview.md)** - Technical documentation - **[FAQ](faq-condensed.md)** - Quick answers to common questions ## Community & Support - **GitHub**: [github.com/genomoncology/biomcp](https://github.com/genomoncology/biomcp) - **Issues**: [Report bugs or request features](https://github.com/genomoncology/biomcp/issues) - **Discussions**: [Ask questions and share tips](https://github.com/genomoncology/biomcp/discussions) ## License BioMCP is licensed under the MIT License. See [LICENSE](https://github.com/genomoncology/biomcp/blob/main/LICENSE) for details. 
``` -------------------------------------------------------------------------------- /docs/tutorials/claude-code-biomcp-alphagenome.md: -------------------------------------------------------------------------------- ```markdown # Using Claude Code with BioMCP for AlphaGenome Variant Analysis This tutorial demonstrates how to use Claude Code with BioMCP to analyze genetic variants using Google DeepMind's AlphaGenome. We'll explore both the MCP server integration and CLI approaches, showing how Claude Code can seamlessly work with both interfaces. ## Prerequisites - **Claude Code**: Latest version with MCP support - **Python 3.11+**: Required for BioMCP and AlphaGenome - **uv**: Modern Python package manager ([installation guide](https://docs.astral.sh/uv/getting-started/installation/)) - **AlphaGenome API Key**: Get free access at [Google DeepMind AlphaGenome](https://deepmind.google.com/science/alphagenome) ## Setup Overview BioMCP offers two interfaces that work perfectly with Claude Code: 1. **MCP Server**: Integrated directly into Claude Code for seamless workflows 2. **CLI**: Command-line interface for direct terminal access Both produce identical results, giving you flexibility in how you work. ## Part 1: MCP Server Setup ### Step 1: Install BioMCP CLI ```bash # Install BioMCP CLI globally (note: biomcp-python, not biomcp!) 
uv tool install -q biomcp-python # Verify installation biomcp --version ``` ### Step 2: Configure MCP Server Add BioMCP to your Claude Code MCP configuration: ```bash # Basic setup (requires ALPHAGENOME_API_KEY environment variable) claude mcp add biomcp -- uv run --with biomcp-python biomcp run # Or with API key in configuration claude mcp add biomcp -e ALPHAGENOME_API_KEY=your-api-key-here -- uv run --with biomcp-python biomcp run ``` Verify the setup: ```bash claude mcp list claude mcp get biomcp ``` ### Step 3: Set Environment Variable ```bash # Add to your shell profile (~/.zshrc or ~/.bashrc) export ALPHAGENOME_API_KEY='your-api-key-here' # Or set per-session export ALPHAGENOME_API_KEY='your-api-key-here' ``` ### Step 4: Install AlphaGenome ```bash # Clone and install AlphaGenome git clone https://github.com/google-deepmind/alphagenome.git cd alphagenome && uv pip install . ``` ## Part 2: Testing with Claude Code ### Example: DLG1 Exon Skipping Variant Let's analyze the variant `chr3:197081044:TACTC>T` from the AlphaGenome paper, which demonstrates exon skipping in the DLG1 gene. #### Using MCP Server (Recommended) ```python # Claude Code automatically uses MCP when available mcp__biomcp__alphagenome_predictor( chromosome="chr3", position=197081044, reference="TACTC", alternate="T" ) ``` **Result:** ```markdown ## AlphaGenome Variant Effect Predictions **Variant**: chr3:197081044 TACTC>T **Analysis window**: 131,072 bp ### Gene Expression - **MELTF**: +2.57 log₂ fold change (↑ increases expression) ### Chromatin Accessibility - **EFO:0005719 DNase-seq**: +17.27 log₂ change (↑ increases accessibility) ### Splicing - Potential splicing alterations detected ### Summary - Analyzed 11796 regulatory tracks - 6045 tracks show substantial changes (|log₂| > 0.5) ``` #### Using CLI Interface ```bash # Same analysis via CLI export ALPHAGENOME_API_KEY='your-api-key-here' uv run biomcp variant predict chr3 197081044 TACTC T ``` **Result:** Identical output to MCP server. 
## Part 3: Why Both Interfaces Matter ### MCP Server Advantages 🔌 - **Persistent State**: No need to re-export environment variables - **Workflow Integration**: Seamless chaining with other biomedical tools - **Structured Data**: Direct programmatic access to results - **Auto-Documentation**: Built-in parameter validation ### CLI Advantages 💻 - **Immediate Access**: No server setup required - **Debugging**: Direct command-line testing - **Scripting**: Easy integration into bash scripts - **Standalone Use**: Works without Claude Code ### Claude Code Perspective As Claude Code, both interfaces work equally well. The **MCP approach provides slight benefits**: - Results persist across conversation turns - Built-in error handling and validation - Automatic integration with thinking and search workflows - No need to manage environment variables per session **Trade-off**: MCP requires initial setup, while CLI is immediately available. ## Part 4: Advanced Usage Examples ### Multi-Variant Analysis ```python # Analyze multiple variants from AlphaGenome paper variants = [ ("chr3", 197081044, "TACTC", "T"), # DLG1 exon skipping ("chr21", 46126238, "G", "C"), # COL6A2 splice junction ("chr16", 173694, "A", "G") # HBA2 polyadenylation ] for chr, pos, ref, alt in variants: result = mcp__biomcp__alphagenome_predictor( chromosome=chr, position=pos, reference=ref, alternate=alt ) print(f"Most affected gene: {result}") ``` ### Tissue-Specific Analysis ```python # Analyze with tissue context mcp__biomcp__alphagenome_predictor( chromosome="chr7", position=140753336, reference="A", alternate="T", tissue_types=["UBERON:0000310"] # breast tissue ) ``` ### Combined BioMCP Workflow ```python # 1. First, search for known annotations variant_data = mcp__biomcp__variant_searcher(gene="BRAF") # 2. Then predict regulatory effects regulatory_effects = mcp__biomcp__alphagenome_predictor( chromosome="chr7", position=140753336, reference="A", alternate="T" ) # 3. 
Search literature for context literature = mcp__biomcp__article_searcher( genes=["BRAF"], variants=["V600E"] ) ``` ## Part 5: Validation and Quality Assurance ### How We Validated the Integration 1. **Raw API Testing**: Directly tested Google's AlphaGenome API 2. **Source Code Analysis**: Verified BioMCP uses correct API methods (`score_variant` + `get_recommended_scorers`) 3. **Cross-Validation**: Confirmed identical results across all three approaches: - Raw Python API: MELTF +2.57 log₂ - BioMCP CLI: MELTF +2.57 log₂ - BioMCP MCP: MELTF +2.57 log₂ ### Key Scientific Finding The variant `chr3:197081044:TACTC>T` most strongly affects **MELTF** (+2.57 log₂ fold change), not DLG1 as initially expected. This demonstrates that AlphaGenome considers the full regulatory landscape, not just the nearest gene. ## Part 6: Best Practices ### For MCP Usage - Use structured thinking with `mcp__biomcp__think` for complex analyses - Leverage `call_benefit` parameter to improve result quality - Chain multiple tools for comprehensive variant characterization ### For CLI Usage - Set `ALPHAGENOME_API_KEY` in your shell profile - Use `--help` to explore all available parameters - Combine with other CLI tools via pipes and scripts ### General Tips - Start with default 131kb analysis window - Use tissue-specific analysis when relevant - Validate surprising results with literature search - Consider both gene expression and chromatin accessibility effects ## Conclusion BioMCP's dual interface approach (MCP + CLI) provides robust variant analysis capabilities. Claude Code works seamlessly with both, offering flexibility for different workflows. The MCP integration provides slight advantages for interactive analysis, while the CLI excels for scripting and debugging. The combination of AlphaGenome's predictive power with BioMCP's comprehensive biomedical data access creates a powerful platform for genetic variant analysis and interpretation. 
async def test_convert_search_query(anyio_backend):
    """All five entity kinds end up AND-joined in the PubTator query."""
    request = PubmedRequest(
        chemicals=["Caffeine"],
        diseases=["non-small cell lung cancer"],
        genes=["BRAF"],
        variants=["BRAF V600E"],
        keywords=["therapy"],
    )

    converted = await convert_request(request=request)
    text = converted.text
    lowered = text.lower()

    # Keywords always lead the query.
    assert text.startswith("therapy AND ")

    # The API may or may not return prefixed entity IDs, so each term is
    # accepted either verbatim or in its "@TYPE_" form.
    assert any(term in text for term in ("Caffeine", "@CHEMICAL_Caffeine"))
    assert (
        any(
            term in lowered
            for term in ("non-small cell lung cancer", "carcinoma")
        )
        or "@DISEASE_" in text
    )
    assert any(term in text for term in ("BRAF", "@GENE_BRAF"))
    assert any(term in text for term in ("V600E", "p.V600E", "@VARIANT_"))

    # Five terms need at least four AND operators between them.
    assert text.count(" AND ") >= 4

    # Default page size (10, chosen for token efficiency).
    assert converted.size == 10


async def test_convert_search_query_with_or_logic(anyio_backend):
    """Pipe-separated keywords are rewritten as a parenthesised OR group."""
    request = PubmedRequest(
        genes=["PTEN"],
        keywords=["R173|Arg173|p.R173", "mutation"],
    )

    text = (await convert_request(request=request)).text

    # The alternatives collapse into one OR group ...
    assert "(R173 OR Arg173 OR p.R173)" in text
    # ... while the plain keyword and the gene stay separate terms.
    assert "mutation" in text
    assert any(term in text for term in ("PTEN", "@GENE_PTEN"))

    # Three terms need at least two AND operators between them.
    assert text.count(" AND ") >= 2


async def test_search(anyio_backend):
    """Search against the live PubTator3 API (network dependent).

    Because this performs real HTTP calls it can fail for reasons unrelated
    to the code under test:
    - network connectivity problems (Error 599)
    - API rate limiting
    - search results drifting over time

    Prefer test_search_mocked when a deterministic check is needed.
    """
    request = PubmedRequest(
        genes=["BRAF"],
        diseases=["NSCLC", "Non - Small Cell Lung Cancer"],
        keywords=["BRAF mutations NSCLC"],
        variants=["mutation", "mutations"],
    )

    raw = await search_articles(request, output_json=True)
    data = json.loads(raw)
    assert isinstance(data, list)

    # An error payload is a single dict carrying an "error" key; skip rather
    # than fail when the upstream API misbehaves.
    if data and isinstance(data[0], dict) and "error" in data[0]:
        pytest.skip(f"API returned error: {data[0]['error']}")

    # One default page of 10 results.
    assert len(data) == 10

    first = ResultItem.model_validate(data[0])
    # todo: this might be flaky (depends on live result ordering).
    expected_title = (
        "[Expert consensus on the diagnosis and treatment in advanced "
        "non-small cell lung cancer with BRAF mutation in China]."
    )
    assert first.title == expected_title


@pytest.mark.asyncio
async def test_search_mocked(anyio_backend):
    """Search with every outbound call mocked, so no network is needed."""
    params = {
        "genes": ["BRAF"],
        "diseases": ["NSCLC", "Non - Small Cell Lung Cancer"],
        "keywords": ["BRAF mutations NSCLC"],
        "variants": ["mutation", "mutations"],
    }
    expected_title = (
        "[Expert consensus on the diagnosis and treatment in advanced "
        "non-small cell lung cancer with BRAF mutation in China]."
    )

    # Search payload without abstracts; those are attached afterwards by the
    # abstract-fetching step, which is mocked separately below.
    hits = [
        ResultItem(
            pmid=37495419,
            title="[Expert consensus on the diagnosis and treatment in advanced "
            "non-small cell lung cancer with BRAF mutation in China].",
            journal="Zhonghua Zhong Liu Za Zhi",
            authors=["Zhang", "Li", "Wang"],
            date="2023-07-23",
            doi="10.3760/cma.j.cn112152-20230314-00115",
        )
        for _ in range(10)  # ten identical results, one default page
    ]
    search_payload = SearchResponse(
        results=hits,
        page_size=10,
        current=1,
        count=10,
        total_pages=1,
    )

    with patch(
        "biomcp.http_client.request_api",
        return_value=(search_payload, None),
    ):
        # Autocomplete is stubbed to "no entity mapping" to keep things simple.
        with patch("biomcp.articles.search.autocomplete", return_value=None):
            # The abstract lookup goes through call_pubtator_api.
            with patch(
                "biomcp.articles.search.call_pubtator_api"
            ) as fake_pubtator:
                from biomcp.articles.fetch import (
                    Article,
                    FetchArticlesResponse,
                    Passage,
                    PassageInfo,
                )

                abstract_passage = Passage(
                    text="This is a test abstract about BRAF mutations in NSCLC.",
                    infons=PassageInfo(section_type="ABSTRACT"),
                )
                fetch_payload = FetchArticlesResponse(
                    PubTator3=[
                        Article(pmid=37495419, passages=[abstract_passage])
                    ]
                )
                fake_pubtator.return_value = (fetch_payload, None)

                request = PubmedRequest(**params)
                raw = await search_articles(request, output_json=True)
                data = json.loads(raw)

                assert isinstance(data, list)
                assert len(data) == 10  # one default page of results

                first = ResultItem.model_validate(data[0])
                assert first.title == expected_title
                assert (
                    first.abstract
                    == "This is a test abstract about BRAF mutations in NSCLC."
                )


@pytest.mark.asyncio
async def test_search_network_error(anyio_backend):
    """A transport failure is surfaced as a single error entry."""
    request = PubmedRequest(genes=["BRAF"])

    with patch("biomcp.http_client.request_api") as fake_request:
        from biomcp.http_client import RequestError

        failure = RequestError(code=599, message="Network connectivity error")
        fake_request.return_value = (None, failure)

        raw = await search_articles(request, output_json=True)
        data = json.loads(raw)

        assert isinstance(data, list)
        assert len(data) == 1
        assert "error" in data[0]
        assert "Error 599: Network connectivity error" in data[0]["error"]
## High-Level Architecture ```mermaid graph TB subgraph "AI Client Layer" AI[AI Assistant<br/>e.g., Claude, GPT] end subgraph "MCP Server Layer" MCP[MCP Server<br/>router.py] SEARCH[search tool] FETCH[fetch tool] end subgraph "Domain Routing Layer" ROUTER[Query Router] PARSER[Query Parser] UNIFIED[Unified Query<br/>Language] end subgraph "Domain Handlers" ARTICLES[Articles Domain<br/>Handler] TRIALS[Trials Domain<br/>Handler] VARIANTS[Variants Domain<br/>Handler] THINKING[Thinking Domain<br/>Handler] end subgraph "External APIs" subgraph "Article Sources" PUBMED[PubTator3/<br/>PubMed] BIORXIV[bioRxiv/<br/>medRxiv] EUROPEPMC[Europe PMC] end subgraph "Clinical Data" CLINICALTRIALS[ClinicalTrials.gov] end subgraph "Variant Sources" MYVARIANT[MyVariant.info] TCGA[TCGA] KG[1000 Genomes] CBIO[cBioPortal] end end %% Connections AI -->|MCP Protocol| MCP MCP --> SEARCH MCP --> FETCH SEARCH --> ROUTER ROUTER --> PARSER PARSER --> UNIFIED ROUTER --> ARTICLES ROUTER --> TRIALS ROUTER --> VARIANTS ROUTER --> THINKING ARTICLES --> PUBMED ARTICLES --> BIORXIV ARTICLES --> EUROPEPMC ARTICLES -.->|Gene enrichment| CBIO TRIALS --> CLINICALTRIALS VARIANTS --> MYVARIANT MYVARIANT --> TCGA MYVARIANT --> KG VARIANTS --> CBIO THINKING -->|Internal| THINKING classDef clientClass fill:#e1f5fe,stroke:#01579b,stroke-width:2px classDef serverClass fill:#f3e5f5,stroke:#4a148c,stroke-width:2px classDef domainClass fill:#e8f5e9,stroke:#1b5e20,stroke-width:2px classDef apiClass fill:#fff3e0,stroke:#e65100,stroke-width:2px class AI clientClass class MCP,SEARCH,FETCH serverClass class ARTICLES,TRIALS,VARIANTS,THINKING domainClass class PUBMED,BIORXIV,EUROPEPMC,CLINICALTRIALS,MYVARIANT,TCGA,KG,CBIO apiClass ``` ## Detailed Search Flow ```mermaid sequenceDiagram participant AI as AI Client participant MCP as MCP Server participant Router as Query Router participant Domain as Domain Handler participant API as External API AI->>MCP: search(query="gene:BRAF AND disease:melanoma") MCP->>Router: Parse & 
route query alt Unified Query Router->>Router: Parse field syntax Router->>Router: Create routing plan par Search Articles Router->>Domain: Search articles (BRAF, melanoma) Domain->>API: PubTator3 API call API-->>Domain: Article results Domain->>API: cBioPortal enrichment API-->>Domain: Mutation data and Search Trials Router->>Domain: Search trials (melanoma) Domain->>API: ClinicalTrials.gov API API-->>Domain: Trial results and Search Variants Router->>Domain: Search variants (BRAF) Domain->>API: MyVariant.info API API-->>Domain: Variant results end else Domain-specific Router->>Domain: Direct domain search Domain->>API: Single API call API-->>Domain: Domain results else Sequential Thinking Router->>Domain: Process thought Domain->>Domain: Update session state Domain-->>Router: Thought response end Domain-->>Router: Formatted results Router-->>MCP: Aggregated results MCP-->>AI: Standardized response ``` ## Search Tool Parameters ```mermaid graph LR subgraph "Search Tool Input" PARAMS[Parameters] QUERY[query: string] DOMAIN[domain: article/trial/variant/thinking] GENES[genes: list] DISEASES[diseases: list] CONDITIONS[conditions: list] LAT[lat/long: coordinates] THOUGHT[thought parameters] end subgraph "Search Modes" MODE1[Unified Query Mode<br/>Uses 'query' param] MODE2[Domain-Specific Mode<br/>Uses domain + params] MODE3[Thinking Mode<br/>Uses thought params] end PARAMS --> MODE1 PARAMS --> MODE2 PARAMS --> MODE3 ``` ## Domain-Specific Data Sources ```mermaid graph TD subgraph "Articles Domain" A1[PubTator3/PubMed<br/>- Published articles<br/>- Annotations] A2[bioRxiv/medRxiv<br/>- Preprints<br/>- Early research] A3[Europe PMC<br/>- Open access<br/>- Full text] A4[cBioPortal Integration<br/>- Auto-enrichment when genes specified<br/>- Mutation summaries & hotspots] end subgraph "Trials Domain" T1[ClinicalTrials.gov<br/>- Active trials<br/>- Trial details<br/>- Location search] end subgraph "Variants Domain" V1[MyVariant.info<br/>- Variant annotations<br/>- Clinical 
significance] V2[TCGA<br/>- Cancer variants<br/>- Somatic mutations] V3[1000 Genomes<br/>- Population frequency<br/>- Allele data] V4[cBioPortal<br/>- Cancer mutations<br/>- Hotspots] end A1 -.->|When genes present| A4 A2 -.->|When genes present| A4 A3 -.->|When genes present| A4 ``` ## Unified Query Language ```mermaid graph TD QUERY["Unified Query<br/>#quot;gene:BRAF AND disease:melanoma#quot;"] QUERY --> PARSE[Query Parser] PARSE --> F1[Field: gene<br/>Value: BRAF] PARSE --> F2[Field: disease<br/>Value: melanoma] F1 --> D1[Articles Domain] F1 --> D2[Variants Domain] F2 --> D1 F2 --> D3[Trials Domain] D1 --> R1[PubMed Results] D2 --> R2[Variant Results] D3 --> R3[Trial Results] R1 --> AGG[Aggregated Results] R2 --> AGG R3 --> AGG ``` ## Example: Location-Based Trial Search ```mermaid sequenceDiagram participant User as User participant AI as AI Client participant MCP as BioMCP participant GEO as Geocoding Service participant CT as ClinicalTrials.gov User->>AI: Find active trials in Cleveland for NSCLC AI->>AI: Recognize location needs geocoding AI->>GEO: Geocode "Cleveland" GEO-->>AI: lat: 41.4993, long: -81.6944 AI->>MCP: search(domain="trial",<br/>diseases=["NSCLC"],<br/>lat=41.4993,<br/>long=-81.6944,<br/>distance=50) MCP->>CT: API call with geo filter CT-->>MCP: Trials near Cleveland MCP-->>AI: Formatted trial results AI-->>User: Here are X active NSCLC trials in Cleveland area ``` ## Key Features 1. **Parallel Execution**: Multiple domains are searched simultaneously for unified queries 2. **Smart Enrichment**: Article searches automatically include cBioPortal mutation summaries when genes are specified, providing clinical context alongside literature results 3. **Location Awareness**: Trial searches support geographic filtering with lat/long coordinates 4. **Sequential Thinking**: Built-in reasoning system for complex biomedical questions 5. 
**Standardized Output**: All results follow OpenAI MCP format for consistency ## Response Format All search results follow this standardized structure: ```json { "results": [ { "id": "PMID12345678", "title": "BRAF V600E mutation in melanoma", "text": "This study investigates BRAF mutations...", "url": "https://pubmed.ncbi.nlm.nih.gov/12345678" } ] } ``` Fetch results include additional domain-specific metadata in the response. ``` -------------------------------------------------------------------------------- /src/biomcp/openfda/drug_labels_helpers.py: -------------------------------------------------------------------------------- ```python """ Helper functions for OpenFDA drug labels to reduce complexity. """ from typing import Any from .input_validation import sanitize_input from .utils import clean_text, extract_drug_names, truncate_text def build_label_search_query( name: str | None, indication: str | None, boxed_warning: bool, section: str | None, ) -> str: """Build the search query for drug labels.""" search_parts = [] if name: # Sanitize input to prevent injection name = sanitize_input(name, max_length=100) if name: name_query = ( f'(openfda.brand_name:"{name}" OR ' f'openfda.generic_name:"{name}" OR ' f'openfda.substance_name:"{name}")' ) search_parts.append(name_query) if indication: # Sanitize indication input indication = sanitize_input(indication, max_length=200) if indication: search_parts.append(f'indications_and_usage:"{indication}"') if boxed_warning: search_parts.append("_exists_:boxed_warning") if section: # Map common section names to FDA fields section_map = { "indications": "indications_and_usage", "dosage": "dosage_and_administration", "contraindications": "contraindications", "warnings": "warnings_and_precautions", "adverse": "adverse_reactions", "interactions": "drug_interactions", "pregnancy": "pregnancy", "pediatric": "pediatric_use", "geriatric": "geriatric_use", "overdose": "overdosage", } field_name = section_map.get(section.lower(), 
section) search_parts.append(f"_exists_:{field_name}") return " AND ".join(search_parts) def format_label_summary(result: dict[str, Any], index: int) -> list[str]: """Format a single drug label summary.""" output = [] # Extract drug names drug_names = extract_drug_names(result) primary_name = drug_names[0] if drug_names else "Unknown Drug" output.append(f"#### {index}. {primary_name}") # Get OpenFDA data openfda = result.get("openfda", {}) # Show all names if multiple if len(drug_names) > 1: output.append(f"**Also known as**: {', '.join(drug_names[1:])}") # Basic info output.extend(_format_label_basic_info(openfda)) # Boxed warning if "boxed_warning" in result: warning_text = clean_text(" ".join(result["boxed_warning"])) output.append( f"\n⚠️ **BOXED WARNING**: {truncate_text(warning_text, 200)}" ) # Key sections output.extend(_format_label_key_sections(result)) # Set ID for retrieval if "set_id" in result: output.append(f"\n*Label ID: {result['set_id']}*") output.append("") return output def _format_label_basic_info(openfda: dict) -> list[str]: """Format basic label information from OpenFDA data.""" output = [] # Application number if app_numbers := openfda.get("application_number", []): output.append(f"**FDA Application**: {app_numbers[0]}") # Manufacturer if manufacturers := openfda.get("manufacturer_name", []): output.append(f"**Manufacturer**: {manufacturers[0]}") # Route if routes := openfda.get("route", []): output.append(f"**Route**: {', '.join(routes)}") return output def _format_label_key_sections(result: dict) -> list[str]: """Format key label sections.""" output = [] # Indications if "indications_and_usage" in result: indications_text = clean_text( " ".join(result["indications_and_usage"]) ) output.append( f"\n**Indications**: {truncate_text(indications_text, 300)}" ) # Contraindications if "contraindications" in result: contra_text = clean_text(" ".join(result["contraindications"])) output.append( f"\n**Contraindications**: {truncate_text(contra_text, 
200)}" ) return output def format_label_header(result: dict[str, Any], set_id: str) -> list[str]: """Format the header for detailed drug label.""" output = [] drug_names = extract_drug_names(result) primary_name = drug_names[0] if drug_names else "Unknown Drug" output.append(f"## FDA Drug Label: {primary_name}\n") # Basic information openfda = result.get("openfda", {}) if len(drug_names) > 1: output.append(f"**Other Names**: {', '.join(drug_names[1:])}") output.extend(_format_detailed_metadata(openfda)) output.append(f"**Label ID**: {set_id}\n") return output def _format_detailed_metadata(openfda: dict) -> list[str]: """Format detailed metadata from OpenFDA.""" output = [] # FDA application numbers if app_numbers := openfda.get("application_number", []): output.append(f"**FDA Application**: {', '.join(app_numbers)}") # Manufacturers if manufacturers := openfda.get("manufacturer_name", []): output.append(f"**Manufacturer**: {', '.join(manufacturers)}") # Routes of administration if routes := openfda.get("route", []): output.append(f"**Route of Administration**: {', '.join(routes)}") # Pharmacologic class if pharm_classes := openfda.get("pharm_class_epc", []): output.append(f"**Pharmacologic Class**: {', '.join(pharm_classes)}") return output def format_label_section( result: dict[str, Any], section: str, section_titles: dict[str, str] ) -> list[str]: """Format a single label section.""" output: list[str] = [] if section not in result: return output title = section_titles.get(section, section.upper().replace("_", " ")) output.append(f"### {title}\n") section_text = result[section] if isinstance(section_text, list): section_text = " ".join(section_text) cleaned_text = clean_text(section_text) # For very long sections, provide a truncated version if len(cleaned_text) > 3000: output.append(truncate_text(cleaned_text, 3000)) output.append("\n*[Section truncated for brevity]*") else: output.append(cleaned_text) output.append("") return output def get_default_sections() -> 
list[str]: """Get the default sections to display.""" return [ "indications_and_usage", "dosage_and_administration", "contraindications", "warnings_and_precautions", "adverse_reactions", "drug_interactions", "use_in_specific_populations", "clinical_pharmacology", "clinical_studies", ] def get_section_titles() -> dict[str, str]: """Get the mapping of section names to display titles.""" return { "indications_and_usage": "INDICATIONS AND USAGE", "dosage_and_administration": "DOSAGE AND ADMINISTRATION", "contraindications": "CONTRAINDICATIONS", "warnings_and_precautions": "WARNINGS AND PRECAUTIONS", "adverse_reactions": "ADVERSE REACTIONS", "drug_interactions": "DRUG INTERACTIONS", "use_in_specific_populations": "USE IN SPECIFIC POPULATIONS", "clinical_pharmacology": "CLINICAL PHARMACOLOGY", "clinical_studies": "CLINICAL STUDIES", "how_supplied": "HOW SUPPLIED", "storage_and_handling": "STORAGE AND HANDLING", "patient_counseling_information": "PATIENT COUNSELING INFORMATION", "pregnancy": "PREGNANCY", "nursing_mothers": "NURSING MOTHERS", "pediatric_use": "PEDIATRIC USE", "geriatric_use": "GERIATRIC USE", "overdosage": "OVERDOSAGE", } ``` -------------------------------------------------------------------------------- /tests/tdd/test_drug_shortages.py: -------------------------------------------------------------------------------- ```python """Tests for FDA drug shortages module.""" from datetime import datetime from unittest.mock import AsyncMock, patch import pytest from biomcp.openfda.drug_shortages import ( get_drug_shortage, search_drug_shortages, ) class TestDrugShortages: """Test drug shortages functionality.""" @pytest.mark.asyncio async def test_search_drug_shortages_no_data_available(self): """Test drug shortage search when FDA data is unavailable.""" with patch( "biomcp.openfda.drug_shortages._get_cached_shortage_data", new_callable=AsyncMock, ) as mock_get_data: mock_get_data.return_value = None result = await search_drug_shortages(drug="cisplatin") assert 
"Drug Shortage Data Temporarily Unavailable" in result assert "FDA drug shortage database cannot be accessed" in result assert ( "https://www.accessdata.fda.gov/scripts/drugshortages/" in result ) assert ( "https://www.ashp.org/drug-shortages/current-shortages" in result ) @pytest.mark.asyncio async def test_get_drug_shortage_no_data_available(self): """Test getting specific drug shortage when FDA data is unavailable.""" with patch( "biomcp.openfda.drug_shortages._get_cached_shortage_data", new_callable=AsyncMock, ) as mock_get_data: mock_get_data.return_value = None result = await get_drug_shortage("cisplatin") assert "Drug Shortage Data Temporarily Unavailable" in result assert "FDA drug shortage database cannot be accessed" in result assert "Alternative Options:" in result @pytest.mark.asyncio async def test_mock_data_not_used_in_production(self): """Test that mock data is never returned in production scenarios.""" with patch( "biomcp.openfda.drug_shortages._get_cached_shortage_data", new_callable=AsyncMock, ) as mock_get_data: # Simulate no data available (cache miss and fetch failure) mock_get_data.return_value = None result = await search_drug_shortages(drug="test") assert "Drug Shortage Data Temporarily Unavailable" in result # Ensure mock data is not present assert "Cisplatin Injection" not in result assert "Methotrexate" not in result # Cache functionality test removed - was testing private implementation details # The public API is tested through search_drug_shortages and get_drug_shortage # Cache expiry test removed - was testing private implementation details # The caching behavior is an implementation detail not part of the public API @pytest.mark.asyncio async def test_search_with_filters(self): """Test drug shortage search with various filters.""" mock_data = { "_fetched_at": datetime.now().isoformat(), "shortages": [ { "generic_name": "Drug A", "brand_names": ["Brand A"], "status": "Current Shortage", "therapeutic_category": "Oncology", }, { 
"generic_name": "Drug B", "brand_names": ["Brand B"], "status": "Resolved", "therapeutic_category": "Cardiology", }, { "generic_name": "Drug C", "brand_names": ["Brand C"], "status": "Current Shortage", "therapeutic_category": "Oncology", }, ], } with patch( "biomcp.openfda.drug_shortages._get_cached_shortage_data", new_callable=AsyncMock, ) as mock_get_data: mock_get_data.return_value = mock_data # Test status filter result = await search_drug_shortages(status="current") assert "Drug A" in result assert "Drug C" in result assert "Drug B" not in result # Test therapeutic category filter result = await search_drug_shortages( therapeutic_category="Oncology" ) assert "Drug A" in result assert "Drug C" in result assert "Drug B" not in result # Test drug name filter result = await search_drug_shortages(drug="Drug B") assert "Drug B" in result assert "Drug A" not in result @pytest.mark.asyncio async def test_get_specific_drug_shortage(self): """Test getting details for a specific drug shortage.""" mock_data = { "_fetched_at": datetime.now().isoformat(), "shortages": [ { "generic_name": "Cisplatin Injection", "brand_names": ["Platinol"], "status": "Current Shortage", "shortage_start_date": "2023-02-10", "estimated_resolution": "Q2 2024", "reason": "Manufacturing delays", "therapeutic_category": "Oncology", "notes": "Limited supplies available", }, ], } with patch( "biomcp.openfda.drug_shortages._get_cached_shortage_data", new_callable=AsyncMock, ) as mock_get_data: mock_get_data.return_value = mock_data result = await get_drug_shortage("cisplatin") assert "Cisplatin Injection" in result assert "Current Shortage" in result assert "Manufacturing delays" in result assert "Oncology" in result assert "Limited supplies available" in result @pytest.mark.asyncio async def test_get_drug_shortage_not_found(self): """Test getting drug shortage for non-existent drug.""" mock_data = { "_fetched_at": datetime.now().isoformat(), "shortages": [ { "generic_name": "Drug A", "status": 
"Current Shortage", }, ], } with patch( "biomcp.openfda.drug_shortages._get_cached_shortage_data", new_callable=AsyncMock, ) as mock_get_data: mock_get_data.return_value = mock_data result = await get_drug_shortage("nonexistent-drug") assert "No shortage information found" in result assert "nonexistent-drug" in result @pytest.mark.asyncio async def test_api_key_parameter_ignored(self): """Test that API key parameter is accepted but not used (FDA limitation).""" mock_data = { "_fetched_at": datetime.now().isoformat(), "shortages": [ { "generic_name": "Test Drug", "status": "Current Shortage", "therapeutic_category": "Test Category", } ], } with patch( "biomcp.openfda.drug_shortages._get_cached_shortage_data", new_callable=AsyncMock, ) as mock_get_data: mock_get_data.return_value = mock_data # API key should be accepted but not affect functionality result = await search_drug_shortages( drug="test", api_key="test-key", ) # When there's data, it should format results assert "FDA Drug Shortage Information" in result assert "Test Drug" in result # Mock data function has been removed - no longer needed ``` -------------------------------------------------------------------------------- /tests/tdd/thinking/test_sequential.py: -------------------------------------------------------------------------------- ```python """Tests for sequential thinking functionality.""" from datetime import datetime import pytest from biomcp.thinking import sequential from biomcp.thinking.session import ThoughtEntry, _session_manager @pytest.fixture(autouse=True) def clear_thinking_state(): """Clear thinking state before each test.""" _session_manager.clear_all_sessions() yield _session_manager.clear_all_sessions() class TestSequentialThinking: """Test the sequential thinking MCP tool.""" @pytest.mark.anyio async def test_basic_sequential_thinking(self): """Test basic sequential thinking flow.""" result = await sequential._sequential_thinking( thought="First step: analyze the problem", 
nextThoughtNeeded=True, thoughtNumber=1, totalThoughts=3, ) assert "Added thought 1 to main sequence" in result assert "Progress: 1/3 thoughts" in result assert "Next thought needed" in result # Get current session session = _session_manager.get_session() assert session is not None assert len(session.thought_history) == 1 # Verify thought structure thought = session.thought_history[0] assert thought.thought == "First step: analyze the problem" assert thought.thought_number == 1 assert thought.total_thoughts == 3 assert thought.next_thought_needed is True assert thought.is_revision is False @pytest.mark.anyio async def test_multiple_sequential_thoughts(self): """Test adding multiple thoughts in sequence.""" # Add first thought await sequential._sequential_thinking( thought="First step", nextThoughtNeeded=True, thoughtNumber=1, totalThoughts=3, ) # Add second thought await sequential._sequential_thinking( thought="Second step", nextThoughtNeeded=True, thoughtNumber=2, totalThoughts=3, ) # Add final thought result = await sequential._sequential_thinking( thought="Final step", nextThoughtNeeded=False, thoughtNumber=3, totalThoughts=3, ) assert "Added thought 3 to main sequence" in result assert "Thinking sequence complete" in result session = _session_manager.get_session() assert len(session.thought_history) == 3 @pytest.mark.anyio async def test_thought_revision(self): """Test revising a previous thought.""" # Add initial thought await sequential._sequential_thinking( thought="Initial analysis", nextThoughtNeeded=True, thoughtNumber=1, totalThoughts=2, ) # Revise the thought result = await sequential._sequential_thinking( thought="Better analysis", nextThoughtNeeded=True, thoughtNumber=1, totalThoughts=2, isRevision=True, revisesThought=1, ) assert "Revised thought 1" in result session = _session_manager.get_session() assert len(session.thought_history) == 1 assert session.thought_history[0].thought == "Better analysis" assert session.thought_history[0].is_revision is 
True @pytest.mark.anyio async def test_branching_logic(self): """Test creating thought branches.""" # Add main sequence thoughts await sequential._sequential_thinking( thought="Main thought 1", nextThoughtNeeded=True, thoughtNumber=1, totalThoughts=3, ) await sequential._sequential_thinking( thought="Main thought 2", nextThoughtNeeded=True, thoughtNumber=2, totalThoughts=3, ) # Create a branch result = await sequential._sequential_thinking( thought="Alternative approach", nextThoughtNeeded=True, thoughtNumber=3, totalThoughts=3, branchFromThought=2, ) assert "Added thought 3 to branch 'branch_2'" in result session = _session_manager.get_session() assert len(session.thought_history) == 2 assert len(session.thought_branches) == 1 assert "branch_2" in session.thought_branches assert len(session.thought_branches["branch_2"]) == 1 @pytest.mark.anyio async def test_validation_errors(self): """Test input validation errors.""" # Test invalid thought number result = await sequential._sequential_thinking( thought="Test", nextThoughtNeeded=False, thoughtNumber=0, totalThoughts=1, ) assert "thoughtNumber must be >= 1" in result # Test invalid total thoughts result = await sequential._sequential_thinking( thought="Test", nextThoughtNeeded=False, thoughtNumber=1, totalThoughts=0, ) assert "totalThoughts must be >= 1" in result # Test revision without specifying which thought result = await sequential._sequential_thinking( thought="Test", nextThoughtNeeded=False, thoughtNumber=1, totalThoughts=1, isRevision=True, ) assert ( "revisesThought must be specified when isRevision=True" in result ) @pytest.mark.anyio async def test_needs_more_thoughts(self): """Test the needsMoreThoughts parameter.""" result = await sequential._sequential_thinking( thought="This problem is more complex than expected", nextThoughtNeeded=True, thoughtNumber=3, totalThoughts=3, needsMoreThoughts=True, ) assert "Added thought 3 to main sequence" in result session = _session_manager.get_session() assert 
len(session.thought_history) == 1 assert ( session.thought_history[0].metadata.get("needsMoreThoughts") is True ) class TestUtilityFunctions: """Test utility functions.""" def test_get_current_timestamp(self): """Test timestamp generation.""" timestamp = sequential.get_current_timestamp() assert isinstance(timestamp, str) # Should be able to parse as ISO format parsed = datetime.fromisoformat( timestamp.replace("Z", "+00:00").replace("T", " ").split(".")[0] ) assert isinstance(parsed, datetime) def test_session_management(self): """Test session management functionality.""" # Clear any existing sessions _session_manager.clear_all_sessions() # Create a new session session = _session_manager.create_session() assert session is not None assert session.session_id is not None # Add a thought entry entry = ThoughtEntry( thought="Test thought", thought_number=1, total_thoughts=1, next_thought_needed=False, ) session.add_thought(entry) assert len(session.thought_history) == 1 assert session.thought_history[0].thought == "Test thought" # Test branch creation branch_entry = ThoughtEntry( thought="Branch thought", thought_number=2, total_thoughts=2, next_thought_needed=False, branch_id="test-branch", branch_from_thought=1, ) session.add_thought(branch_entry) assert len(session.thought_branches) == 1 assert "test-branch" in session.thought_branches assert len(session.thought_branches["test-branch"]) == 1 ``` -------------------------------------------------------------------------------- /tests/tdd/openfda/test_drug_labels.py: -------------------------------------------------------------------------------- ```python """ Unit tests for OpenFDA drug labels integration. 
""" from unittest.mock import patch import pytest from biomcp.openfda.drug_labels import get_drug_label, search_drug_labels @pytest.mark.asyncio async def test_search_drug_labels_by_name(): """Test searching drug labels by name.""" mock_response = { "meta": {"results": {"total": 5}}, "results": [ { "set_id": "abc123", "openfda": { "brand_name": ["KEYTRUDA"], "generic_name": ["PEMBROLIZUMAB"], "application_number": ["BLA125514"], "manufacturer_name": ["MERCK"], "route": ["INTRAVENOUS"], }, "indications_and_usage": [ "KEYTRUDA is indicated for the treatment of patients with unresectable or metastatic melanoma." ], "boxed_warning": [ "Immune-mediated adverse reactions can occur." ], } ], } with patch( "biomcp.openfda.drug_labels.make_openfda_request" ) as mock_request: mock_request.return_value = (mock_response, None) result = await search_drug_labels(name="pembrolizumab", limit=10) # Verify request mock_request.assert_called_once() call_args = mock_request.call_args assert "pembrolizumab" in call_args[0][1]["search"].lower() # Check output assert "FDA Drug Labels" in result assert "KEYTRUDA" in result assert "PEMBROLIZUMAB" in result assert "melanoma" in result assert "BOXED WARNING" in result assert "Immune-mediated" in result assert "abc123" in result @pytest.mark.asyncio async def test_search_drug_labels_by_indication(): """Test searching drug labels by indication.""" mock_response = { "meta": {"results": {"total": 10}}, "results": [ { "set_id": "xyz789", "openfda": { "brand_name": ["DRUG X"], "generic_name": ["GENERIC X"], }, "indications_and_usage": [ "Indicated for breast cancer treatment" ], } ], } with patch( "biomcp.openfda.drug_labels.make_openfda_request" ) as mock_request: mock_request.return_value = (mock_response, None) result = await search_drug_labels(indication="breast cancer") # Verify request call_args = mock_request.call_args assert "breast cancer" in call_args[0][1]["search"].lower() # Check output assert "breast cancer" in result assert "10 
labels" in result @pytest.mark.asyncio async def test_search_drug_labels_no_params(): """Test that searching without parameters returns helpful message.""" result = await search_drug_labels() assert "Please specify" in result assert "drug name, indication, or label section" in result assert "Examples:" in result @pytest.mark.asyncio async def test_search_drug_labels_boxed_warning_filter(): """Test filtering for drugs with boxed warnings.""" mock_response = { "meta": {"results": {"total": 3}}, "results": [ { "set_id": "warn123", "openfda": {"brand_name": ["WARNING DRUG"]}, "boxed_warning": ["Serious warning text"], } ], } with patch( "biomcp.openfda.drug_labels.make_openfda_request" ) as mock_request: mock_request.return_value = (mock_response, None) result = await search_drug_labels(boxed_warning=True) # Verify boxed warning filter in search call_args = mock_request.call_args assert "_exists_:boxed_warning" in call_args[0][1]["search"] # Check output assert "WARNING DRUG" in result assert "Serious warning" in result @pytest.mark.asyncio async def test_get_drug_label_detail(): """Test getting detailed drug label.""" mock_response = { "results": [ { "set_id": "detail123", "openfda": { "brand_name": ["DETAILED DRUG"], "generic_name": ["GENERIC DETAILED"], "application_number": ["NDA123456"], "manufacturer_name": ["PHARMA CORP"], "route": ["ORAL"], "pharm_class_epc": ["KINASE INHIBITOR"], }, "boxed_warning": ["Serious boxed warning"], "indications_and_usage": ["Indicated for cancer"], "dosage_and_administration": ["Take once daily"], "contraindications": ["Do not use if allergic"], "warnings_and_precautions": ["Monitor liver function"], "adverse_reactions": ["Common: nausea, fatigue"], "drug_interactions": ["Avoid with CYP3A4 inhibitors"], "clinical_pharmacology": ["Mechanism of action details"], "clinical_studies": ["Phase 3 trial results"], } ] } with patch( "biomcp.openfda.drug_labels.make_openfda_request" ) as mock_request: mock_request.return_value = 
(mock_response, None) result = await get_drug_label("detail123") # Verify request mock_request.assert_called_once() call_args = mock_request.call_args assert "detail123" in call_args[0][1]["search"] # Check detailed output assert "DETAILED DRUG" in result assert "GENERIC DETAILED" in result assert "NDA123456" in result assert "PHARMA CORP" in result assert "ORAL" in result assert "KINASE INHIBITOR" in result assert "BOXED WARNING" in result assert "Serious boxed warning" in result assert "INDICATIONS AND USAGE" in result assert "Indicated for cancer" in result assert "DOSAGE AND ADMINISTRATION" in result assert "Take once daily" in result assert "CONTRAINDICATIONS" in result assert "WARNINGS AND PRECAUTIONS" in result assert "ADVERSE REACTIONS" in result assert "DRUG INTERACTIONS" in result @pytest.mark.asyncio async def test_get_drug_label_specific_sections(): """Test getting specific sections of drug label.""" mock_response = { "results": [ { "set_id": "section123", "openfda": {"brand_name": ["SECTION DRUG"]}, "indications_and_usage": ["Cancer indication"], "adverse_reactions": ["Side effects list"], "clinical_studies": ["Study data"], } ] } with patch( "biomcp.openfda.drug_labels.make_openfda_request" ) as mock_request: mock_request.return_value = (mock_response, None) sections = ["indications_and_usage", "adverse_reactions"] result = await get_drug_label("section123", sections) # Check that requested sections are included assert "INDICATIONS AND USAGE" in result assert "Cancer indication" in result assert "ADVERSE REACTIONS" in result assert "Side effects list" in result # Clinical studies should not be in output since not requested assert "CLINICAL STUDIES" not in result @pytest.mark.asyncio async def test_get_drug_label_not_found(): """Test handling when drug label is not found.""" with patch( "biomcp.openfda.drug_labels.make_openfda_request" ) as mock_request: mock_request.return_value = ({"results": []}, None) result = await get_drug_label("NOTFOUND456") 
assert "NOTFOUND456" in result assert "not found" in result ``` -------------------------------------------------------------------------------- /docs/getting-started/03-authentication-and-api-keys.md: -------------------------------------------------------------------------------- ```markdown # Authentication and API Keys BioMCP integrates with multiple biomedical databases. While many features work without authentication, some advanced capabilities require API keys for enhanced functionality. ## Overview of API Keys | Service | Required? | Features Enabled | Get Key | | --------------- | ---------- | ------------------------------------------------- | ---------------------------------------------------------------------- | | **NCI API** | Optional | Advanced clinical trial filters, biomarker search | [api.cancer.gov](https://api.cancer.gov) | | **AlphaGenome** | Required\* | Variant effect predictions | [deepmind.google.com](https://deepmind.google.com/science/alphagenome) | | **cBioPortal** | Optional | Enhanced cancer genomics queries | [cbioportal.org](https://www.cbioportal.org/webAPI) | \*Required only when using AlphaGenome features ## Setting Up API Keys ### Method 1: Environment Variables (Recommended for Personal Use) Set environment variables in your shell configuration: ```bash # Add to ~/.bashrc, ~/.zshrc, or equivalent export NCI_API_KEY="your-nci-api-key" export ALPHAGENOME_API_KEY="your-alphagenome-key" export CBIO_TOKEN="your-cbioportal-token" ``` ### Method 2: Configuration Files #### For Claude Desktop Add keys to your Claude Desktop configuration: ```json { "mcpServers": { "biomcp": { "command": "uv", "args": ["run", "--with", "biomcp-python", "biomcp", "run"], "env": { "NCI_API_KEY": "your-nci-api-key", "ALPHAGENOME_API_KEY": "your-alphagenome-key", "CBIO_TOKEN": "your-cbioportal-token" } } } } ``` #### For Docker Deployments Include in your Docker run command: ```bash docker run -e NCI_API_KEY="your-key" \ -e ALPHAGENOME_API_KEY="your-key" \ 
-e CBIO_TOKEN="your-token" \ biomcp:latest ``` ### Method 3: Per-Request Keys (For Hosted Environments) When using BioMCP through AI assistants or hosted services, provide keys in your request: ``` "Predict effects of BRAF V600E mutation. My AlphaGenome API key is YOUR_KEY_HERE" ``` The AI will recognize patterns like "My [service] API key is..." and use the key for that request only. ## Individual Service Setup ### NCI Clinical Trials API The National Cancer Institute API provides advanced clinical trial search capabilities. #### Getting Your Key 1. Visit [api.cancer.gov](https://api.cancer.gov) 2. Click "Get API Key" 3. Complete registration 4. Key is emailed immediately #### Features Enabled - Advanced biomarker-based trial search - Organization and investigator lookups - Intervention and disease vocabularies - Higher rate limits (1000 requests/day vs 100) #### Usage Example ```bash # With API key set export NCI_API_KEY="your-key" # Search trials with biomarker criteria biomcp trial search --condition melanoma --source nci \ --required-mutations "BRAF V600E" --allow-brain-mets true ``` ### AlphaGenome Google DeepMind's AlphaGenome predicts variant effects on gene expression and chromatin accessibility. #### Getting Your Key 1. Visit [AlphaGenome Portal](https://deepmind.google.com/science/alphagenome) 2. Register for non-commercial use 3. Receive API key via email 4. Accept terms of service #### Features Enabled - Gene expression predictions - Chromatin accessibility analysis - Splicing effect predictions - Tissue-specific analyses #### Usage Examples **CLI with environment variable:** ```bash export ALPHAGENOME_API_KEY="your-key" biomcp variant predict chr7 140753336 A T ``` **CLI with per-request key:** ```bash biomcp variant predict chr7 140753336 A T --api-key YOUR_KEY ``` **Through AI assistant:** ``` "Predict regulatory effects of BRAF V600E (chr7:140753336 A>T). 
My AlphaGenome API key is YOUR_KEY_HERE" ``` ### cBioPortal The cBioPortal token enables enhanced cancer genomics queries. #### Getting Your Token 1. Create account at [cbioportal.org](https://www.cbioportal.org) 2. Navigate to "Web API" section 3. Generate a personal access token 4. Copy the token (shown only once) #### Features Enabled - Higher API rate limits - Access to private studies (if authorized) - Batch query capabilities - Extended timeout limits #### Usage cBioPortal integration is automatic when searching for genes. The token enables: ```bash # Enhanced gene search with cancer genomics export CBIO_TOKEN="your-token" biomcp article search --gene BRAF --disease melanoma ``` ## Security Best Practices ### DO: - Store keys in environment variables or secure config files - Use per-request keys in shared/hosted environments - Rotate keys periodically - Use separate keys for development/production ### DON'T: - Commit keys to version control - Share keys with others - Include keys in code or documentation - Store keys in plain text files ### Git Security Add to `.gitignore`: ``` .env .env.local *.key config/secrets/ ``` Use git-secrets to prevent accidental commits: ```bash # Install git-secrets brew install git-secrets # macOS # or follow instructions at github.com/awslabs/git-secrets # Set up in your repo git secrets --install git secrets --register-aws # Registers AWS credential patterns only; add patterns for other keys with `git secrets --add <pattern>` ``` ## Troubleshooting ### "API Key Required" Errors **For AlphaGenome:** - This service always requires a key - Provide it via environment variable or per-request - Check key spelling and format **For NCI:** - Basic search works without key - Advanced features require authentication - Verify key is active at api.cancer.gov ### "Invalid API Key" Errors 1. Check for extra spaces or quotes 2. Ensure key hasn't expired 3. Verify you're using the correct service's key 4.
Test key directly with the service's API ### Rate Limit Errors **Without API keys:** - Public limits are restrictive (e.g., 100 requests/day) - Add delays between requests - Consider getting API keys **With API keys:** - Limits are much higher but still exist - Implement exponential backoff - Cache results when possible ## Testing Your Setup ### Check Environment Variables ```bash # List all BioMCP-related environment variables env | grep -E "(NCI_API_KEY|ALPHAGENOME_API_KEY|CBIO_TOKEN)" ``` ### Test Each Service ```bash # Test NCI API biomcp trial search --condition cancer --source nci --limit 1 # Test AlphaGenome (requires key) biomcp variant predict chr7 140753336 A T --limit 1 # Test cBioPortal integration biomcp article search --gene TP53 --limit 1 ``` ## API Key Management Tools For managing multiple API keys securely: ### 1. direnv (Recommended) ```bash # Install direnv brew install direnv # macOS # Add to shell: eval "$(direnv hook zsh)" # Create .envrc in project echo 'export NCI_API_KEY="your-key"' > .envrc direnv allow ``` ### 2. 1Password CLI ```bash # Store in 1Password op item create --category=password \ --title="BioMCP API Keys" \ --vault="Development" \ NCI_API_KEY="your-key" # Load in shell export NCI_API_KEY=$(op read "op://Development/BioMCP API Keys/NCI_API_KEY") ``` ### 3. AWS Secrets Manager ```bash # Store secret aws secretsmanager create-secret \ --name biomcp/api-keys \ --secret-string '{"NCI_API_KEY":"your-key"}' # Retrieve in script export NCI_API_KEY=$(aws secretsmanager get-secret-value \ --secret-id biomcp/api-keys \ --query SecretString \ --output text | jq -r .NCI_API_KEY) ``` ## Next Steps Now that you have API keys configured: 1. Test each service to ensure keys work 2. Explore [How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md) for advanced features 3. Set up [logging and monitoring](../how-to-guides/05-logging-and-monitoring-with-bigquery.md) 4. 
Review [security policies](../policies.md) for your organization ``` -------------------------------------------------------------------------------- /docs/concepts/03-sequential-thinking-with-the-think-tool.md: -------------------------------------------------------------------------------- ```markdown # Sequential Thinking with the Think Tool ## CRITICAL: The Think Tool is MANDATORY **The 'think' tool must be your FIRST action when using BioMCP. This is not optional.** For detailed technical documentation on the think tool parameters and usage, see the [MCP Tools Reference - Think Tool](../user-guides/02-mcp-tools-reference.md#3-think). ## Why Sequential Thinking? Biomedical research is inherently complex, requiring systematic analysis of interconnected data from multiple sources. The think tool enforces a structured approach that: - **Prevents Information Overload**: Breaks complex queries into manageable steps - **Ensures Comprehensive Coverage**: Systematic thinking catches details that might be missed - **Documents Reasoning**: Creates an audit trail of research decisions - **Improves Accuracy**: Thoughtful planning leads to better search strategies ## Mandatory Usage Requirements 🚨 **REQUIRED USAGE:** - You MUST call 'think' BEFORE any search or fetch operations - EVERY biomedical research query requires thinking first - ALL multi-step analyses must begin with the think tool - ANY task using BioMCP tools requires prior planning with think ⚠️ **WARNING - Skipping the think tool will result in:** - Incomplete analysis - Poor search strategies - Missing critical connections - Suboptimal results - Frustrated users ## How to Use the Think Tool The think tool accepts these parameters: ```python think( thought="Your reasoning about the current step", thoughtNumber=1, # Sequential number starting from 1 totalThoughts=5, # Optional: estimated total thoughts needed nextThoughtNeeded=True # Set to False only when analysis is complete ) ``` ## Sequential Thinking 
Patterns ### Pattern 1: Initial Query Decomposition Always start by breaking down the user's query: ```python # User asks: "What are the treatment options for BRAF V600E melanoma?" think( thought="Breaking down query: Need to find 1) BRAF V600E mutation significance in melanoma, 2) approved treatments for BRAF-mutant melanoma, 3) clinical trials for new therapies, 4) resistance mechanisms and combination strategies", thoughtNumber=1, nextThoughtNeeded=True ) ``` ### Pattern 2: Search Strategy Planning Plan your data collection approach: ```python think( thought="Search strategy: First use gene_getter for BRAF context, then article_searcher for BRAF V600E melanoma treatments focusing on FDA-approved drugs, followed by trial_searcher for ongoing studies with BRAF inhibitors", thoughtNumber=2, nextThoughtNeeded=True ) ``` ### Pattern 3: Progressive Refinement Document findings and adjust strategy: ```python think( thought="Found 3 FDA-approved BRAF inhibitors (vemurafenib, dabrafenib, encorafenib). 
Need to search for combination therapies with MEK inhibitors based on resistance patterns identified in literature", thoughtNumber=3, nextThoughtNeeded=True ) ``` ### Pattern 4: Synthesis Planning Before creating final output: ```python think( thought="Ready to synthesize: Will organize findings into 1) First-line treatments (BRAF+MEK combos), 2) Second-line options (immunotherapy), 3) Emerging therapies from trials, 4) Resistance mechanisms to consider", thoughtNumber=4, nextThoughtNeeded=False # Analysis complete ) ``` ## Common Think Tool Workflows ### Literature Review Workflow ```python # Step 1: Problem definition think(thought="User wants comprehensive review of CDK4/6 inhibitors in breast cancer...", thoughtNumber=1) # Step 2: Search parameters think(thought="Will search for palbociclib, ribociclib, abemaciclib in HR+/HER2- breast cancer...", thoughtNumber=2) # Step 3: Quality filtering think(thought="Found 47 articles, filtering for Phase III trials and meta-analyses...", thoughtNumber=3) # Step 4: Evidence synthesis think(thought="Identified consistent PFS benefit across trials, now analyzing OS data...", thoughtNumber=4) ``` ### Clinical Trial Analysis Workflow ```python # Step 1: Criteria identification think(thought="Patient has EGFR L858R lung cancer, progressed on osimertinib...", thoughtNumber=1) # Step 2: Trial search strategy think(thought="Searching for trials accepting EGFR-mutant NSCLC after TKI resistance...", thoughtNumber=2) # Step 3: Eligibility assessment think(thought="Found 12 trials, checking for brain metastases eligibility...", thoughtNumber=3) # Step 4: Prioritization think(thought="Ranking trials by proximity, novel mechanisms, and enrollment status...", thoughtNumber=4) ``` ### Variant Interpretation Workflow ```python # Step 1: Variant identification think(thought="Analyzing TP53 R248Q mutation found in patient's tumor...", thoughtNumber=1) # Step 2: Database queries think(thought="Will check MyVariant for population frequency, 
cBioPortal for cancer prevalence...", thoughtNumber=2) # Step 3: Functional assessment think(thought="Variant is pathogenic, affects DNA binding domain, common in multiple cancers...", thoughtNumber=3) # Step 4: Clinical implications think(thought="Synthesizing prognostic impact and potential therapeutic vulnerabilities...", thoughtNumber=4) ``` ## Think Tool Best Practices ### DO: - Start EVERY BioMCP session with think - Use sequential numbering (1, 2, 3...) - Document key findings in each thought - Adjust strategy based on intermediate results - Use think to track progress through complex analyses ### DON'T: - Skip think and jump to searches - Use think only at the beginning - Set nextThoughtNeeded=false prematurely - Use generic thoughts without specific content - Forget to document decision rationale ## Integration with Other Tools The think tool should wrap around other tool usage: ```python # CORRECT PATTERN think(thought="Planning BRAF melanoma research...", thoughtNumber=1) gene_info = gene_getter("BRAF") think(thought="BRAF is a serine/threonine kinase, V600E creates constitutive activation. Searching for targeted therapies...", thoughtNumber=2) articles = article_searcher(genes=["BRAF"], diseases=["melanoma"], keywords=["vemurafenib", "dabrafenib"]) think(thought="Found key trials showing BRAF+MEK combination superiority. Checking for active trials...", thoughtNumber=3) trials = trial_searcher(conditions=["melanoma"], interventions=["BRAF inhibitor"]) # INCORRECT PATTERN - NO THINKING gene_info = gene_getter("BRAF") # ❌ Started without thinking articles = article_searcher(...) 
# ❌ No strategy planning ``` ## Reminder System BioMCP includes automatic reminders if you forget to use think: - Search results will include a reminder message - The reminder appears as a system message - It prompts you to use think for better results - This ensures consistent methodology ## Advanced Sequential Thinking ### Branching Logic Use think to handle conditional paths: ```python think( thought="No direct trials found for this rare mutation. Pivoting to search for basket trials and mutation-agnostic approaches...", thoughtNumber=5, nextThoughtNeeded=True ) ``` ### Error Recovery Document and adjust when searches fail: ```python think( thought="MyVariant query failed for this structural variant. Will use article search to find functional studies instead...", thoughtNumber=6, nextThoughtNeeded=True ) ``` ### Complex Integration Coordinate multiple data sources: ```python think( thought="Integrating findings: cBioPortal shows 15% frequency in lung adenocarcinoma, articles describe resistance mechanisms, trials testing combination strategies...", thoughtNumber=7, nextThoughtNeeded=True ) ``` ## Conclusion The think tool is not just a requirement—it's your research companion that ensures systematic, thorough, and reproducible biomedical research. By following sequential thinking patterns, you'll deliver comprehensive insights that address all aspects of complex biomedical queries. Remember: **Always think first, then search. Document your reasoning. 
def calculate_delay(attempt: int, config: RetryConfig) -> float:
    """Calculate delay for the next retry attempt.

    The delay grows as ``initial_delay * exponential_base**attempt`` and is
    capped at ``config.max_delay``. When ``config.jitter`` is set, a random
    offset of up to ``+/- delay * METRIC_JITTER_RANGE`` is added *after* the
    cap, so a jittered delay may slightly exceed ``max_delay``.

    Args:
        attempt: Current attempt number (0-based)
        config: Retry configuration

    Returns:
        Delay in seconds before the next retry (never negative)
    """
    # Exponential backoff: delay = initial_delay * (base ^ attempt)
    try:
        delay = config.initial_delay * (config.exponential_base**attempt)
    except OverflowError:
        # A float base raised to a large attempt number no longer fits in a
        # float. The true value is far beyond any cap, so use the cap
        # directly (or zero when there is no positive initial delay).
        delay = config.max_delay if config.initial_delay > 0 else 0.0

    # Cap at maximum delay
    delay = min(delay, config.max_delay)

    # Add jitter to prevent thundering herd
    if config.jitter:
        # Fraction of the delay used as jitter (the original comment calls
        # this 10%; the value itself lives in the constants module).
        jitter_range = delay * METRIC_JITTER_RANGE
        # `secrets` is used as the randomness source; the draw is mapped
        # from [0, 2**32 - 1] onto a float in [-1, 1], then scaled.
        random_factor = (secrets.randbits(32) / (2**32 - 1)) * 2 - 1
        delay += random_factor * jitter_range

    # Ensure non-negative, and always hand back a float as annotated
    return max(0.0, float(delay))
async def retry_with_backoff(
    func: Callable[..., Coroutine[Any, Any, T]],
    *args: Any,
    config: RetryConfig | None = None,
    **kwargs: Any,
) -> T:
    """Execute a function with retry logic and exponential backoff.

    This is an alternative to the decorator for cases where you need
    more control over retry behavior.

    Args:
        func: Async function to execute. Any awaitable-returning callable
            works, including ``functools.partial`` objects.
        *args: Positional arguments for the function
        config: Retry configuration (uses defaults if not provided)
        **kwargs: Keyword arguments for the function

    Returns:
        Result of the function call

    Raises:
        Exception: The exception raised by ``func`` when it is not
            retryable or when the final attempt fails.
        RuntimeError: If ``config.max_attempts`` is less than 1, so that
            ``func`` was never called.
    """
    if config is None:
        config = RetryConfig()

    # functools.partial objects and callable instances have no __name__.
    # Looking it up lazily inside the except block would raise
    # AttributeError and hide the real failure, so resolve a label once.
    func_name = getattr(func, "__name__", None) or repr(func)
    final_attempt = config.max_attempts - 1

    for attempt in range(config.max_attempts):
        try:
            return await func(*args, **kwargs)
        except Exception as exc:
            # The last attempt re-raises regardless of retryability
            if attempt == final_attempt:
                logger.error(
                    f"Max retry attempts ({config.max_attempts}) "
                    f"reached for {func_name}: {exc}"
                )
                raise

            # Non-retryable errors propagate immediately
            if not is_retryable_exception(exc, config):
                logger.debug(
                    f"Non-retryable exception in {func_name}: {exc}"
                )
                raise

            # Calculate delay for next attempt
            delay = calculate_delay(attempt, config)
            logger.warning(
                f"Retry attempt {attempt + 1}/{config.max_attempts} "
                f"for {func_name} after {delay:.2f}s delay. "
                f"Error: {exc}"
            )

            # Wait before retrying
            await asyncio.sleep(delay)

    # Every loop iteration either returns, raises, or continues, and the
    # final iteration cannot continue. This line is therefore reached only
    # when the loop body never ran (max_attempts < 1).
    raise RuntimeError("Unexpected retry loop exit")
""" import os import pytest from biomcp.openfda.adverse_events import search_adverse_events from biomcp.openfda.device_events import search_device_events from biomcp.openfda.drug_approvals import search_drug_approvals from biomcp.openfda.drug_labels import search_drug_labels from biomcp.openfda.drug_recalls import search_drug_recalls @pytest.mark.integration class TestOpenFDAIntegration: """Integration tests for OpenFDA API endpoints.""" @pytest.mark.asyncio async def test_adverse_events_real_api(self): """Test real adverse event API call.""" result = await search_adverse_events(drug="aspirin", limit=5) # Should return formatted results assert isinstance(result, str) assert len(result) > 100 # Non-trivial response # Should contain disclaimer assert "FDA Data Notice" in result # Should have structure if "No adverse events found" not in result: assert ( "Total Reports Found:" in result or "adverse" in result.lower() ) @pytest.mark.asyncio async def test_drug_labels_real_api(self): """Test real drug label API call.""" result = await search_drug_labels(name="ibuprofen", limit=5) # Should return formatted results assert isinstance(result, str) assert len(result) > 100 # Should contain disclaimer assert "FDA Data Notice" in result # Should have label information if "No drug labels found" not in result: assert "Total Labels Found:" in result or "label" in result.lower() @pytest.mark.asyncio async def test_device_events_real_api(self): """Test real device event API call.""" result = await search_device_events(device="insulin pump", limit=5) # Should return formatted results assert isinstance(result, str) assert len(result) > 100 # Should contain disclaimer assert "FDA Data Notice" in result # Should have device information if "No device events found" not in result: assert ( "Total Events Found:" in result or "device" in result.lower() ) @pytest.mark.asyncio async def test_drug_approvals_real_api(self): """Test real drug approval API call.""" result = await 
@pytest.mark.asyncio
async def test_rate_limiting_without_key(self):
    """Fire several back-to-back searches with no API key configured.

    The key (if any) is removed from the environment for the duration of
    the test and put back afterwards, whatever the outcome.
    """
    saved_key = os.environ.get("OPENFDA_API_KEY")
    if saved_key:
        os.environ.pop("OPENFDA_API_KEY")

    try:
        # Requests are awaited one after another, in quick succession
        responses = [
            await search_adverse_events(drug=f"drug{i}", limit=1)
            for i in range(5)
        ]

        # Each call must come back with a string rather than blow up
        for response in responses:
            assert isinstance(response, str)
    finally:
        if saved_key:
            os.environ["OPENFDA_API_KEY"] = saved_key
@pytest.mark.asyncio
async def test_special_characters_handling(self):
    """Search drug labels with a name that contains a forward slash."""
    combo_name = "aspirin/dipyridamole"
    response = await search_drug_labels(name=combo_name, limit=5)

    # The API might return an error or no results for complex drug names;
    # getting any string back shows the slash was handled gracefully.
    assert isinstance(response, str)
def test_metric_summary_from_samples():
    """Test MetricSummary calculation from samples.

    Counts and the min/max (which are sample values themselves) are
    compared exactly. Aggregates derived through float arithmetic (sum,
    mean, ratio, median) are compared with ``pytest.approx`` so the test
    does not depend on how the summary accumulates its floats.
    """
    now = datetime.now()
    samples = [
        MetricSample(timestamp=now, duration=0.1, success=True),
        MetricSample(timestamp=now, duration=0.2, success=True),
        MetricSample(
            timestamp=now, duration=0.3, success=False, error="timeout"
        ),
        MetricSample(timestamp=now, duration=0.4, success=True),
        MetricSample(timestamp=now, duration=0.5, success=True),
    ]

    summary = MetricSummary.from_samples("test_metric", samples)

    assert summary.name == "test_metric"
    assert summary.count == 5
    assert summary.success_count == 4
    assert summary.error_count == 1

    assert summary.total_duration == pytest.approx(1.5)
    assert summary.min_duration == 0.1
    assert summary.max_duration == 0.5
    assert summary.avg_duration == pytest.approx(0.3)
    assert summary.error_rate == pytest.approx(0.2)  # 1/5

    # Check percentiles
    assert summary.p50_duration == pytest.approx(0.3)  # median
    assert 0.4 <= summary.p95_duration <= 0.5
    assert 0.4 <= summary.p99_duration <= 0.5
@pytest.mark.asyncio
async def test_track_performance_decorator_async():
    """Decorate a coroutine with track_performance and inspect its metric."""
    from biomcp.metrics import _metrics_collector

    # Start from an empty collector so the counts below are exact
    await _metrics_collector.clear()

    @track_performance("test_async_func")
    async def slow_operation():
        await asyncio.sleep(0.1)
        return "done"

    outcome = await slow_operation()
    assert outcome == "done"

    # One successful sample, at least as long as the sleep, should exist
    recorded = await get_metric_summary("test_async_func")
    assert recorded is not None
    assert recorded.count == 1
    assert recorded.success_count == 1
    assert recorded.min_duration >= 0.1
def test_timer_without_event_loop():
    """Timer used from plain synchronous code logs instead of recording."""
    metric = "test_no_loop"

    with patch("biomcp.metrics.logger") as fake_logger:
        # Synchronous usage: no event loop is running in this test.
        with Timer(metric):
            time.sleep(0.01)

        # Exactly one debug line is expected in place of a recorded metric.
        fake_logger.debug.assert_called_once()
        message = fake_logger.debug.call_args[0][0]
        for fragment in (metric, "duration="):
            assert fragment in message
"""
Rate limiting and circuit breaker for OpenFDA API requests.

This module provides client-side rate limiting to prevent API quota
exhaustion and circuit breaker pattern to handle API failures gracefully.
"""

import asyncio
import logging
import os
import time
from collections.abc import Callable
from datetime import datetime
from enum import Enum
from typing import Any

logger = logging.getLogger(__name__)


class CircuitState(Enum):
    """Circuit breaker states."""

    CLOSED = "closed"  # Normal operation
    OPEN = "open"  # Blocking requests
    HALF_OPEN = "half_open"  # Testing recovery


class RateLimiter:
    """
    Token bucket rate limiter for FDA API requests.

    The bucket holds at most ``rate`` tokens and refills continuously at
    ``rate / per`` tokens per second. Every :meth:`acquire` consumes one
    token and sleeps first when the bucket is empty.
    """

    def __init__(self, rate: int = 10, per: float = 1.0):
        """
        Initialize rate limiter.

        Args:
            rate: Number of requests allowed
            per: Time period in seconds

        Raises:
            ValueError: If ``rate`` or ``per`` is not positive (either would
                otherwise surface later as a ZeroDivisionError or a
                nonsensical wait time inside ``acquire``).
        """
        if rate <= 0 or per <= 0:
            raise ValueError("rate and per must be positive")

        self.rate = rate
        self.per = per
        self.allowance = float(rate)
        self.last_check = time.monotonic()
        self._lock = asyncio.Lock()

    async def acquire(self) -> None:
        """
        Acquire permission to make a request.

        Blocks if rate limit would be exceeded. The lock is held while
        sleeping, so concurrent callers are released one at a time.
        """
        async with self._lock:
            current = time.monotonic()
            time_passed = current - self.last_check
            self.last_check = current

            # Add tokens based on time passed, capped at the bucket size.
            refilled = self.allowance + time_passed * (self.rate / self.per)
            self.allowance = min(refilled, float(self.rate))

            if self.allowance >= 1.0:
                # Consume one token
                self.allowance -= 1.0
                return

            # Not enough tokens: wait until exactly one has accrued.
            deficit = 1.0 - self.allowance
            wait_time = deficit * (self.per / self.rate)
            logger.debug("Rate limit: waiting %.2fs", wait_time)
            await asyncio.sleep(wait_time)

            # The token earned while sleeping is spent on this request.
            # Restart the refill clock here as well; otherwise the slept
            # interval would be credited a second time on the next call
            # and the limiter would admit requests above the target rate.
            self.allowance = 0.0
            self.last_check = time.monotonic()


class CircuitBreaker:
    """
    Circuit breaker to prevent cascading failures.

    CLOSED lets every call through; after ``failure_threshold`` failures
    the breaker goes OPEN and rejects calls. Once ``recovery_timeout``
    seconds have elapsed since the last failure, it goes HALF_OPEN and
    admits a limited number of probe calls. A successful probe closes the
    circuit; a failed probe reopens it.
    """

    def __init__(
        self,
        failure_threshold: int = 5,
        recovery_timeout: int = 60,
        half_open_max_calls: int = 3,
    ):
        """
        Initialize circuit breaker.

        Args:
            failure_threshold: Number of failures before opening circuit
            recovery_timeout: Seconds to wait before attempting recovery
            half_open_max_calls: Max calls allowed in half-open state
        """
        self.failure_threshold = failure_threshold
        self.recovery_timeout = recovery_timeout
        self.half_open_max_calls = half_open_max_calls

        self.failure_count = 0
        # Wall-clock timestamp (time.time()) of the most recent failure.
        self.last_failure_time: float | None = None
        self.state = CircuitState.CLOSED
        self.half_open_calls = 0
        self._lock = asyncio.Lock()

    async def call(self, func: Callable, *args, **kwargs) -> Any:
        """
        Execute function through circuit breaker.

        Args:
            func: Async function to call
            *args: Function arguments
            **kwargs: Function keyword arguments

        Returns:
            Function result

        Raises:
            Exception: If circuit is open or function fails
        """
        # Admission is decided under the lock; the wrapped call itself runs
        # outside it so that calls are not serialized.
        async with self._lock:
            if self.state == CircuitState.OPEN:
                if self._should_attempt_reset():
                    self.state = CircuitState.HALF_OPEN
                    # This call is itself the first recovery probe, so it
                    # counts toward half_open_max_calls.
                    self.half_open_calls = 1
                    logger.info(
                        "Circuit breaker: attempting recovery (half-open)"
                    )
                else:
                    if self.last_failure_time is not None:
                        time_left = self.recovery_timeout - (
                            time.time() - self.last_failure_time
                        )
                        raise Exception(
                            f"Circuit breaker is OPEN. Retry in {time_left:.0f} seconds"
                        )
                    else:
                        raise Exception("Circuit breaker is OPEN")
            elif self.state == CircuitState.HALF_OPEN:
                if self.half_open_calls >= self.half_open_max_calls:
                    # Don't allow more calls in half-open state
                    raise Exception(
                        "Circuit breaker is HALF_OPEN. Max test calls reached"
                    )
                self.half_open_calls += 1

        # Execute the function
        try:
            result = await func(*args, **kwargs)
        except Exception:
            await self._on_failure()
            # Bare raise keeps the original traceback intact.
            raise
        await self._on_success()
        return result

    async def _on_success(self) -> None:
        """Handle successful call."""
        async with self._lock:
            if self.state == CircuitState.HALF_OPEN:
                # Recovery succeeded
                self.state = CircuitState.CLOSED
                logger.info("Circuit breaker: recovered (closed)")
            # Any success resets the failure count.
            self.failure_count = 0

    async def _on_failure(self) -> None:
        """Handle failed call."""
        async with self._lock:
            self.failure_count += 1
            self.last_failure_time = time.time()

            if self.state == CircuitState.HALF_OPEN:
                # Recovery failed, reopen circuit
                self.state = CircuitState.OPEN
                logger.warning("Circuit breaker: recovery failed (open)")
            elif self.failure_count >= self.failure_threshold:
                # Too many failures, open circuit
                self.state = CircuitState.OPEN
                logger.warning(
                    f"Circuit breaker: opened after {self.failure_count} failures"
                )

    def _should_attempt_reset(self) -> bool:
        """Check if enough time has passed to attempt reset."""
        return (
            self.last_failure_time is not None
            and time.time() - self.last_failure_time >= self.recovery_timeout
        )

    @property
    def is_closed(self) -> bool:
        """Check if circuit is closed (normal operation)."""
        return self.state == CircuitState.CLOSED

    @property
    def is_open(self) -> bool:
        """Check if circuit is open (blocking requests)."""
        return self.state == CircuitState.OPEN

    def get_state(self) -> dict[str, Any]:
        """
        Get current circuit breaker state.

        Returns:
            Dict with ``state`` (enum value string), ``failure_count`` and
            ``last_failure`` (ISO-8601 local timestamp, or None when no
            failure has been recorded).
        """
        return {
            "state": self.state.value,
            "failure_count": self.failure_count,
            "last_failure": (
                datetime.fromtimestamp(self.last_failure_time).isoformat()
                if self.last_failure_time
                else None
            ),
        }


# Global instances
# Configure based on API key availability (read once, at import time)
_has_api_key = bool(os.environ.get("OPENFDA_API_KEY"))
_rate_limit = 240 if _has_api_key else 40  # per minute

# Create rate limiter (limit is expressed per 60-second window)
FDA_RATE_LIMITER = RateLimiter(rate=_rate_limit, per=60.0)

# Create circuit breaker
FDA_CIRCUIT_BREAKER = CircuitBreaker(
    failure_threshold=5, recovery_timeout=60, half_open_max_calls=3
)

# Semaphore for concurrent request limiting
FDA_SEMAPHORE = asyncio.Semaphore(10)  # Max 10 concurrent requests


async def rate_limited_request(func: Callable, *args, **kwargs) -> Any:
    """
    Execute FDA API request with rate limiting and circuit breaker.

    The semaphore is taken first, then a rate-limit token, and only then
    is the call handed to the circuit breaker.

    Args:
        func: Async function to call
        *args: Function arguments
        **kwargs: Function keyword arguments

    Returns:
        Function result

    Raises:
        Exception: Whatever ``FDA_CIRCUIT_BREAKER.call`` raises, i.e. the
            breaker's rejection or the error from ``func`` itself.
    """
    # Apply semaphore for concurrent limiting
    async with FDA_SEMAPHORE:
        # Apply rate limiting
        await FDA_RATE_LIMITER.acquire()

        # Apply circuit breaker
        return await FDA_CIRCUIT_BREAKER.call(func, *args, **kwargs)
class TestOrganizationsModule:
    """Organization search and lookup with the CTS request layer patched out."""

    @pytest.mark.asyncio
    async def test_search_organizations(self):
        """Search results expose the total and the list of organizations."""
        payload = {
            "data": [{"id": "ORG001", "name": "Test Cancer Center"}],
            "total": 1,
        }
        target = "biomcp.organizations.search.make_cts_request"

        with patch(target) as cts_stub:
            cts_stub.return_value = payload

            found = await search_organizations(
                name="Cancer Center", api_key="test-key"
            )

            organizations = found["organizations"]
            assert found["total"] == 1
            assert len(organizations) == 1
            assert organizations[0]["name"] == "Test Cancer Center"

    @pytest.mark.asyncio
    async def test_get_organization(self):
        """A single organization comes back with its fields at top level."""
        expected = {
            "id": "ORG001",
            "name": "Test Cancer Center",
            "type": "Academic",
        }
        target = "biomcp.organizations.getter.make_cts_request"

        with patch(target) as cts_stub:
            cts_stub.return_value = {"data": dict(expected)}

            organization = await get_organization("ORG001", api_key="test-key")

            assert organization["id"] == "ORG001"
            assert organization["name"] == "Test Cancer Center"
            assert organization["type"] == "Academic"
class TestDiseasesModule:
    """Disease search with the CTS request layer patched out."""

    @pytest.mark.asyncio
    async def test_search_diseases_nci(self):
        """Search results expose the total and the list of diseases."""
        melanoma = {
            "id": "DIS001",
            "name": "Melanoma",
            "synonyms": ["Malignant Melanoma"],
        }

        with patch("biomcp.diseases.search.make_cts_request") as cts_stub:
            cts_stub.return_value = {"data": [melanoma], "total": 1}

            found = await search_diseases(name="Melanoma", api_key="test-key")

            diseases = found["diseases"]
            assert found["total"] == 1
            assert len(diseases) == 1
            assert diseases[0]["name"] == "Melanoma"
@variant_app.command("get")
def get_variant(
    variant_id: Annotated[
        str,
        typer.Argument(
            help="rsID (rs456) or MyVariant ID (chr1:g.1234A>G)",
        ),
    ],
    output_json: Annotated[
        bool,
        typer.Option(
            "--json",
            "-j",
            help="Render in JSON format",
            case_sensitive=False,
        ),
    ] = False,
    include_external: Annotated[
        bool,
        typer.Option(
            "--include-external/--no-external",
            help="Include annotations from external sources (TCGA, 1000 Genomes, cBioPortal)",
        ),
    ] = True,
    assembly: Annotated[
        str,
        typer.Option(
            "--assembly",
            help="Genome assembly (hg19 or hg38)",
            case_sensitive=False,
        ),
    ] = DEFAULT_ASSEMBLY,
):
    """
    Get detailed information about a specific genetic variant.

    Supports HGVS identifiers (e.g., 'chr7:g.140453136A>T') or dbSNP rsIDs.

    Examples:
        Get by HGVS: biomcp variant get "chr7:g.140453136A>T"
        Get by rsID: biomcp variant get rs113488022
        Get as JSON: biomcp variant get rs113488022 --json
        Get without external annotations: biomcp variant get rs113488022 --no-external
        Get with hg38 assembly: biomcp variant get rs113488022 --assembly hg38
    """
    if not variant_id:
        typer.echo("Error: A variant identifier must be provided.", err=True)
        raise typer.Exit(code=1)

    # Validate assembly value. `case_sensitive=False` only applies to
    # choice/enum options, so this plain string arrives exactly as typed;
    # normalise it here so that e.g. `--assembly HG38` is accepted.
    normalized_assembly = assembly.strip().lower()
    if normalized_assembly not in ("hg19", "hg38"):
        typer.echo(
            f"Error: Invalid assembly '{assembly}'. Must be 'hg19' or 'hg38'.",
            err=True,
        )
        raise typer.Exit(code=1)

    result = asyncio.run(
        getter.get_variant(
            variant_id,
            output_json=output_json,
            include_external=include_external,
            assembly=normalized_assembly,
        )
    )

    typer.echo(result)
@variant_app.command("search")
def search_variant_cmd(
    gene: Annotated[
        str | None,
        typer.Option(
            "--gene",
            help="Gene symbol (e.g., BRCA1)",
        ),
    ] = None,
    hgvsp: Annotated[
        str | None,
        typer.Option(
            "--hgvsp",
            help="Protein notation (e.g., p.Val600Glu).",
        ),
    ] = None,
    hgvsc: Annotated[
        str | None,
        typer.Option(
            "--hgvsc",
            help="cDNA notation (e.g., c.1799T>A).",
        ),
    ] = None,
    rsid: Annotated[
        str | None,
        typer.Option(
            "--rsid",
            help="dbSNP rsID (e.g., rs113488022)",
        ),
    ] = None,
    region: Annotated[
        str | None,
        typer.Option(
            "--region",
            help="Genomic region (e.g., chr1:69000-70000)",
        ),
    ] = None,
    significance: Annotated[
        search.ClinicalSignificance | None,
        typer.Option(
            "--significance",
            help="Clinical significance (e.g., pathogenic, likely benign)",
            case_sensitive=False,
        ),
    ] = None,
    min_frequency: Annotated[
        float | None,
        typer.Option(
            "--min-frequency",
            help="Minimum gnomAD exome allele frequency (0.0 to 1.0)",
            min=0.0,
            max=1.0,
        ),
    ] = None,
    max_frequency: Annotated[
        float | None,
        typer.Option(
            "--max-frequency",
            help="Maximum gnomAD exome allele frequency (0.0 to 1.0)",
            min=0.0,
            max=1.0,
        ),
    ] = None,
    cadd: Annotated[
        float | None,
        typer.Option(
            "--cadd",
            help="Minimum CADD phred score",
            min=0.0,
        ),
    ] = None,
    polyphen: Annotated[
        search.PolyPhenPrediction | None,
        typer.Option(
            "--polyphen",
            # Adjacent literals concatenate verbatim, so the separating
            # space has to be part of the first literal.
            help="PolyPhen-2 prediction: Probably damaging = D, "
            "Possibly damaging = P, Benign = B",
            case_sensitive=False,
        ),
    ] = None,
    sift: Annotated[
        search.SiftPrediction | None,
        typer.Option(
            "--sift",
            help="SIFT prediction: D = Deleterious, T = Tolerated",
            case_sensitive=False,
        ),
    ] = None,
    size: Annotated[
        int,
        typer.Option(
            "--size",
            help="Maximum number of results to return",
            min=1,
            max=100,
        ),
    ] = SYSTEM_PAGE_SIZE,
    sources: Annotated[
        str | None,
        typer.Option(
            "--sources",
            help="Specific sources to include in results (comma-separated)",
        ),
    ] = None,
    output_json: Annotated[
        bool,
        typer.Option(
            "--json",
            "-j",
            help="Render in JSON format",
            case_sensitive=False,
        ),
    ] = False,
):
    """
    Search for genetic variants using one or more filters.

    Every option is optional; the supplied values are combined into a
    single VariantQuery and the formatted result is printed.
    """
    # Tolerate spaces and stray commas in the list, e.g. "a, b," -> [a, b].
    source_list = (
        [name.strip() for name in sources.split(",") if name.strip()]
        if sources
        else []
    )

    query = search.VariantQuery(
        gene=gene,
        hgvsp=hgvsp,
        hgvsc=hgvsc,
        rsid=rsid,
        region=region,
        significance=significance,
        min_frequency=min_frequency,
        max_frequency=max_frequency,
        cadd=cadd,
        polyphen=polyphen,
        sift=sift,
        size=size,
        sources=source_list,
    )

    result = asyncio.run(search.search_variants(query, output_json))
    typer.echo(result)
@variant_app.command("predict")
def predict_variant_effects(
    chromosome: Annotated[
        str,
        typer.Argument(help="Chromosome (e.g., chr7, chrX)"),
    ],
    position: Annotated[
        int,
        typer.Argument(help="1-based genomic position"),
    ],
    reference: Annotated[
        str,
        typer.Argument(help="Reference allele(s) (e.g., A, ATG)"),
    ],
    alternate: Annotated[
        str,
        typer.Argument(help="Alternate allele(s) (e.g., T, A)"),
    ],
    interval_size: Annotated[
        int,
        typer.Option(
            "--interval",
            "-i",
            help="Analysis interval size in bp (max 1000000)",
            min=2000,
            max=1000000,
        ),
    ] = 131072,
    tissue: Annotated[
        list[str] | None,
        typer.Option(
            "--tissue",
            "-t",
            help="UBERON ontology terms for tissue-specific predictions",
        ),
    ] = None,
    threshold: Annotated[
        float,
        typer.Option(
            "--threshold",
            help="Significance threshold for log2 fold changes",
            min=0.0,
            max=5.0,
        ),
    ] = 0.5,
    api_key: Annotated[
        str | None,
        typer.Option(
            "--api-key",
            help="AlphaGenome API key (overrides ALPHAGENOME_API_KEY env var)",
            envvar="ALPHAGENOME_API_KEY",
        ),
    ] = None,
):
    """
    Predict variant effects using Google DeepMind's AlphaGenome:\n
    - Gene expression changes\n
    - Chromatin accessibility\n
    - Splicing alterations\n
    - Promoter activity\n
    \n
    Requires AlphaGenome API key via --api-key or ALPHAGENOME_API_KEY env var.\n
    \n
    Examples:
    \n\t# Predict BRAF V600E mutation
    \n\tbiomcp variant predict chr7 140753336 A T
    \n
    \n\t# With API key specified
    \n\tbiomcp variant predict chr7 140753336 A T --api-key YOUR_KEY
    \n
    \n\t# With tissue-specific predictions
    \n\tbiomcp variant predict chr7 140753336 A T --tissue UBERON:0002367
    \n
    \n\t# With larger analysis interval
    \n\tbiomcp variant predict chr7 140753336 A T --interval 500000
    """
    # Function-scope import, aliased so it does not rebind this command's
    # own name inside the body.
    from ..variants.alphagenome import (
        predict_variant_effects as run_prediction,
    )

    # CLI option names differ from the library's keyword names for the
    # tissue list and the threshold, so map them explicitly.
    prediction_kwargs = {
        "chromosome": chromosome,
        "position": position,
        "reference": reference,
        "alternate": alternate,
        "interval_size": interval_size,
        "tissue_types": tissue,
        "significance_threshold": threshold,
        "api_key": api_key,
    }

    report = asyncio.run(run_prediction(**prediction_kwargs))
    typer.echo(report)
# Marked like TestAssemblyParameter in this file: the test talks to a live
# external service, so `-m "not integration"` runs should deselect it.
@pytest.mark.integration
class TestThousandGenomesIntegration:
    """Integration tests for 1000 Genomes via Ensembl."""

    @pytest.mark.asyncio
    async def test_1000g_real_variant(self):
        """Test real 1000 Genomes API with known variant."""
        client = ThousandGenomesClient()

        # Try with a known rsID
        result = await client.get_variant_data("rs7412")  # APOE variant

        print(f"1000 Genomes result: {result}")

        if result:
            print(f"Global MAF: {result.global_maf}")
            print(f"EUR MAF: {result.eur_maf}")
            print(f"AFR MAF: {result.afr_maf}")
            print(f"Consequence: {result.most_severe_consequence}")
            print(f"Ancestral allele: {result.ancestral_allele}")

            # This variant should have frequency data
            assert result.global_maf is not None
        else:
            # Deliberately a print, not pytest.skip: the __main__ runner in
            # this file calls the method directly, outside of pytest.
            print("No 1000 Genomes data found")
class TestAssemblyParameter:
    """Live-API checks for the ``assembly`` argument of get_variant."""

    @staticmethod
    async def _fetch_raw(assembly):
        """Return get_variant's JSON text for rs113488022 (BRAF V600E)."""
        return await get_variant(
            "rs113488022",
            output_json=True,
            include_external=False,
            assembly=assembly,
        )

    async def _check_assembly_block(self, assembly):
        """Assert the first hit carries start/end under the ``assembly`` key.

        Skips instead of failing when the API returns no hits or the hit
        lacks the requested assembly block.
        """
        import json

        raw = await self._fetch_raw(assembly)

        # Should return valid JSON
        assert raw is not None
        assert len(raw) > 0

        hits = json.loads(raw)
        if not (hits and len(hits) > 0):
            pytest.skip("No data returned from API")

        first_hit = hits[0]
        if assembly not in first_hit:
            pytest.skip(f"{assembly} data not available in API response")

        coordinates = first_hit[assembly]
        print(f"{assembly} coordinates: {coordinates}")
        assert "start" in first_hit[assembly]
        assert "end" in first_hit[assembly]

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_get_variant_hg19_assembly(self):
        """Test get_variant with hg19 assembly on real API."""
        await self._check_assembly_block("hg19")

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_get_variant_hg38_assembly(self):
        """Test get_variant with hg38 assembly on real API."""
        await self._check_assembly_block("hg38")

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_assembly_coordinate_differences(self):
        """Test that hg19 and hg38 return different coordinates for same variant."""
        import json

        # Fetch both assemblies before parsing either response.
        raw_hg19 = await self._fetch_raw("hg19")
        raw_hg38 = await self._fetch_raw("hg38")

        hits_hg19 = json.loads(raw_hg19)
        hits_hg38 = json.loads(raw_hg38)

        # Both should return data
        if not hits_hg19 or not hits_hg38:
            pytest.skip("API did not return data for both assemblies")

        # Compare coordinates if available
        if len(hits_hg19) > 0 and len(hits_hg38) > 0:
            v19, v38 = hits_hg19[0], hits_hg38[0]

            # BRAF V600E has different coordinates in hg19 vs hg38
            # hg19: chr7:140453136
            # hg38: chr7:140753336
            if not ("hg19" in v19 and "hg38" in v38):
                pytest.skip("Assembly-specific coordinates not in response")

            print(f"hg19 start: {v19['hg19']['start']}")
            print(f"hg38 start: {v38['hg38']['start']}")

            # Coordinates should be different (BRAF moved between assemblies)
            assert v19["hg19"]["start"] != v38["hg38"]["start"]
class TestTrialSearchBackwardCompatibility:
    """Existing trial-search entry points must keep working as before."""

    @pytest.mark.asyncio
    async def test_search_trials_defaults_to_clinicaltrials(self):
        """search_trials issues its request against ClinicalTrials.gov."""
        query = TrialQuery(conditions=["diabetes"])
        study = {
            "protocolSection": {
                "identificationModule": {"nctId": "NCT12345"}
            }
        }

        with patch("biomcp.http_client.request_api") as api_stub:
            api_stub.return_value = ({"studies": [study]}, None)

            await search_trials(query, output_json=True)

            # The request must have gone out, with a ClinicalTrials.gov URL
            # passed as the `url` keyword argument.
            assert api_stub.called
            requested_url = api_stub.call_args.kwargs.get("url")
            assert requested_url is not None
            assert "clinicaltrials.gov" in requested_url

    @pytest.mark.asyncio
    async def test_search_trials_no_source_parameter(self):
        """search_trials is still callable with nothing but the query."""
        query = TrialQuery(conditions=["cancer"])

        with patch("biomcp.http_client.request_api") as api_stub:
            api_stub.return_value = ({"studies": []}, None)

            # A TypeError here would mean the signature gained a required
            # argument.
            await search_trials(query)

            assert api_stub.called

    @pytest.mark.asyncio
    async def test_search_trials_unified_with_source(self):
        """search_trials_unified dispatches on its ``source`` argument."""
        query = TrialQuery(conditions=["melanoma"])

        # source="clinicaltrials" delegates to search_trials.
        with patch("biomcp.trials.search.search_trials") as ct_stub:
            ct_stub.return_value = "CT results"

            ct_output = await search_trials_unified(
                query, source="clinicaltrials"
            )

            assert ct_output == "CT results"
            ct_stub.assert_called_once_with(query, False)

        # source="nci" delegates to the NCI search plus its formatter.
        with (
            patch("biomcp.trials.nci_search.search_trials_nci") as nci_stub,
            patch(
                "biomcp.trials.nci_search.format_nci_trial_results"
            ) as format_stub,
        ):
            nci_stub.return_value = {"source": "nci", "trials": []}
            format_stub.return_value = "NCI formatted results"

            nci_output = await search_trials_unified(
                query, source="nci", api_key="test-key"
            )

            assert nci_output == "NCI formatted results"
            nci_stub.assert_called_once_with(query, "test-key")
class TestCLIBackwardCompatibility:
    """The trial CLI keeps its importable names and default invocation."""

    def test_cli_imports_exist(self):
        """Both CLI entry points can still be imported from biomcp.cli.trials."""
        # An ImportError on this line is the failure being guarded against.
        from biomcp.cli.trials import get_trial_cli, search_trials_cli

        for entry_point in (search_trials_cli, get_trial_cli):
            assert entry_point is not None

    def test_search_defaults_without_source(self):
        """`trial search` succeeds when --source is not given."""
        from typer.testing import CliRunner

        from biomcp.cli.main import app

        command = ["trial", "search", "--condition", "diabetes"]
        cli = CliRunner()

        with patch("biomcp.cli.trials.asyncio.run") as run_stub:
            run_stub.return_value = None

            # Run CLI command without --source
            outcome = cli.invoke(app, command)

            # Should succeed
            assert outcome.exit_code == 0

            # asyncio.run must have been handed something; inspect its
            # first positional argument.
            run_stub.assert_called()
            submitted = run_stub.call_args[0][0]
            assert hasattr(submitted, "__name__") or hasattr(submitted, "func")
the expected functions.""" # These imports should not raise ImportError from biomcp.cli.trials import get_trial_cli, search_trials_cli assert search_trials_cli is not None assert get_trial_cli is not None def test_search_defaults_without_source(self): """Test CLI search works without source parameter.""" from typer.testing import CliRunner from biomcp.cli.main import app runner = CliRunner() with patch("biomcp.cli.trials.asyncio.run") as mock_run: mock_run.return_value = None # Run CLI command without --source result = runner.invoke( app, ["trial", "search", "--condition", "diabetes"] ) # Should succeed assert result.exit_code == 0 # Verify asyncio.run was called with the right function mock_run.assert_called() args = mock_run.call_args[0][0] # Check that it's the unified search function being called assert hasattr(args, "__name__") or hasattr(args, "func") ``` -------------------------------------------------------------------------------- /docs/reference/architecture-diagrams.md: -------------------------------------------------------------------------------- ```markdown # BioMCP Architecture Diagrams This page describes BioMCP's architecture, data flows, and workflows. 
## System Architecture Overview BioMCP consists of three main layers: ### Client Layer - **CLI Interface**: Command-line tool for direct interaction - **Claude Desktop**: AI assistant integration via MCP protocol - **Python SDK**: Programmatic access for custom applications - **Custom MCP Clients**: Any MCP-compatible client ### BioMCP Core - **MCP Server**: Handles protocol communication - **Request Router**: Directs queries to appropriate handlers - **Cache Layer**: Intelligent caching for API responses - **Domain Handlers**: Specialized processors for each data type - Articles Handler (PubMed/PubTator3) - Trials Handler (ClinicalTrials.gov, NCI) - Variants Handler (MyVariant.info) - Genes Handler (MyGene.info) ### External APIs - **PubMed/PubTator3**: Biomedical literature - **ClinicalTrials.gov**: US clinical trials registry - **NCI CTS API**: National Cancer Institute trials - **MyVariant.info**: Genetic variant annotations - **MyGene.info**: Gene information - **cBioPortal**: Cancer genomics data - **AlphaGenome**: Variant effect predictions ## Data Flow Architecture 1. **User Request**: Query submitted via CLI, Claude, or SDK 2. **Cache Check**: System checks for cached results 3. **API Request**: If cache miss, fetch from external API 4. **Result Processing**: Normalize and enrich data 5. **Cache Storage**: Store results for future use 6. **Response Delivery**: Return formatted results to user ## Key Workflows ### Search Workflow 1. **Think Tool**: Plan search strategy 2. **Execute Search**: Query relevant data sources 3. **Enrich Results**: Add contextual information 4. **Combine Data**: Merge results from multiple sources 5. **Format Output**: Present in user-friendly format ### Article Search Pipeline 1. **Query Processing**: Parse user input 2. **Entity Recognition**: Normalize gene/disease names 3. **PubTator3 Search**: Query literature database 4. **Preprint Integration**: Include bioRxiv/medRxiv if enabled 5. 
**cBioPortal Enrichment**: Add cancer genomics data for genes 6. **Result Merging**: Combine all data sources ### Clinical Trial Matching 1. **Patient Profile**: Parse eligibility criteria 2. **Location Filter**: Geographic constraints 3. **Molecular Profile**: Mutation requirements 4. **Prior Treatments**: Treatment history matching 5. **Scoring Algorithm**: Rank trials by relevance 6. **Contact Extraction**: Retrieve site information ### Variant Interpretation 1. **Input Parsing**: Process VCF/MAF files 2. **Batch Processing**: Group variants efficiently 3. **Annotation Gathering**: - Clinical significance from MyVariant.info - Population frequency data - In silico predictions - Literature evidence - Clinical trial associations 4. **AlphaGenome Integration**: Regulatory predictions (optional) 5. **Tier Classification**: Categorize by clinical relevance 6. **Report Generation**: Create interpretation summary ## Architecture Patterns ### Caching Strategy - **Multi-tier Cache**: Memory → Disk → External - **Smart TTL**: Domain-specific expiration times - **Cache Key Generation**: Include all query parameters - **Invalidation Logic**: Clear on errors or updates ### Error Handling - **Retry Logic**: Exponential backoff for transient errors - **Rate Limiting**: Respect API limits with queuing - **Graceful Degradation**: Return partial results when possible - **Clear Error Messages**: Help users troubleshoot issues ### Authentication Flow 1. Check for user-provided API key 2. Fall back to environment variable 3. Use public access if no key available 4. 
Handle authentication errors gracefully ### Performance Optimization - **Request Batching**: Combine multiple queries - **Parallel Execution**: Concurrent API calls - **Connection Pooling**: Reuse HTTP connections - **Result Streaming**: Return data as available ## Deployment Options ### Local Development - Single process with in-memory cache - Direct file system access - Simple configuration ### Docker Deployment - Containerized application - Volume-mounted cache - Environment-based configuration ### Cloud Deployment - Load-balanced instances - Shared Redis cache - Auto-scaling capabilities - Monitoring integration ## Creating Documentation Diagrams For visual diagrams, we recommend: 1. **ASCII Art**: Universal compatibility - Use tools like asciiflow.com - Store in `docs/assets/` directory 2. **Screenshots**: For complex UIs - Annotate with arrows/labels - Save as PNG in `docs/assets/` 3. **External Tools**: - draw.io for flowcharts - Lucidchart for professional diagrams - Export as static images ## ASCII System Architecture ``` ┌─────────────────────────────────────────────────────────────────────────┐ │ USER INTERFACES │ ├────────────────┬───────────────────┬───────────────┬───────────────────┤ │ │ │ │ │ │ CLI Tool │ Claude Desktop │ Python SDK │ Custom Client │ │ (biomcp) │ (MCP Client) │ (async) │ (your app) │ │ │ │ │ │ └────────┬───────┴─────────┬─────────┴───────┬───────┴───────────┬───────┘ │ │ │ │ └─────────────────┴─────────────────┴───────────────────┘ │ ▼ ┌─────────────────────────────────────────────────────────────────────────┐ │ BioMCP CORE SERVER │ ├─────────────────────────────────────────────────────────────────────────┤ │ │ │ ┌─────────────┐ ┌──────────────┐ ┌──────────────┐ ┌────────────┐ │ │ │ Router │ │ Rate Limiter │ │ Cache Manager│ │ Logger │ │ │ │ │ │ │ │ │ │ │ │ │ └──────┬──────┘ └──────────────┘ └──────────────┘ └────────────┘ │ │ │ │ │ ▼ │ │ ┌─────────────────────────────────────────────────────────────────┐ │ │ │ Domain Handlers │ │ │ 
├─────────────┬─────────────┬─────────────┬──────────────────────┤ │ │ │ Articles │ Trials │ Variants │ Genes/Drugs/Disease │ │ │ │ Handler │ Handler │ Handler │ Handler │ │ │ └──────┬──────┴──────┬──────┴──────┬──────┴──────────┬───────────┘ │ │ │ │ │ │ │ └─────────┼─────────────┼─────────────┼─────────────────┼─────────────────┘ │ │ │ │ ▼ ▼ ▼ ▼ ┌─────────────────────────────────────────────────────────────────────────┐ │ EXTERNAL DATA SOURCES │ ├─────────────┬─────────────┬─────────────┬──────────────────────────────┤ │ │ │ │ │ │ PubMed/ │ Clinical │ MyVariant │ BioThings Suite │ │ PubTator3 │ Trials.gov │ .info │ (MyGene/MyDisease/MyChem) │ │ │ + NCI │ │ │ │ │ │ │ │ ├─────────────┴─────────────┴─────────────┴──────────────────────────────┤ │ │ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ │ │ cBioPortal │ │ AlphaGenome │ │ Europe PMC │ │ │ │ (Cancer) │ │ (Predictions)│ │ (Preprints) │ │ │ └──────────────┘ └──────────────┘ └──────────────┘ │ │ │ └─────────────────────────────────────────────────────────────────────────┘ ``` See also: [Quick Architecture Reference](quick-architecture.md) ## Next Steps - View the [Quick Architecture Guide](quick-architecture.md) for a concise overview - Check [Developer Guides](../developer-guides/01-server-deployment.md) for implementation details - See [API Reference](../apis/overview.md) for detailed specifications ``` -------------------------------------------------------------------------------- /tests/tdd/test_circuit_breaker.py: -------------------------------------------------------------------------------- ```python """Tests for circuit breaker pattern.""" import asyncio import pytest from biomcp.circuit_breaker import ( CircuitBreaker, CircuitBreakerConfig, CircuitBreakerError, CircuitState, circuit_breaker, get_circuit_breaker, ) class CircuitBreakerTestException(Exception): """Test exception for circuit breaker tests.""" pass class IgnoredException(Exception): """Exception that should be ignored by circuit 
breaker.""" pass @pytest.mark.asyncio async def test_circuit_breaker_closed_state(): """Test circuit breaker in closed state allows calls.""" breaker = CircuitBreaker("test_closed") call_count = 0 async def test_func(): nonlocal call_count call_count += 1 return "success" # Should allow calls in closed state assert breaker.is_closed result = await breaker.call(test_func) assert result == "success" assert call_count == 1 @pytest.mark.asyncio async def test_circuit_breaker_opens_on_threshold(): """Test circuit breaker opens after failure threshold.""" config = CircuitBreakerConfig( failure_threshold=3, expected_exception=CircuitBreakerTestException, ) breaker = CircuitBreaker("test_threshold", config) async def failing_func(): raise CircuitBreakerTestException("Test failure") # First 2 failures should pass through for _i in range(2): with pytest.raises(CircuitBreakerTestException): await breaker.call(failing_func) assert breaker.is_closed # Third failure should open the circuit with pytest.raises(CircuitBreakerTestException): await breaker.call(failing_func) assert breaker.is_open # Subsequent calls should fail fast with pytest.raises(CircuitBreakerError): await breaker.call(failing_func) @pytest.mark.asyncio async def test_circuit_breaker_half_open_recovery(): """Test circuit breaker recovery through half-open state.""" config = CircuitBreakerConfig( failure_threshold=2, recovery_timeout=0.1, # 100ms for testing success_threshold=2, ) breaker = CircuitBreaker("test_recovery", config) call_count = 0 should_fail = True async def test_func(): nonlocal call_count call_count += 1 if should_fail: raise CircuitBreakerTestException("Failure") return "success" # Open the circuit for _ in range(2): with pytest.raises(CircuitBreakerTestException): await breaker.call(test_func) assert breaker.is_open # Wait for recovery timeout await asyncio.sleep(0.15) # Next call should attempt (half-open state) should_fail = False result = await breaker.call(test_func) assert result == 
"success" assert breaker.state == CircuitState.HALF_OPEN # Need one more success to close result = await breaker.call(test_func) assert result == "success" assert breaker.is_closed @pytest.mark.asyncio async def test_circuit_breaker_half_open_failure(): """Test circuit breaker reopens on failure in half-open state.""" config = CircuitBreakerConfig( failure_threshold=2, recovery_timeout=0.1, ) breaker = CircuitBreaker("test_half_open_fail", config) async def failing_func(): raise CircuitBreakerTestException("Failure") # Open the circuit for _ in range(2): with pytest.raises(CircuitBreakerTestException): await breaker.call(failing_func) assert breaker.is_open # Wait for recovery timeout await asyncio.sleep(0.15) # Failure in half-open should reopen immediately with pytest.raises(CircuitBreakerTestException): await breaker.call(failing_func) assert breaker.is_open @pytest.mark.asyncio async def test_circuit_breaker_ignored_exceptions(): """Test that certain exceptions don't trigger circuit breaker.""" config = CircuitBreakerConfig( failure_threshold=2, expected_exception=Exception, exclude_exceptions=(IgnoredException,), ) breaker = CircuitBreaker("test_ignored", config) async def func_with_ignored_exception(): raise IgnoredException("Should be ignored") # These exceptions shouldn't count for _ in range(5): with pytest.raises(IgnoredException): await breaker.call(func_with_ignored_exception) assert breaker.is_closed @pytest.mark.asyncio async def test_circuit_breaker_reset(): """Test manual reset of circuit breaker.""" config = CircuitBreakerConfig(failure_threshold=1) breaker = CircuitBreaker("test_reset", config) async def failing_func(): raise CircuitBreakerTestException("Failure") # Open the circuit with pytest.raises(CircuitBreakerTestException): await breaker.call(failing_func) assert breaker.is_open # Manual reset await breaker.reset() assert breaker.is_closed # Should allow calls again async def success_func(): return "success" result = await 
breaker.call(success_func) assert result == "success" @pytest.mark.asyncio async def test_circuit_breaker_decorator(): """Test circuit breaker decorator.""" call_count = 0 @circuit_breaker( "test_decorator", CircuitBreakerConfig(failure_threshold=2) ) async def decorated_func(should_fail=False): nonlocal call_count call_count += 1 if should_fail: raise CircuitBreakerTestException("Failure") return "success" # Success calls result = await decorated_func() assert result == "success" # Open circuit with failures for _ in range(2): with pytest.raises(CircuitBreakerTestException): await decorated_func(should_fail=True) # Circuit should be open with pytest.raises(CircuitBreakerError): await decorated_func() def test_get_circuit_breaker(): """Test getting circuit breaker from registry.""" # First call creates breaker breaker1 = get_circuit_breaker("test_registry") assert breaker1.name == "test_registry" # Second call returns same instance breaker2 = get_circuit_breaker("test_registry") assert breaker1 is breaker2 # Different name creates different breaker breaker3 = get_circuit_breaker("test_registry_2") assert breaker3 is not breaker1 @pytest.mark.asyncio async def test_circuit_breaker_concurrent_calls(): """Test circuit breaker handles concurrent calls correctly.""" config = CircuitBreakerConfig( failure_threshold=5, expected_exception=CircuitBreakerTestException, ) breaker = CircuitBreaker("test_concurrent", config) failure_count = 0 async def failing_func(): nonlocal failure_count failure_count += 1 if failure_count <= 5: raise CircuitBreakerTestException("Failure") return "success" # Run concurrent failing calls tasks = [] for _ in range(10): tasks.append(breaker.call(failing_func)) results = await asyncio.gather(*tasks, return_exceptions=True) # Should have some CircuitBreakerTestExceptions and some CircuitBreakerErrors test_exceptions = sum( 1 for r in results if isinstance(r, CircuitBreakerTestException) ) breaker_errors = sum( 1 for r in results if isinstance(r, 
CircuitBreakerError) ) # At least failure_threshold CircuitBreakerTestExceptions assert test_exceptions >= config.failure_threshold # Some calls should have been blocked assert breaker_errors > 0 # Circuit should be open assert breaker.is_open @pytest.mark.asyncio async def test_circuit_breaker_success_resets_failures(): """Test that successes reset failure count in closed state.""" config = CircuitBreakerConfig(failure_threshold=3) breaker = CircuitBreaker("test_success_reset", config) async def sometimes_failing_func(fail=False): if fail: raise CircuitBreakerTestException("Failure") return "success" # Two failures for _ in range(2): with pytest.raises(CircuitBreakerTestException): await breaker.call(sometimes_failing_func, fail=True) # Success should reset failure count result = await breaker.call(sometimes_failing_func, fail=False) assert result == "success" assert breaker.is_closed # Can now fail 2 more times without opening for _ in range(2): with pytest.raises(CircuitBreakerTestException): await breaker.call(sometimes_failing_func, fail=True) assert breaker.is_closed ```