#
tokens: 49201/50000 25/303 files (page 4/15)
lines: off (toggle) GitHub
raw markdown copy
This is page 4 of 15. Use http://codebase.md/genomoncology/biomcp?page={x} to view the full context.

# Directory Structure

```
├── .github
│   ├── actions
│   │   └── setup-python-env
│   │       └── action.yml
│   ├── dependabot.yml
│   └── workflows
│       ├── ci.yml
│       ├── deploy-docs.yml
│       ├── main.yml.disabled
│       ├── on-release-main.yml
│       └── validate-codecov-config.yml
├── .gitignore
├── .pre-commit-config.yaml
├── BIOMCP_DATA_FLOW.md
├── CHANGELOG.md
├── CNAME
├── codecov.yaml
├── docker-compose.yml
├── Dockerfile
├── docs
│   ├── apis
│   │   ├── error-codes.md
│   │   ├── overview.md
│   │   └── python-sdk.md
│   ├── assets
│   │   ├── biomcp-cursor-locations.png
│   │   ├── favicon.ico
│   │   ├── icon.png
│   │   ├── logo.png
│   │   ├── mcp_architecture.txt
│   │   └── remote-connection
│   │       ├── 00_connectors.png
│   │       ├── 01_add_custom_connector.png
│   │       ├── 02_connector_enabled.png
│   │       ├── 03_connect_to_biomcp.png
│   │       ├── 04_select_google_oauth.png
│   │       └── 05_success_connect.png
│   ├── backend-services-reference
│   │   ├── 01-overview.md
│   │   ├── 02-biothings-suite.md
│   │   ├── 03-cbioportal.md
│   │   ├── 04-clinicaltrials-gov.md
│   │   ├── 05-nci-cts-api.md
│   │   ├── 06-pubtator3.md
│   │   └── 07-alphagenome.md
│   ├── blog
│   │   ├── ai-assisted-clinical-trial-search-analysis.md
│   │   ├── images
│   │   │   ├── deep-researcher-video.png
│   │   │   ├── researcher-announce.png
│   │   │   ├── researcher-drop-down.png
│   │   │   ├── researcher-prompt.png
│   │   │   ├── trial-search-assistant.png
│   │   │   └── what_is_biomcp_thumbnail.png
│   │   └── researcher-persona-resource.md
│   ├── changelog.md
│   ├── CNAME
│   ├── concepts
│   │   ├── 01-what-is-biomcp.md
│   │   ├── 02-the-deep-researcher-persona.md
│   │   └── 03-sequential-thinking-with-the-think-tool.md
│   ├── developer-guides
│   │   ├── 01-server-deployment.md
│   │   ├── 02-contributing-and-testing.md
│   │   ├── 03-third-party-endpoints.md
│   │   ├── 04-transport-protocol.md
│   │   ├── 05-error-handling.md
│   │   ├── 06-http-client-and-caching.md
│   │   ├── 07-performance-optimizations.md
│   │   └── generate_endpoints.py
│   ├── faq-condensed.md
│   ├── FDA_SECURITY.md
│   ├── genomoncology.md
│   ├── getting-started
│   │   ├── 01-quickstart-cli.md
│   │   ├── 02-claude-desktop-integration.md
│   │   └── 03-authentication-and-api-keys.md
│   ├── how-to-guides
│   │   ├── 01-find-articles-and-cbioportal-data.md
│   │   ├── 02-find-trials-with-nci-and-biothings.md
│   │   ├── 03-get-comprehensive-variant-annotations.md
│   │   ├── 04-predict-variant-effects-with-alphagenome.md
│   │   ├── 05-logging-and-monitoring-with-bigquery.md
│   │   └── 06-search-nci-organizations-and-interventions.md
│   ├── index.md
│   ├── policies.md
│   ├── reference
│   │   ├── architecture-diagrams.md
│   │   ├── quick-architecture.md
│   │   ├── quick-reference.md
│   │   └── visual-architecture.md
│   ├── robots.txt
│   ├── stylesheets
│   │   ├── announcement.css
│   │   └── extra.css
│   ├── troubleshooting.md
│   ├── tutorials
│   │   ├── biothings-prompts.md
│   │   ├── claude-code-biomcp-alphagenome.md
│   │   ├── nci-prompts.md
│   │   ├── openfda-integration.md
│   │   ├── openfda-prompts.md
│   │   ├── pydantic-ai-integration.md
│   │   └── remote-connection.md
│   ├── user-guides
│   │   ├── 01-command-line-interface.md
│   │   ├── 02-mcp-tools-reference.md
│   │   └── 03-integrating-with-ides-and-clients.md
│   └── workflows
│       └── all-workflows.md
├── example_scripts
│   ├── mcp_integration.py
│   └── python_sdk.py
├── glama.json
├── LICENSE
├── lzyank.toml
├── Makefile
├── mkdocs.yml
├── package-lock.json
├── package.json
├── pyproject.toml
├── README.md
├── scripts
│   ├── check_docs_in_mkdocs.py
│   ├── check_http_imports.py
│   └── generate_endpoints_doc.py
├── smithery.yaml
├── src
│   └── biomcp
│       ├── __init__.py
│       ├── __main__.py
│       ├── articles
│       │   ├── __init__.py
│       │   ├── autocomplete.py
│       │   ├── fetch.py
│       │   ├── preprints.py
│       │   ├── search_optimized.py
│       │   ├── search.py
│       │   └── unified.py
│       ├── biomarkers
│       │   ├── __init__.py
│       │   └── search.py
│       ├── cbioportal_helper.py
│       ├── circuit_breaker.py
│       ├── cli
│       │   ├── __init__.py
│       │   ├── articles.py
│       │   ├── biomarkers.py
│       │   ├── diseases.py
│       │   ├── health.py
│       │   ├── interventions.py
│       │   ├── main.py
│       │   ├── openfda.py
│       │   ├── organizations.py
│       │   ├── server.py
│       │   ├── trials.py
│       │   └── variants.py
│       ├── connection_pool.py
│       ├── constants.py
│       ├── core.py
│       ├── diseases
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── domain_handlers.py
│       ├── drugs
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── exceptions.py
│       ├── genes
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── http_client_simple.py
│       ├── http_client.py
│       ├── individual_tools.py
│       ├── integrations
│       │   ├── __init__.py
│       │   ├── biothings_client.py
│       │   └── cts_api.py
│       ├── interventions
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── logging_filter.py
│       ├── metrics_handler.py
│       ├── metrics.py
│       ├── openfda
│       │   ├── __init__.py
│       │   ├── adverse_events_helpers.py
│       │   ├── adverse_events.py
│       │   ├── cache.py
│       │   ├── constants.py
│       │   ├── device_events_helpers.py
│       │   ├── device_events.py
│       │   ├── drug_approvals.py
│       │   ├── drug_labels_helpers.py
│       │   ├── drug_labels.py
│       │   ├── drug_recalls_helpers.py
│       │   ├── drug_recalls.py
│       │   ├── drug_shortages_detail_helpers.py
│       │   ├── drug_shortages_helpers.py
│       │   ├── drug_shortages.py
│       │   ├── exceptions.py
│       │   ├── input_validation.py
│       │   ├── rate_limiter.py
│       │   ├── utils.py
│       │   └── validation.py
│       ├── organizations
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── parameter_parser.py
│       ├── prefetch.py
│       ├── query_parser.py
│       ├── query_router.py
│       ├── rate_limiter.py
│       ├── render.py
│       ├── request_batcher.py
│       ├── resources
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── instructions.md
│       │   └── researcher.md
│       ├── retry.py
│       ├── router_handlers.py
│       ├── router.py
│       ├── shared_context.py
│       ├── thinking
│       │   ├── __init__.py
│       │   ├── sequential.py
│       │   └── session.py
│       ├── thinking_tool.py
│       ├── thinking_tracker.py
│       ├── trials
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── nci_getter.py
│       │   ├── nci_search.py
│       │   └── search.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── cancer_types_api.py
│       │   ├── cbio_http_adapter.py
│       │   ├── endpoint_registry.py
│       │   ├── gene_validator.py
│       │   ├── metrics.py
│       │   ├── mutation_filter.py
│       │   ├── query_utils.py
│       │   ├── rate_limiter.py
│       │   └── request_cache.py
│       ├── variants
│       │   ├── __init__.py
│       │   ├── alphagenome.py
│       │   ├── cancer_types.py
│       │   ├── cbio_external_client.py
│       │   ├── cbioportal_mutations.py
│       │   ├── cbioportal_search_helpers.py
│       │   ├── cbioportal_search.py
│       │   ├── constants.py
│       │   ├── external.py
│       │   ├── filters.py
│       │   ├── getter.py
│       │   ├── links.py
│       │   └── search.py
│       └── workers
│           ├── __init__.py
│           ├── worker_entry_stytch.js
│           ├── worker_entry.js
│           └── worker.py
├── tests
│   ├── bdd
│   │   ├── cli_help
│   │   │   ├── help.feature
│   │   │   └── test_help.py
│   │   ├── conftest.py
│   │   ├── features
│   │   │   └── alphagenome_integration.feature
│   │   ├── fetch_articles
│   │   │   ├── fetch.feature
│   │   │   └── test_fetch.py
│   │   ├── get_trials
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── get_variants
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── search_articles
│   │   │   ├── autocomplete.feature
│   │   │   ├── search.feature
│   │   │   ├── test_autocomplete.py
│   │   │   └── test_search.py
│   │   ├── search_trials
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   ├── search_variants
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   └── steps
│   │       └── test_alphagenome_steps.py
│   ├── config
│   │   └── test_smithery_config.py
│   ├── conftest.py
│   ├── data
│   │   ├── ct_gov
│   │   │   ├── clinical_trials_api_v2.yaml
│   │   │   ├── trials_NCT04280705.json
│   │   │   └── trials_NCT04280705.txt
│   │   ├── myvariant
│   │   │   ├── myvariant_api.yaml
│   │   │   ├── myvariant_field_descriptions.csv
│   │   │   ├── variants_full_braf_v600e.json
│   │   │   ├── variants_full_braf_v600e.txt
│   │   │   └── variants_part_braf_v600_multiple.json
│   │   ├── openfda
│   │   │   ├── drugsfda_detail.json
│   │   │   ├── drugsfda_search.json
│   │   │   ├── enforcement_detail.json
│   │   │   └── enforcement_search.json
│   │   └── pubtator
│   │       ├── pubtator_autocomplete.json
│   │       └── pubtator3_paper.txt
│   ├── integration
│   │   ├── test_openfda_integration.py
│   │   ├── test_preprints_integration.py
│   │   ├── test_simple.py
│   │   └── test_variants_integration.py
│   ├── tdd
│   │   ├── articles
│   │   │   ├── test_autocomplete.py
│   │   │   ├── test_cbioportal_integration.py
│   │   │   ├── test_fetch.py
│   │   │   ├── test_preprints.py
│   │   │   ├── test_search.py
│   │   │   └── test_unified.py
│   │   ├── conftest.py
│   │   ├── drugs
│   │   │   ├── __init__.py
│   │   │   └── test_drug_getter.py
│   │   ├── openfda
│   │   │   ├── __init__.py
│   │   │   ├── test_adverse_events.py
│   │   │   ├── test_device_events.py
│   │   │   ├── test_drug_approvals.py
│   │   │   ├── test_drug_labels.py
│   │   │   ├── test_drug_recalls.py
│   │   │   ├── test_drug_shortages.py
│   │   │   └── test_security.py
│   │   ├── test_biothings_integration_real.py
│   │   ├── test_biothings_integration.py
│   │   ├── test_circuit_breaker.py
│   │   ├── test_concurrent_requests.py
│   │   ├── test_connection_pool.py
│   │   ├── test_domain_handlers.py
│   │   ├── test_drug_approvals.py
│   │   ├── test_drug_recalls.py
│   │   ├── test_drug_shortages.py
│   │   ├── test_endpoint_documentation.py
│   │   ├── test_error_scenarios.py
│   │   ├── test_europe_pmc_fetch.py
│   │   ├── test_mcp_integration.py
│   │   ├── test_mcp_tools.py
│   │   ├── test_metrics.py
│   │   ├── test_nci_integration.py
│   │   ├── test_nci_mcp_tools.py
│   │   ├── test_network_policies.py
│   │   ├── test_offline_mode.py
│   │   ├── test_openfda_unified.py
│   │   ├── test_pten_r173_search.py
│   │   ├── test_render.py
│   │   ├── test_request_batcher.py.disabled
│   │   ├── test_retry.py
│   │   ├── test_router.py
│   │   ├── test_shared_context.py.disabled
│   │   ├── test_unified_biothings.py
│   │   ├── thinking
│   │   │   ├── __init__.py
│   │   │   └── test_sequential.py
│   │   ├── trials
│   │   │   ├── test_backward_compatibility.py
│   │   │   ├── test_getter.py
│   │   │   └── test_search.py
│   │   ├── utils
│   │   │   ├── test_gene_validator.py
│   │   │   ├── test_mutation_filter.py
│   │   │   ├── test_rate_limiter.py
│   │   │   └── test_request_cache.py
│   │   ├── variants
│   │   │   ├── constants.py
│   │   │   ├── test_alphagenome_api_key.py
│   │   │   ├── test_alphagenome_comprehensive.py
│   │   │   ├── test_alphagenome.py
│   │   │   ├── test_cbioportal_mutations.py
│   │   │   ├── test_cbioportal_search.py
│   │   │   ├── test_external_integration.py
│   │   │   ├── test_external.py
│   │   │   ├── test_extract_gene_aa_change.py
│   │   │   ├── test_filters.py
│   │   │   ├── test_getter.py
│   │   │   ├── test_links.py
│   │   │   └── test_search.py
│   │   └── workers
│   │       └── test_worker_sanitization.js
│   └── test_pydantic_ai_integration.py
├── THIRD_PARTY_ENDPOINTS.md
├── tox.ini
├── uv.lock
└── wrangler.toml
```

# Files

--------------------------------------------------------------------------------
/docs/getting-started/02-claude-desktop-integration.md:
--------------------------------------------------------------------------------

```markdown
# Claude Desktop Integration

This guide covers how to integrate BioMCP with Claude Desktop, enabling AI-powered biomedical research directly in your Claude conversations.

## Prerequisites

- [Claude Desktop](https://claude.ai/download) application
- One of the following:
  - **Option A**: Python 3.10+ and [uv](https://docs.astral.sh/uv/) (recommended)
  - **Option B**: [Docker](https://www.docker.com/products/docker-desktop/)

## Installation Methods

### Option A: Using uv (Recommended)

This method is fastest and easiest for most users.

#### 1. Install uv

```bash
# macOS/Linux
curl -LsSf https://astral.sh/uv/install.sh | sh

# Windows
powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
```

#### 2. Configure Claude Desktop

Add BioMCP to your Claude Desktop configuration file:

- **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
- **Windows**: `%APPDATA%\Claude\claude_desktop_config.json`

```json
{
  "mcpServers": {
    "biomcp": {
      "command": "uv",
      "args": ["run", "--with", "biomcp-python", "biomcp", "run"],
      "env": {
        "NCI_API_KEY": "your-nci-api-key-here",
        "ALPHAGENOME_API_KEY": "your-alphagenome-key-here",
        "CBIO_TOKEN": "your-cbioportal-token-here"
      }
    }
  }
}
```

### Option B: Using Docker

This method provides better isolation and consistency across systems.

#### 1. Create a Dockerfile

Create a file named `Dockerfile`:

```dockerfile
FROM python:3.11-slim

# Install BioMCP
RUN pip install biomcp-python

# Set the entrypoint
ENTRYPOINT ["biomcp", "run"]
```

#### 2. Build the Docker Image

```bash
docker build -t biomcp:latest .
```

#### 3. Configure Claude Desktop

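Add BioMCP to your configuration file. The `env` block sets variables for the `docker` command itself, not for the container, so each key is forwarded into the container with `-e`:

```json
{
  "mcpServers": {
    "biomcp": {
      "command": "docker",
      "args": [
        "run", "-i", "--rm",
        "-e", "NCI_API_KEY",
        "-e", "ALPHAGENOME_API_KEY",
        "-e", "CBIO_TOKEN",
        "biomcp:latest"
      ],
      "env": {
        "NCI_API_KEY": "your-nci-api-key-here",
        "ALPHAGENOME_API_KEY": "your-alphagenome-key-here",
        "CBIO_TOKEN": "your-cbioportal-token-here"
      }
    }
  }
}
```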

## Verification

1. Restart Claude Desktop after updating the configuration
2. Start a new conversation
3. Look for the 🔌 icon indicating MCP is connected
4. Test with: "Can you search for articles about BRAF mutations in melanoma?"

## Setting Up API Keys

While BioMCP works without API keys, some features require them for full functionality:

### NCI API Key (Optional)

Enables access to NCI's clinical trials database with advanced filters:

- Get your key from [NCI API Portal](https://api.cancer.gov)
- Add to configuration as `NCI_API_KEY`

### AlphaGenome API Key (Optional)

Enables variant effect predictions using Google DeepMind's AlphaGenome:

- Register at [AlphaGenome Portal](https://alphagenome.google.com)
- Add to configuration as `ALPHAGENOME_API_KEY`

### cBioPortal Token (Optional)

Enables enhanced cancer genomics queries:

- Get token from [cBioPortal](https://www.cbioportal.org/webAPI)
- Add to configuration as `CBIO_TOKEN`

## Usage Examples

Once configured, you can ask Claude to perform various biomedical research tasks:

### Literature Search

```
"Find recent articles about CAR-T therapy for B-cell lymphomas"
```

### Clinical Trials

```
"Search for actively recruiting trials for EGFR-mutant lung cancer"
```

### Variant Analysis

```
"What is known about the pathogenicity of BRCA1 c.5266dupC?"
```

### Drug Information

```
"Tell me about the mechanism of action and indications for pembrolizumab"
```

### Complex Research

```
"I need a comprehensive analysis of treatment options for a patient with
BRAF V600E melanoma who has progressed on dabrafenib/trametinib"
```

## The Deep Researcher Persona

BioMCP includes a specialized "Deep Researcher" persona that enhances Claude's biomedical research capabilities:

- **Sequential Thinking**: Automatically uses the `think` tool for systematic analysis
- **Comprehensive Coverage**: Searches multiple databases and synthesizes findings
- **Evidence-Based**: Provides citations and links to primary sources
- **Clinical Focus**: Understands medical context and terminology

To activate, simply ask biomedical questions naturally. The persona automatically engages for research tasks.

## Troubleshooting

### "MCP Connection Failed"

1. Verify the configuration file path is correct
2. Check JSON syntax (no trailing commas)
3. Ensure Claude Desktop has been restarted
4. Check that uv or Docker is properly installed

### "Command Not Found"

**For uv**:

```bash
# Verify uv installation
uv --version

# Ensure PATH includes uv
echo $PATH | grep -q "\.local/bin" || echo "PATH needs updating"
```

**For Docker**:

```bash
# Verify Docker is running
docker ps

# Test BioMCP container
docker run -it --rm biomcp:latest --help
```

### "No Results Found"

- Check your internet connection
- Verify API keys are correctly set (if using optional features)
- Try simpler queries first
- Use official gene symbols (e.g., "TP53" not "p53")

### Performance Issues

**For uv**:

- First run may be slow due to package downloads
- Subsequent runs use cached environments

**For Docker**:

- Ensure Docker has sufficient memory allocated
- Consider building with the `--platform` flag (e.g., `--platform linux/arm64`) on Apple Silicon

## Advanced Configuration

### Custom Environment Variables

Add any additional environment variables your research requires:

```json
{
  "mcpServers": {
    "biomcp": {
      "command": "uv",
      "args": ["run", "--with", "biomcp-python", "biomcp", "run"],
      "env": {
        "BIOMCP_LOG_LEVEL": "DEBUG",
        "BIOMCP_CACHE_DIR": "/path/to/cache",
        "HTTP_PROXY": "http://your-proxy:8080"
      }
    }
  }
}
```

### Multiple Configurations

You can run multiple BioMCP instances with different settings:

```json
{
  "mcpServers": {
    "biomcp-prod": {
      "command": "uv",
      "args": ["run", "--with", "biomcp-python", "biomcp", "run"],
      "env": {
        "BIOMCP_ENV": "production"
      }
    },
    "biomcp-dev": {
      "command": "uv",
      "args": ["run", "--with", "biomcp-python@latest", "biomcp", "run"],
      "env": {
        "BIOMCP_ENV": "development",
        "BIOMCP_LOG_LEVEL": "DEBUG"
      }
    }
  }
}
```

## Best Practices

1. **Start Simple**: Test with basic queries before complex research tasks
2. **Be Specific**: Use official gene symbols and disease names
3. **Iterate**: Refine queries based on initial results
4. **Verify Sources**: Always check the provided citations
5. **Save Important Findings**: Export conversation or copy key results

## Getting Help

- **Documentation**: [BioMCP Docs](https://github.com/genomoncology/biomcp)
- **Issues**: [GitHub Issues](https://github.com/genomoncology/biomcp/issues)
- **Community**: [Discussions](https://github.com/genomoncology/biomcp/discussions)

## Next Steps

Now that BioMCP is integrated with Claude Desktop:

1. Try the [example queries](#usage-examples) above
2. Explore [How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md) for specific research workflows
3. Learn about [Sequential Thinking](../concepts/03-sequential-thinking-with-the-think-tool.md) for complex analyses
4. Set up [additional API keys](03-authentication-and-api-keys.md) for enhanced features

```

--------------------------------------------------------------------------------
/src/biomcp/articles/unified.py:
--------------------------------------------------------------------------------

```python
"""Unified article search combining PubMed and preprint sources."""

import asyncio
import json
import logging
from collections.abc import Coroutine
from typing import Any

from .. import render
from .preprints import search_preprints
from .search import PubmedRequest, search_articles

logger = logging.getLogger(__name__)


def _deduplicate_articles(articles: list[dict]) -> list[dict]:
    """Remove later articles that repeat an already-seen DOI.

    The first article carrying a given DOI is kept. Articles whose ``doi``
    is missing or empty are never treated as duplicates and are always
    kept. Input order is preserved.
    """
    known_dois = set()
    kept = []
    for entry in articles:
        identifier = entry.get("doi")
        if not identifier:
            # Nothing to compare on, so this article cannot be matched.
            kept.append(entry)
        elif identifier not in known_dois:
            known_dois.add(identifier)
            kept.append(entry)
    return kept


def _parse_search_results(results: list) -> list[dict]:
    """Flatten JSON-encoded result strings into one list of articles.

    Entries that are not strings, that fail to decode as JSON, or that
    decode to something other than a list are skipped silently.
    """
    merged = []
    for payload in results:
        if not isinstance(payload, str):
            continue
        try:
            decoded = json.loads(payload)
        except json.JSONDecodeError:
            continue
        if isinstance(decoded, list):
            merged.extend(decoded)
    return merged


async def _extract_mutation_pattern(
    keywords: list[str],
) -> tuple[str | None, str | None]:
    """Extract mutation pattern from keywords asynchronously."""
    if not keywords:
        return None, None

    # Use asyncio.to_thread for CPU-bound regex operations
    import re

    def _extract_sync():
        for keyword in keywords:
            # Check for specific mutations (e.g., F57Y, V600E)
            if re.match(r"^[A-Z]\d+[A-Z*]$", keyword):
                if keyword.endswith("*"):
                    return keyword, None  # mutation_pattern
                else:
                    return None, keyword  # specific_mutation
        return None, None

    # Run CPU-bound operation in thread pool
    return await asyncio.to_thread(_extract_sync)


async def _get_mutation_summary(
    gene: str, mutation: str | None, pattern: str | None
) -> str | None:
    """Fetch and format a mutation-level cBioPortal summary for ``gene``.

    A truthy ``mutation`` is searched as a specific mutation; otherwise the
    search is made by ``pattern``. Both searches are limited to 20 studies.
    Returns ``None`` when the client yields no (falsy) result.
    """
    from ..variants.cbioportal_mutations import (
        CBioPortalMutationClient,
        format_mutation_search_result,
    )

    client = CBioPortalMutationClient()

    # Choose the single criterion that distinguishes the two search modes.
    if mutation:
        logger.info(f"Searching for specific mutation {gene} {mutation}")
        criterion = {"mutation": mutation}
    else:
        logger.info(f"Searching for mutation pattern {gene} {pattern}")
        criterion = {"pattern": pattern}

    found = await client.search_specific_mutation(
        gene=gene, max_studies=20, **criterion
    )
    if not found:
        return None
    return format_mutation_search_result(found)


async def _get_gene_summary(gene: str) -> str | None:
    """Fetch and format the gene-level cBioPortal summary for ``gene``.

    The lookup is limited to 5 studies. Returns ``None`` when the client
    yields no (falsy) summary.
    """
    from ..variants.cbioportal_search import (
        CBioPortalSearchClient,
        format_cbioportal_search_summary,
    )

    search_client = CBioPortalSearchClient()
    gene_summary = await search_client.get_gene_search_summary(
        gene, max_studies=5
    )
    if not gene_summary:
        return None
    return format_cbioportal_search_summary(gene_summary)


async def _get_cbioportal_summary(request: PubmedRequest) -> str | None:
    """Build the cBioPortal summary that accompanies an article search.

    Only the first gene in ``request.genes`` is used. If the keywords
    contain a mutation or mutation pattern, a mutation-level summary is
    produced; otherwise a gene-level one. This is best-effort: any failure
    is logged as a warning and ``None`` is returned.
    """
    if not request.genes:
        return None

    try:
        primary_gene = request.genes[0]
        pattern, mutation = await _extract_mutation_pattern(request.keywords)

        if not (mutation or pattern):
            return await _get_gene_summary(primary_gene)
        return await _get_mutation_summary(primary_gene, mutation, pattern)

    except Exception as exc:
        # Deliberately broad: the summary must never break the main search.
        logger.warning(
            f"Failed to get cBioPortal summary for gene search: {exc}"
        )
        return None


def _article_sort_key(article: dict) -> tuple[int, str]:
    """Return a key for sorting articles with ``reverse=True``.

    Peer-reviewed articles get the higher rank so that a descending sort
    lists them first; within each group later dates come first. An article
    without ``publication_state`` counts as peer-reviewed, and a missing or
    empty ``date`` sorts last in its group.
    """
    peer_reviewed = (
        article.get("publication_state", "peer_reviewed") == "peer_reviewed"
    )
    # `or` (not a .get default) also covers an explicit None date, which
    # would otherwise raise TypeError when compared with date strings.
    return (1 if peer_reviewed else 0, article.get("date") or "0000-00-00")


async def search_articles_unified(  # noqa: C901
    request: PubmedRequest,
    include_pubmed: bool = True,
    include_preprints: bool = False,
    include_cbioportal: bool = True,
    output_json: bool = False,
) -> str:
    """Search for articles across PubMed and preprint sources.

    The enabled sources run concurrently. Their articles are merged,
    de-duplicated by DOI, and ordered peer-reviewed first, newest first.

    Args:
        request: Search criteria. ``request.genes`` is replaced in place
            with the genes accepted by the shared-context validator.
        include_pubmed: Query ``search_articles``.
        include_preprints: Query ``search_preprints``.
        include_cbioportal: Also fetch a cBioPortal summary when at least
            one gene remains after validation.
        output_json: Return JSON instead of Markdown.

    Returns:
        Markdown (prefixed by the cBioPortal summary, if any) when articles
        were found and ``output_json`` is false. Otherwise JSON: the list
        of articles, or an object with ``cbioportal_summary`` and
        ``articles`` keys when a summary is available. If no source is
        enabled, an empty result in the requested format.
    """
    # Import here to avoid circular imports
    from ..shared_context import SearchContextManager

    # Use shared context to avoid redundant validations
    with SearchContextManager() as context:
        # Pre-validate genes once
        if request.genes:
            valid_genes = []
            for gene in request.genes:
                if await context.validate_gene(gene):
                    valid_genes.append(gene)
            request.genes = valid_genes

        tasks: list[Coroutine[Any, Any, Any]] = []
        task_labels = []

        if include_pubmed:
            tasks.append(search_articles(request, output_json=True))
            task_labels.append("pubmed")

        if include_preprints:
            tasks.append(search_preprints(request, output_json=True))
            task_labels.append("preprints")

        # Add cBioPortal to parallel execution
        if include_cbioportal and request.genes:
            tasks.append(_get_cbioportal_summary(request))
            task_labels.append("cbioportal")

        if not tasks:
            return json.dumps([]) if output_json else render.to_markdown([])

        # Run all operations in parallel; failures come back as values so
        # one broken source does not discard the others.
        results = await asyncio.gather(*tasks, return_exceptions=True)

        cbioportal_summary: str | None = None
        article_results = []
        for label, result in zip(task_labels, results, strict=False):
            if isinstance(result, BaseException):
                # Surface the failure instead of dropping it silently.
                logger.warning("Article source %r failed: %s", label, result)
                continue
            if label == "cbioportal":
                if isinstance(result, str):
                    cbioportal_summary = result
                continue
            article_results.append(result)

        # Parse and deduplicate results
        all_articles = _parse_search_results(article_results)
        unique_articles = _deduplicate_articles(all_articles)

        # Peer-reviewed first, then newest first. The previous key ranked
        # peer-reviewed as 0 under reverse=True, which listed preprints
        # ahead of peer-reviewed articles.
        unique_articles.sort(key=_article_sort_key, reverse=True)

        if unique_articles and not output_json:
            markdown = render.to_markdown(unique_articles)
            if cbioportal_summary:
                # Add cBioPortal summary at the beginning
                markdown = cbioportal_summary + "\n\n---\n\n" + markdown
            return markdown

        # NOTE(review): this branch is also reached when no articles were
        # found and output_json is False, so such calls get JSON back.
        # Preserved as-is because callers may rely on it; confirm intent.
        if cbioportal_summary:
            return json.dumps(
                {
                    "cbioportal_summary": cbioportal_summary,
                    "articles": unique_articles,
                },
                indent=2,
            )
        return json.dumps(unique_articles, indent=2)

```

--------------------------------------------------------------------------------
/src/biomcp/openfda/adverse_events.py:
--------------------------------------------------------------------------------

```python
"""
OpenFDA Drug Adverse Events (FAERS) integration.
"""

import logging

from .adverse_events_helpers import (
    format_drug_details,
    format_reaction_details,
    format_report_metadata,
    format_report_summary,
    format_search_summary,
    format_top_reactions,
)
from .constants import (
    OPENFDA_DEFAULT_LIMIT,
    OPENFDA_DISCLAIMER,
    OPENFDA_DRUG_EVENTS_URL,
    OPENFDA_MAX_LIMIT,
)
from .exceptions import (
    OpenFDAConnectionError,
    OpenFDARateLimitError,
    OpenFDATimeoutError,
)
from .input_validation import sanitize_input
from .utils import clean_text, make_openfda_request

logger = logging.getLogger(__name__)


def _build_search_query(
    drug: str | None, reaction: str | None, serious: bool | None
) -> str:
    """Build the search query for adverse events."""
    search_parts = []

    if drug:
        # Sanitize drug input to prevent injection
        drug = sanitize_input(drug, max_length=100)
        if drug:
            drug_query = (
                f'(patient.drug.medicinalproduct:"{drug}" OR '
                f'patient.drug.openfda.brand_name:"{drug}" OR '
                f'patient.drug.openfda.generic_name:"{drug}")'
            )
            search_parts.append(drug_query)

    if reaction:
        # Sanitize reaction input
        reaction = sanitize_input(reaction, max_length=200)
        if reaction:
            search_parts.append(
                f'patient.reaction.reactionmeddrapt:"{reaction}"'
            )

    if serious is not None:
        serious_value = "1" if serious else "2"
        search_parts.append(f"serious:{serious_value}")

    return " AND ".join(search_parts)


async def search_adverse_events(  # noqa: C901
    drug: str | None = None,
    reaction: str | None = None,
    serious: bool | None = None,
    limit: int = OPENFDA_DEFAULT_LIMIT,
    skip: int = 0,
    api_key: str | None = None,
) -> str:
    """
    Search FDA adverse event reports (FAERS) and render them as Markdown.

    Args:
        drug: Drug name to search for
        reaction: Adverse reaction term to search for
        serious: Seriousness filter; True and False select different
            ``serious`` codes, None applies no filter
        limit: Maximum number of results (capped at OPENFDA_MAX_LIMIT)
        skip: Number of results to skip
        api_key: Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)

    Returns:
        Formatted string with adverse event information. Usage hints, API
        errors and empty results are also reported as strings; rate-limit,
        timeout and connection errors are not raised.
    """
    # At least one of the two search terms is required.
    if not (drug or reaction):
        return (
            "⚠️ Please specify either a drug name or reaction term to search "
            "adverse events.\n\n"
            "Examples:\n"
            "- Search by drug: --drug 'imatinib'\n"
            "- Search by reaction: --reaction 'nausea'\n"
            "- Both: --drug 'imatinib' --reaction 'nausea'"
        )

    request_params = {
        "search": _build_search_query(drug, reaction, serious),
        "limit": min(limit, OPENFDA_MAX_LIMIT),
        "skip": skip,
    }

    # Known transport failures are turned into user-facing guidance.
    try:
        response, error = await make_openfda_request(
            OPENFDA_DRUG_EVENTS_URL,
            request_params,
            "openfda_adverse_events",
            api_key,
        )
    except OpenFDARateLimitError:
        return (
            "⚠️ **FDA API Rate Limit Exceeded**\n\n"
            "You've exceeded the FDA's rate limit. Options:\n"
            "• Wait a moment and try again\n"
            "• Provide an FDA API key for higher limits (240/min vs 40/min)\n"
            "• Get a free key at: https://open.fda.gov/apis/authentication/"
        )
    except OpenFDATimeoutError:
        return (
            "⏱️ **Request Timeout**\n\n"
            "The FDA API is taking too long to respond. This may be due to:\n"
            "• High server load\n"
            "• Complex query\n"
            "• Network issues\n\n"
            "Please try again in a moment."
        )
    except OpenFDAConnectionError as e:
        return (
            "🔌 **Connection Error**\n\n"
            f"Unable to connect to FDA API: {e}\n\n"
            "Please check your internet connection and try again."
        )

    if error:
        return f"⚠️ Error searching adverse events: {error}"

    results = response.get("results") if response else None
    if not results:
        # Describe only the criteria the caller actually supplied.
        criteria = []
        if drug:
            criteria.append(f"drug '{drug}'")
        if reaction:
            criteria.append(f"reaction '{reaction}'")
        return f"No adverse event reports found for {' and '.join(criteria)}."

    # Fall back to the page size when the response has no total count.
    result_meta = response.get("meta", {}).get("results", {})
    total = result_meta.get("total", len(results))

    lines = ["## FDA Adverse Event Reports\n"]
    lines.extend(format_search_summary(drug, reaction, serious, total))

    # The top-reactions section is shown for drug-only searches.
    if drug and not reaction:
        lines.extend(format_top_reactions(results))

    # At most three reports are rendered as samples.
    samples = results[:3]
    lines.append(
        f"### Sample Reports (showing {min(len(results), 3)} of {total}):\n"
    )
    for position, report in enumerate(samples, start=1):
        lines.extend(format_report_summary(report, position))

    lines.append(f"\n{OPENFDA_DISCLAIMER}")
    return "\n".join(lines)


async def get_adverse_event(report_id: str, api_key: str | None = None) -> str:
    """
    Fetch a single adverse event report and render it as formatted text.

    Args:
        report_id: Safety report ID to look up
        api_key: Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)

    Returns:
        Formatted report details, or a short message when the request
        fails or no report matches the ID
    """
    query = {"search": f'safetyreportid:"{report_id}"', "limit": 1}
    response, error = await make_openfda_request(
        OPENFDA_DRUG_EVENTS_URL,
        query,
        "openfda_adverse_event_detail",
        api_key,
    )

    # Request-level failure reported by the helper
    if error:
        return f"⚠️ Error retrieving adverse event report: {error}"

    # Missing response or empty result list both mean "not found"
    matches = response.get("results") if response else None
    if not matches:
        return f"Adverse event report '{report_id}' not found."

    report = matches[0]
    patient = report.get("patient", {})

    # Assemble the sections in display order
    sections = [f"## Adverse Event Report: {report_id}\n"]
    sections.extend(_format_patient_info(patient))

    drugs = patient.get("drug", [])
    if drugs:
        sections.extend(format_drug_details(drugs))

    reactions = patient.get("reaction", [])
    if reactions:
        sections.extend(format_reaction_details(reactions))

    narrative = patient.get("summary", {}).get("narrativeincludeclinical")
    if narrative:
        sections += ["### Event Narrative", clean_text(narrative), ""]

    sections.extend(format_report_metadata(report))
    sections.append(f"\n{OPENFDA_DISCLAIMER}")

    return "\n".join(sections)


def _format_patient_info(patient: dict) -> list[str]:
    """Format patient information section."""
    output = ["### Patient Information"]

    if age := patient.get("patientonsetage"):
        output.append(f"- **Age**: {age} years")

    sex_map = {0: "Unknown", 1: "Male", 2: "Female"}
    sex_code = patient.get("patientsex")
    sex = (
        sex_map.get(sex_code, "Unknown") if sex_code is not None else "Unknown"
    )
    output.append(f"- **Sex**: {sex}")

    if weight := patient.get("patientweight"):
        output.append(f"- **Weight**: {weight} kg")

    output.append("")
    return output

```

--------------------------------------------------------------------------------
/docs/how-to-guides/01-find-articles-and-cbioportal-data.md:
--------------------------------------------------------------------------------

```markdown
# How to Find Articles and cBioPortal Data

This guide walks you through searching biomedical literature with automatic cancer genomics integration from cBioPortal.

## Overview

When searching for articles about genes, BioMCP automatically enriches your results with:

- **cBioPortal Summary**: Mutation frequencies, hotspots, and cancer type distribution ([API Reference](../backend-services-reference/03-cbioportal.md))
- **PubMed Articles**: Peer-reviewed research with entity annotations ([PubTator3 Reference](../backend-services-reference/06-pubtator3.md))
- **Preprints**: Latest findings from bioRxiv and medRxiv

## Basic Article Search

### Search by Gene

Find articles about a specific gene:

```bash
# CLI
biomcp article search --gene BRAF --limit 5

# Python
articles = await client.articles.search(genes=["BRAF"], limit=5)

# MCP Tool
article_searcher(genes=["BRAF"], limit=5)
```

This automatically includes:

1. cBioPortal summary showing BRAF mutation frequency across cancers
2. Top mutation hotspots (e.g., V600E)
3. Recent articles mentioning BRAF

### Search by Disease

Find articles about a specific disease:

```bash
# CLI
biomcp article search --disease melanoma --limit 10

# Python
articles = await client.articles.search(diseases=["melanoma"])

# MCP Tool
article_searcher(diseases=["melanoma"])
```

## Advanced Search Techniques

### Combining Multiple Filters

Search for articles at the intersection of genes, diseases, and chemicals:

```bash
# CLI - EGFR mutations in lung cancer treated with erlotinib
biomcp article search \
  --gene EGFR \
  --disease "lung cancer" \
  --chemical erlotinib \
  --limit 20

# Python
articles = await client.articles.search(
    genes=["EGFR"],
    diseases=["lung cancer"],
    chemicals=["erlotinib"]
)
```

### Using OR Logic in Keywords

Find articles mentioning different notations of the same variant:

```bash
# CLI - Find any notation of BRAF V600E
biomcp article search \
  --gene BRAF \
  --keyword "V600E|p.V600E|c.1799T>A"

# Python - Different names for same concept
articles = await client.articles.search(
    diseases=["NSCLC|non-small cell lung cancer"],
    chemicals=["pembrolizumab|Keytruda|anti-PD-1"]
)
```

### Excluding Preprints

For peer-reviewed articles only:

```bash
# CLI
biomcp article search --gene TP53 --no-preprints

# Python
articles = await client.articles.search(
    genes=["TP53"],
    include_preprints=False
)
```

## Understanding cBioPortal Integration

### What cBioPortal Provides

When you search for a gene, the first result includes:

```markdown
### cBioPortal Summary for BRAF

- **Mutation Frequency**: 76.7% (368 mutations in 480 samples)
- **Studies**: 1 of 5 studies have mutations

**Top Hotspots:**

1. V600E: 310 mutations (84.2%)
2. V600K: 23 mutations (6.3%)
3. V600M: 12 mutations (3.3%)

**Cancer Type Distribution:**

- Skin Cancer, Non-Melanoma: 156 mutations
- Melanoma: 91 mutations
- Thyroid Cancer: 87 mutations
```

### Mutation-Specific Searches

Search for articles about specific mutations:

```python
# Search for BRAF V600E specifically
articles = await client.articles.search(
    genes=["BRAF"],
    keywords=["V600E"],
    include_cbioportal=True  # Default
)
```

The cBioPortal summary will highlight the specific mutation if found.

### Disabling cBioPortal

If you don't need cancer genomics data:

```bash
# CLI
biomcp article search --gene BRCA1 --no-cbioportal

# Python
articles = await client.articles.search(
    genes=["BRCA1"],
    include_cbioportal=False
)
```

## Practical Examples

### Example 1: Resistance Mechanism Research

Find articles about EGFR T790M resistance:

```python
# Using think tool first (for MCP)
think(
    thought="Researching EGFR T790M resistance mechanisms in lung cancer",
    thoughtNumber=1
)

# Search with multiple relevant terms
articles = await article_searcher(
    genes=["EGFR"],
    diseases=["lung cancer|NSCLC"],
    keywords=["T790M|p.T790M|resistance|resistant"],
    chemicals=["osimertinib|gefitinib|erlotinib"]
)
```

### Example 2: Combination Therapy Research

Research BRAF/MEK combination therapy:

```bash
# CLI approach
biomcp article search \
  --gene BRAF --gene MEK1 --gene MEK2 \
  --disease melanoma \
  --chemical dabrafenib --chemical trametinib \
  --keyword "combination therapy|combined treatment"
```

### Example 3: Biomarker Discovery

Find articles about potential biomarkers:

```python
# Search for PD-L1 as a biomarker
articles = await client.articles.search(
    genes=["CD274"],  # PD-L1 gene symbol
    keywords=["biomarker|predictive|prognostic"],
    diseases=["cancer"],
    limit=50
)

# Filter results programmatically
biomarker_articles = [
    a for a in articles
    if "biomarker" in a.title.lower() or "predictive" in a.abstract.lower()
]
```

## Working with Results

### Extracting Key Information

```python
# Process article results
for article in articles:
    print(f"Title: {article.title}")
    print(f"PMID: {article.pmid}")
    print(f"URL: {article.url}")

    # Extract annotated entities
    genes = article.metadata.get("genes", [])
    diseases = article.metadata.get("diseases", [])
    chemicals = article.metadata.get("chemicals", [])

    print(f"Genes mentioned: {', '.join(genes)}")
    print(f"Diseases: {', '.join(diseases)}")
    print(f"Chemicals: {', '.join(chemicals)}")
```

### Fetching Full Article Details

Get complete article information:

```python
# Get article by PMID
full_article = await client.articles.get("38768446")

# Access full abstract
print(full_article.abstract)

# Check for full text availability
if full_article.full_text_url:
    print(f"Full text: {full_article.full_text_url}")
```

## Tips for Effective Searches

### 1. Use Official Gene Symbols

```python
# ✅ Correct - Official HGNC symbol
articles = await search(genes=["ERBB2"])

# ❌ Avoid - Common name
articles = await search(genes=["HER2"])  # May miss results
```

### 2. Include Synonyms for Diseases

```python
# Cover all variations
articles = await search(
    diseases=["GIST|gastrointestinal stromal tumor|gastrointestinal stromal tumour"]
)
```

### 3. Leverage PubTator Annotations

PubTator automatically annotates articles with:

- Gene mentions (normalized to official symbols)
- Disease concepts (mapped to MeSH terms)
- Chemical/drug entities
- Genetic variants
- Species

### 4. Combine with Other Tools

```python
# 1. Find articles about a gene
articles = await article_searcher(genes=["ALK"])

# 2. Get gene details for context
gene_info = await gene_getter("ALK")

# 3. Find relevant trials
trials = await trial_searcher(
    other_terms=["ALK positive", "ALK rearrangement"]
)
```

## Troubleshooting

### No Results Found

1. **Check gene symbols**: Use [genenames.org](https://www.genenames.org)
2. **Broaden search**: Remove filters one by one
3. **Try synonyms**: Especially for diseases and drugs

### cBioPortal Data Missing

- Some genes may not have cancer genomics data
- Try searching for cancer-related genes
- Check if gene symbol is correct

### Preprint Issues

- Europe PMC may have delays in indexing
- Some preprints may not have DOIs
- Try searching by title keywords instead

## Next Steps

- Learn to [find trials with NCI and BioThings](02-find-trials-with-nci-and-biothings.md)
- Explore [variant annotations](03-get-comprehensive-variant-annotations.md)
- Set up [API keys](../getting-started/03-authentication-and-api-keys.md) for enhanced features

```

--------------------------------------------------------------------------------
/tests/tdd/test_network_policies.py:
--------------------------------------------------------------------------------

```python
"""Comprehensive tests for network policies and HTTP centralization."""

from pathlib import Path
from unittest.mock import patch

import pytest

from biomcp.http_client import request_api
from biomcp.utils.endpoint_registry import (
    DataType,
    EndpointCategory,
    EndpointInfo,
    EndpointRegistry,
    get_registry,
)


class TestEndpointRegistry:
    """Exercise EndpointRegistry lookups, filtering, and report output."""

    def test_registry_initialization(self):
        """A fresh registry is pre-populated with the known endpoints."""
        known = EndpointRegistry().get_all_endpoints()

        # The registry must not start out empty
        assert len(known) > 0

        # Each core service has to be present under its expected key
        for key in (
            "pubtator3_search",
            "clinicaltrials_search",
            "myvariant_query",
            "cbioportal_api",
        ):
            assert key in known

    def test_get_endpoints_by_category(self):
        """Category filtering returns only endpoints of that category."""
        reg = EndpointRegistry()

        # Literature endpoints first, then clinical trials endpoints
        for wanted in (
            EndpointCategory.BIOMEDICAL_LITERATURE,
            EndpointCategory.CLINICAL_TRIALS,
        ):
            matches = reg.get_endpoints_by_category(wanted)
            assert len(matches) > 0
            assert all(info.category == wanted for info in matches.values())

    def test_get_unique_domains(self):
        """The unique-domain listing includes the main service hosts."""
        hosts = EndpointRegistry().get_unique_domains()

        assert len(hosts) > 0
        for host in (
            "www.ncbi.nlm.nih.gov",
            "clinicaltrials.gov",
            "myvariant.info",
            "www.cbioportal.org",
        ):
            assert host in hosts

    def test_endpoint_info_properties(self):
        """EndpointInfo exposes its domain, category, and data types."""
        literature = EndpointCategory.BIOMEDICAL_LITERATURE
        info = EndpointInfo(
            url="https://api.example.com/test",
            category=literature,
            data_types=[DataType.RESEARCH_ARTICLES],
            description="Test endpoint",
            compliance_notes="Test compliance",
            rate_limit="10 requests/second",
            authentication="API key required",
        )

        assert info.domain == "api.example.com"
        assert info.category == literature
        assert DataType.RESEARCH_ARTICLES in info.data_types

    def test_markdown_report_generation(self):
        """The generated markdown report has the expected content."""
        markdown = EndpointRegistry().generate_markdown_report()

        # Section headings
        for heading in (
            "# Third-Party Endpoints Used by BioMCP",
            "## Overview",
            "## Endpoints by Category",
            "## Domain Summary",
            "## Compliance and Privacy",
            "## Network Control",
        ):
            assert heading in markdown

        # Offline mode must be mentioned
        assert "BIOMCP_OFFLINE" in markdown

        # Real endpoints must appear in the body
        for mention in ("pubtator3", "clinicaltrials.gov", "myvariant.info"):
            assert mention in markdown

    def test_save_markdown_report(self, tmp_path):
        """Saving the report writes the file and returns its path."""
        target = tmp_path / "test_endpoints.md"

        written = EndpointRegistry().save_markdown_report(target)

        assert written == target
        assert target.exists()

        # The file on disk carries the report title
        text = target.read_text()
        assert "Third-Party Endpoints Used by BioMCP" in text


class TestEndpointTracking:
    """Check how request_api handles the optional endpoint_key argument."""

    @pytest.mark.asyncio
    async def test_valid_endpoint_key(self):
        """A registered endpoint key lets the request go through."""
        with patch(
            "biomcp.http_client.call_http",
            return_value=(200, '{"data": "test"}'),
        ):
            # A known key must not trigger an exception
            payload, failure = await request_api(
                url="https://www.ncbi.nlm.nih.gov/research/pubtator3-api/search/",
                request={"text": "BRAF"},
                endpoint_key="pubtator3_search",
                cache_ttl=0,
            )

            assert payload == {"data": "test"}
            assert failure is None

    @pytest.mark.asyncio
    async def test_invalid_endpoint_key_raises_error(self):
        """An unregistered endpoint key is rejected with ValueError."""
        call_args = {
            "url": "https://api.example.com/test",
            "request": {"test": "data"},
            "endpoint_key": "invalid_endpoint_key",
            "cache_ttl": 0,
        }
        with pytest.raises(ValueError, match="Unknown endpoint key"):
            await request_api(**call_args)

    @pytest.mark.asyncio
    async def test_no_endpoint_key_allowed(self):
        """Omitting the endpoint key entirely is permitted."""
        with patch(
            "biomcp.http_client.call_http",
            return_value=(200, '{"data": "test"}'),
        ):
            # No key supplied, so no exception is expected
            payload, failure = await request_api(
                url="https://api.example.com/test",
                request={"test": "data"},
                cache_ttl=0,
            )

            assert payload == {"data": "test"}
            assert failure is None


class TestHTTPImportChecks:
    """Sanity checks for the scripts/check_http_imports.py script."""

    def test_check_script_exists(self):
        """The import-check script is present in the scripts directory."""
        repo_root = Path(__file__).parent.parent.parent
        assert (repo_root / "scripts" / "check_http_imports.py").exists()

    def test_allowed_files_configured(self):
        """The script's allow-list and library list have the key entries."""
        import sys

        # Make the scripts directory importable for the duration of the test
        scripts_dir = Path(__file__).parent.parent.parent / "scripts"
        sys.path.insert(0, str(scripts_dir))

        try:
            from check_http_imports import ALLOWED_FILES, HTTP_LIBRARIES

            # Files allowed to import HTTP libraries directly
            for filename in ("http_client.py", "http_client_simple.py"):
                assert filename in ALLOWED_FILES

            # Libraries the script is expected to look for
            for library in ("httpx", "aiohttp", "requests"):
                assert library in HTTP_LIBRARIES
        finally:
            sys.path.pop(0)


class TestGlobalRegistry:
    """Checks for the registry object returned by get_registry()."""

    def test_get_registry_returns_same_instance(self):
        """Two calls to get_registry hand back the identical object."""
        first, second = get_registry(), get_registry()

        assert first is second

    def test_global_registry_has_endpoints(self):
        """The global registry is not empty."""
        registered = get_registry().get_all_endpoints()

        assert len(registered) > 0

```

--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------

```markdown
# BioMCP: AI-Powered Biomedical Research

[![Release](https://img.shields.io/github/v/tag/genomoncology/biomcp)](https://github.com/genomoncology/biomcp/tags)
[![Build status](https://img.shields.io/github/actions/workflow/status/genomoncology/biomcp/main.yml?branch=main)](https://github.com/genomoncology/biomcp/actions/workflows/main.yml?query=branch%3Amain)
[![License](https://img.shields.io/github/license/genomoncology/biomcp)](https://github.com/genomoncology/biomcp/blob/main/LICENSE)

**Transform how you search and analyze biomedical data** with BioMCP - a powerful tool that connects AI assistants and researchers to critical biomedical databases through natural language.

### Built and Maintained by <a href="https://www.genomoncology.com"><img src="./assets/logo.png" width=200 valign="middle" /></a>

<div class="announcement-banner">
  <div class="announcement-content">
    <h2>
      <span class="badge-new">NEW</span>
      Remote BioMCP Now Available!
    </h2>
    <p>Connect to BioMCP instantly through Claude - no installation required!</p>

    <div class="announcement-features">
      <div class="feature-item">
        <strong>🚀 Instant Access</strong>
        <span>Start using BioMCP in under 2 minutes</span>
      </div>
      <div class="feature-item">
        <strong>☁️ Cloud-Powered</strong>
        <span>Always up-to-date with latest features</span>
      </div>
      <div class="feature-item">
        <strong>🔒 Secure Auth</strong>
        <span>Google OAuth authentication</span>
      </div>
      <div class="feature-item">
        <strong>🛠️ 23+ Tools</strong>
        <span>Full suite of biomedical research tools</span>
      </div>
    </div>

    <a href="tutorials/remote-connection/" class="cta-button">
      Connect to Remote BioMCP Now
    </a>

  </div>
</div>

## What Can You Do with BioMCP?

### Search Research Literature

Find articles about genes, variants, diseases, and drugs with automatic cancer genomics data from cBioPortal

```bash
biomcp article search --gene BRAF --disease melanoma
```

### Discover Clinical Trials

Search active trials by condition, location, phase, and eligibility criteria including genetic biomarkers

```bash
biomcp trial search --condition "lung cancer" --status RECRUITING
```

### Analyze Genetic Variants

Query variant databases, predict effects, and understand clinical significance

```bash
biomcp variant search --gene TP53 --significance pathogenic
```

### AI-Powered Analysis

Use with Claude Desktop for conversational biomedical research with sequential thinking

```python
# Claude automatically uses BioMCP tools
"What BRAF mutations are found in melanoma?"
```

## 5-Minute Quick Start

### Choose Your Interface

=== "Claude Desktop (Recommended)"

    **Best for**: Conversational research, complex queries, AI-assisted analysis

    1. **Install Claude Desktop** from [claude.ai/desktop](https://claude.ai/desktop)

    2. **Configure BioMCP**:
       ```json
       {
         "mcpServers": {
           "biomcp": {
             "command": "uv",
             "args": [
               "run", "--with", "biomcp-python",
               "biomcp", "run"
             ]
           }
         }
       }
       ```

    3. **Start researching**: Ask Claude about any biomedical topic!

    [Full Claude Desktop Guide →](getting-started/02-claude-desktop-integration.md)

=== "Command Line"

    **Best for**: Direct queries, scripting, automation

    1. **Install BioMCP**:
       ```bash
       # Using uv (recommended)
       uv tool install biomcp-python

       # Or using pip
       pip install biomcp-python
       ```

    2. **Run your first search**:
       ```bash
       biomcp article search \
         --gene BRAF --disease melanoma \
         --limit 5
       ```

    [CLI Reference →](user-guides/01-command-line-interface.md)

=== "Python SDK"

    **Best for**: Integration, custom applications, bulk operations

    1. **Install the package**:
       ```bash
       pip install biomcp-python
       ```

    2. **Use in your code**:
       ```python
       from biomcp import BioMCPClient

       async with BioMCPClient() as client:
           articles = await client.articles.search(
               genes=["BRAF"],
               diseases=["melanoma"]
           )
       ```

    [Python SDK Docs →](apis/python-sdk.md)

## Key Features

### Unified Search Across Databases

- **PubMed/PubTator3**: 30M+ research articles with entity recognition
- **ClinicalTrials.gov**: 400K+ clinical trials worldwide
- **MyVariant.info**: Comprehensive variant annotations
- **cBioPortal**: Automatic cancer genomics integration

### Intelligent Query Processing

- Natural language to structured queries
- Automatic synonym expansion
- OR logic support for flexible matching
- Cross-domain relationship discovery

### Built for AI Integration

- 24 specialized MCP tools
- Sequential thinking for complex analysis
- Streaming responses for real-time updates
- Context preservation across queries

[Explore All Features →](concepts/01-what-is-biomcp.md)

## Learn by Example

### Find Articles About a Specific Mutation

```bash
# Search for BRAF V600E mutations
biomcp article search --gene BRAF \
  --keyword "V600E|p.V600E|c.1799T>A"
```

### Discover Trials Near You

```bash
# Find cancer trials in Boston area
biomcp trial search --condition cancer \
  --latitude 42.3601 --longitude -71.0589 \
  --distance 50
```

### Get Gene Information

```bash
# Get comprehensive gene data
biomcp gene get TP53
```

[More Examples →](tutorials/biothings-prompts.md)

## Popular Workflows

### Literature Review

Systematic search across papers, preprints, and clinical trials
[Workflow Guide →](workflows/all-workflows.md#1-literature-review-workflow)

### Variant Interpretation

From variant ID to clinical significance and treatment implications
[Workflow Guide →](workflows/all-workflows.md#3-variant-interpretation-workflow)

### Trial Matching

Find eligible trials based on patient criteria and biomarkers
[Workflow Guide →](workflows/all-workflows.md#2-clinical-trial-matching-workflow)

### Drug Research

Connect drugs to targets, trials, and research literature
[Workflow Guide →](workflows/all-workflows.md)

## Advanced Features

- **[NCI Integration](getting-started/03-authentication-and-api-keys.md#nci-clinical-trials-api)**: Enhanced cancer trial search with biomarker filtering
- **[AlphaGenome](how-to-guides/04-predict-variant-effects-with-alphagenome.md)**: Predict variant effects on gene regulation
- **[BigQuery Logging](how-to-guides/05-logging-and-monitoring-with-bigquery.md)**: Monitor usage and performance
- **[HTTP Server Mode](developer-guides/01-server-deployment.md)**: Deploy as a service

## Documentation

- **[Getting Started](getting-started/01-quickstart-cli.md)** - Installation and first steps
- **[User Guides](user-guides/01-command-line-interface.md)** - Detailed usage instructions
- **[API Reference](apis/overview.md)** - Technical documentation
- **[FAQ](faq-condensed.md)** - Quick answers to common questions

## Community & Support

- **GitHub**: [github.com/genomoncology/biomcp](https://github.com/genomoncology/biomcp)
- **Issues**: [Report bugs or request features](https://github.com/genomoncology/biomcp/issues)
- **Discussions**: [Ask questions and share tips](https://github.com/genomoncology/biomcp/discussions)

## License

BioMCP is licensed under the MIT License. See [LICENSE](https://github.com/genomoncology/biomcp/blob/main/LICENSE) for details.

```

--------------------------------------------------------------------------------
/docs/tutorials/claude-code-biomcp-alphagenome.md:
--------------------------------------------------------------------------------

```markdown
# Using Claude Code with BioMCP for AlphaGenome Variant Analysis

This tutorial demonstrates how to use Claude Code with BioMCP to analyze genetic variants using Google DeepMind's AlphaGenome. We'll explore both the MCP server integration and CLI approaches, showing how Claude Code can seamlessly work with both interfaces.

## Prerequisites

- **Claude Code**: Latest version with MCP support
- **Python 3.11+**: Required for BioMCP and AlphaGenome
- **uv**: Modern Python package manager ([installation guide](https://docs.astral.sh/uv/getting-started/installation/))
- **AlphaGenome API Key**: Get free access at [Google DeepMind AlphaGenome](https://deepmind.google.com/science/alphagenome)

## Setup Overview

BioMCP offers two interfaces that work perfectly with Claude Code:

1. **MCP Server**: Integrated directly into Claude Code for seamless workflows
2. **CLI**: Command-line interface for direct terminal access

Both produce identical results, giving you flexibility in how you work.

## Part 1: MCP Server Setup

### Step 1: Install BioMCP CLI

```bash
# Install BioMCP CLI globally (note: biomcp-python, not biomcp!)
uv tool install -q biomcp-python

# Verify installation
biomcp --version
```

### Step 2: Configure MCP Server

Add BioMCP to your Claude Code MCP configuration:

```bash
# Basic setup (requires ALPHAGENOME_API_KEY environment variable)
claude mcp add biomcp -- uv run --with biomcp-python biomcp run

# Or with API key in configuration
claude mcp add biomcp -e ALPHAGENOME_API_KEY=your-api-key-here -- uv run --with biomcp-python biomcp run
```

Verify the setup:

```bash
claude mcp list
claude mcp get biomcp
```

### Step 3: Set Environment Variable

```bash
# Add to your shell profile (~/.zshrc or ~/.bashrc)
export ALPHAGENOME_API_KEY='your-api-key-here'

# Or run it directly in your terminal to set it for the current session only
export ALPHAGENOME_API_KEY='your-api-key-here'
```

### Step 4: Install AlphaGenome

```bash
# Clone and install AlphaGenome
git clone https://github.com/google-deepmind/alphagenome.git
cd alphagenome && uv pip install .
```

## Part 2: Testing with Claude Code

### Example: DLG1 Exon Skipping Variant

Let's analyze the variant `chr3:197081044:TACTC>T` from the AlphaGenome paper, which demonstrates exon skipping in the DLG1 gene.

#### Using MCP Server (Recommended)

```python
# Claude Code automatically uses MCP when available
mcp__biomcp__alphagenome_predictor(
    chromosome="chr3",
    position=197081044,
    reference="TACTC",
    alternate="T"
)
```

**Result:**

```markdown
## AlphaGenome Variant Effect Predictions

**Variant**: chr3:197081044 TACTC>T
**Analysis window**: 131,072 bp

### Gene Expression

- **MELTF**: +2.57 log₂ fold change (↑ increases expression)

### Chromatin Accessibility

- **EFO:0005719 DNase-seq**: +17.27 log₂ change (↑ increases accessibility)

### Splicing

- Potential splicing alterations detected

### Summary

- Analyzed 11796 regulatory tracks
- 6045 tracks show substantial changes (|log₂| > 0.5)
```

#### Using CLI Interface

```bash
# Same analysis via CLI
export ALPHAGENOME_API_KEY='your-api-key-here'
uv run biomcp variant predict chr3 197081044 TACTC T
```

**Result:** Identical output to MCP server.

## Part 3: Why Both Interfaces Matter

### MCP Server Advantages 🔌

- **Persistent State**: No need to re-export environment variables
- **Workflow Integration**: Seamless chaining with other biomedical tools
- **Structured Data**: Direct programmatic access to results
- **Auto-Documentation**: Built-in parameter validation

### CLI Advantages 💻

- **Immediate Access**: No server setup required
- **Debugging**: Direct command-line testing
- **Scripting**: Easy integration into bash scripts
- **Standalone Use**: Works without Claude Code

### Claude Code Perspective

From Claude Code's perspective, both interfaces work equally well. The **MCP approach provides slight benefits**:

- Results persist across conversation turns
- Built-in error handling and validation
- Automatic integration with thinking and search workflows
- No need to manage environment variables per session

**Trade-off**: MCP requires initial setup, while CLI is immediately available.

## Part 4: Advanced Usage Examples

### Multi-Variant Analysis

```python
# Analyze multiple variants from AlphaGenome paper
variants = [
    ("chr3", 197081044, "TACTC", "T"),      # DLG1 exon skipping
    ("chr21", 46126238, "G", "C"),          # COL6A2 splice junction
    ("chr16", 173694, "A", "G")             # HBA2 polyadenylation
]

for chr, pos, ref, alt in variants:
    result = mcp__biomcp__alphagenome_predictor(
        chromosome=chr,
        position=pos,
        reference=ref,
        alternate=alt
    )
    print(f"Most affected gene: {result}")
```

### Tissue-Specific Analysis

```python
# Analyze with tissue context
mcp__biomcp__alphagenome_predictor(
    chromosome="chr7",
    position=140753336,
    reference="A",
    alternate="T",
    tissue_types=["UBERON:0000310"]  # breast tissue
)
```

### Combined BioMCP Workflow

```python
# 1. First, search for known annotations
variant_data = mcp__biomcp__variant_searcher(gene="BRAF")

# 2. Then predict regulatory effects
regulatory_effects = mcp__biomcp__alphagenome_predictor(
    chromosome="chr7",
    position=140753336,
    reference="A",
    alternate="T"
)

# 3. Search literature for context
literature = mcp__biomcp__article_searcher(
    genes=["BRAF"],
    variants=["V600E"]
)
```

## Part 5: Validation and Quality Assurance

### How We Validated the Integration

1. **Raw API Testing**: Directly tested Google's AlphaGenome API
2. **Source Code Analysis**: Verified BioMCP uses correct API methods (`score_variant` + `get_recommended_scorers`)
3. **Cross-Validation**: Confirmed identical results across all three approaches:
   - Raw Python API: MELTF +2.57 log₂
   - BioMCP CLI: MELTF +2.57 log₂
   - BioMCP MCP: MELTF +2.57 log₂

### Key Scientific Finding

The variant `chr3:197081044:TACTC>T` most strongly affects **MELTF** (+2.57 log₂ fold change), not DLG1 as initially expected. This demonstrates that AlphaGenome considers the full regulatory landscape, not just the nearest gene.

## Part 6: Best Practices

### For MCP Usage

- Use structured thinking with `mcp__biomcp__think` for complex analyses
- Leverage `call_benefit` parameter to improve result quality
- Chain multiple tools for comprehensive variant characterization

### For CLI Usage

- Set `ALPHAGENOME_API_KEY` in your shell profile
- Use `--help` to explore all available parameters
- Combine with other CLI tools via pipes and scripts

### General Tips

- Start with default 131kb analysis window
- Use tissue-specific analysis when relevant
- Validate surprising results with literature search
- Consider both gene expression and chromatin accessibility effects

## Conclusion

BioMCP's dual interface approach (MCP + CLI) provides robust variant analysis capabilities. Claude Code works seamlessly with both, offering flexibility for different workflows. The MCP integration provides slight advantages for interactive analysis, while the CLI excels for scripting and debugging.

The combination of AlphaGenome's predictive power with BioMCP's comprehensive biomedical data access creates a powerful platform for genetic variant analysis and interpretation.

## Resources

- [BioMCP Documentation](https://biomcp.org)
- [AlphaGenome Paper](https://deepmind.google/science/alphagenome)
- [Claude Code MCP Guide](https://docs.anthropic.com/claude/docs/model-context-protocol)
- [uv Documentation](https://docs.astral.sh/uv/)

```

--------------------------------------------------------------------------------
/tests/tdd/articles/test_search.py:
--------------------------------------------------------------------------------

```python
import json
from unittest.mock import patch

import pytest

from biomcp.articles.search import (
    PubmedRequest,
    ResultItem,
    SearchResponse,
    convert_request,
    search_articles,
)


async def test_convert_search_query(anyio_backend):
    """Convert a request using every entity type and inspect the query text.

    The API may or may not return prefixed entity IDs, so each term is
    accepted either verbatim or in its ``@TYPE_`` prefixed form.
    """
    converted = await convert_request(
        request=PubmedRequest(
            chemicals=["Caffeine"],
            diseases=["non-small cell lung cancer"],
            genes=["BRAF"],
            variants=["BRAF V600E"],
            keywords=["therapy"],
        )
    )
    text = converted.text
    lowered = text.lower()

    # Keywords always lead the query.
    assert text.startswith("therapy AND ")

    # Every term must be present, with or without an entity prefix.
    assert any(
        token in text for token in ("Caffeine", "@CHEMICAL_Caffeine")
    )
    assert (
        any(
            token in lowered
            for token in ("non-small cell lung cancer", "carcinoma")
        )
        or "@DISEASE_" in text
    )
    assert any(token in text for token in ("BRAF", "@GENE_BRAF"))
    assert any(token in text for token in ("V600E", "p.V600E", "@VARIANT_"))

    # Five terms joined with AND means at least four AND operators.
    and_count = text.count(" AND ")
    assert and_count >= 4

    # Default page size (10, chosen for token efficiency).
    assert converted.size == 10


async def test_convert_search_query_with_or_logic(anyio_backend):
    """Pipe-separated keyword alternatives must become a parenthesised OR group."""
    request = PubmedRequest(
        genes=["PTEN"],
        keywords=["R173|Arg173|p.R173", "mutation"],
    )
    text = (await convert_request(request=request)).text

    # The piped keyword is expanded into a single OR group.
    assert "(R173 OR Arg173 OR p.R173)" in text
    assert "mutation" in text
    # The gene may be emitted verbatim or as a prefixed entity ID.
    assert any(token in text for token in ("PTEN", "@GENE_PTEN"))

    # Three terms joined with AND means at least two AND operators.
    and_count = text.count(" AND ")
    assert and_count >= 2


async def test_search(anyio_backend):
    """Search against the live PubTator3 API (network dependent, may be flaky).

    Failure modes outside this test's control:
    - Network connectivity issues (Error 599)
    - API rate limiting
    - Search results changing over time

    An error payload from the API causes the test to be skipped rather than
    failed. Prefer test_search_mocked for reliable coverage.
    """
    request = PubmedRequest(
        genes=["BRAF"],
        diseases=["NSCLC", "Non - Small Cell Lung Cancer"],
        keywords=["BRAF mutations NSCLC"],
        variants=["mutation", "mutations"],
    )
    hits = json.loads(await search_articles(request, output_json=True))
    assert isinstance(hits, list)

    # An error response is a list whose first item carries an 'error' key.
    if hits and isinstance(hits[0], dict) and "error" in hits[0]:
        pytest.skip(f"API returned error: {hits[0]['error']}")

    # Page size is 10 (reduced from 40 for token efficiency).
    assert len(hits) == 10

    # todo: this might be flaky, the top hit depends on live ranking.
    expected_title = (
        "[Expert consensus on the diagnosis and treatment in advanced "
        "non-small cell lung cancer with BRAF mutation in China]."
    )
    top_hit = ResultItem.model_validate(hits[0])
    assert top_hit.title == expected_title


@pytest.mark.asyncio
async def test_search_mocked(anyio_backend):
    """Run search_articles with all outbound calls patched, so no network is needed."""
    expected_title = (
        "[Expert consensus on the diagnosis and treatment in advanced "
        "non-small cell lung cancer with BRAF mutation in China]."
    )
    expected_abstract = (
        "This is a test abstract about BRAF mutations in NSCLC."
    )

    # Ten identical hits; no abstract here as it will be added by add_abstracts.
    search_page = SearchResponse(
        results=[
            ResultItem(
                pmid=37495419,
                title=expected_title,
                journal="Zhonghua Zhong Liu Za Zhi",
                authors=["Zhang", "Li", "Wang"],
                date="2023-07-23",
                doi="10.3760/cma.j.cn112152-20230314-00115",
            )
            for _ in range(10)
        ],
        page_size=10,
        current=1,
        count=10,
        total_pages=1,
    )

    with (
        patch("biomcp.http_client.request_api") as request_api_mock,
        patch("biomcp.articles.search.autocomplete") as autocomplete_mock,
        patch("biomcp.articles.search.call_pubtator_api") as pubtator_mock,
    ):
        from biomcp.articles.fetch import (
            Article,
            FetchArticlesResponse,
            Passage,
            PassageInfo,
        )

        request_api_mock.return_value = (search_page, None)
        # Simplified: autocomplete yields no entity mapping.
        autocomplete_mock.return_value = None

        # Fetch response carrying the abstract for the mocked PMID.
        abstract_passage = Passage(
            text="This is a test abstract about BRAF mutations in NSCLC.",
            infons=PassageInfo(section_type="ABSTRACT"),
        )
        fetched = FetchArticlesResponse(
            PubTator3=[Article(pmid=37495419, passages=[abstract_passage])]
        )
        pubtator_mock.return_value = (fetched, None)

        request = PubmedRequest(
            genes=["BRAF"],
            diseases=["NSCLC", "Non - Small Cell Lung Cancer"],
            keywords=["BRAF mutations NSCLC"],
            variants=["mutation", "mutations"],
        )
        hits = json.loads(await search_articles(request, output_json=True))

        assert isinstance(hits, list)
        # Page size is 10 (reduced from 40 for token efficiency).
        assert len(hits) == 10

        top_hit = ResultItem.model_validate(hits[0])
        assert top_hit.title == expected_title
        assert top_hit.abstract == expected_abstract


@pytest.mark.asyncio
async def test_search_network_error(anyio_backend):
    """A request-level failure must surface as a single JSON error entry."""
    from biomcp.http_client import RequestError

    failure = RequestError(code=599, message="Network connectivity error")
    request = PubmedRequest(genes=["BRAF"])

    with patch("biomcp.http_client.request_api") as request_api_mock:
        # The HTTP layer reports "no payload, one error".
        request_api_mock.return_value = (None, failure)

        payload = json.loads(
            await search_articles(request, output_json=True)
        )

        assert isinstance(payload, list)
        assert len(payload) == 1

        (entry,) = payload
        assert "error" in entry
        assert "Error 599: Network connectivity error" in entry["error"]

```

--------------------------------------------------------------------------------
/BIOMCP_DATA_FLOW.md:
--------------------------------------------------------------------------------

```markdown
# BioMCP Data Flow Diagram

This document illustrates how BioMCP (Biomedical Model Context Protocol) works, showing the interaction between AI clients, the MCP server, domains, and external data sources.

## High-Level Architecture

```mermaid
graph TB
    subgraph "AI Client Layer"
        AI[AI Assistant<br/>e.g., Claude, GPT]
    end

    subgraph "MCP Server Layer"
        MCP[MCP Server<br/>router.py]
        SEARCH[search tool]
        FETCH[fetch tool]
    end

    subgraph "Domain Routing Layer"
        ROUTER[Query Router]
        PARSER[Query Parser]
        UNIFIED[Unified Query<br/>Language]
    end

    subgraph "Domain Handlers"
        ARTICLES[Articles Domain<br/>Handler]
        TRIALS[Trials Domain<br/>Handler]
        VARIANTS[Variants Domain<br/>Handler]
        THINKING[Thinking Domain<br/>Handler]
    end

    subgraph "External APIs"
        subgraph "Article Sources"
            PUBMED[PubTator3/<br/>PubMed]
            BIORXIV[bioRxiv/<br/>medRxiv]
            EUROPEPMC[Europe PMC]
        end

        subgraph "Clinical Data"
            CLINICALTRIALS[ClinicalTrials.gov]
        end

        subgraph "Variant Sources"
            MYVARIANT[MyVariant.info]
            TCGA[TCGA]
            KG[1000 Genomes]
            CBIO[cBioPortal]
        end
    end

    %% Connections
    AI -->|MCP Protocol| MCP
    MCP --> SEARCH
    MCP --> FETCH

    SEARCH --> ROUTER
    ROUTER --> PARSER
    PARSER --> UNIFIED

    ROUTER --> ARTICLES
    ROUTER --> TRIALS
    ROUTER --> VARIANTS
    ROUTER --> THINKING

    ARTICLES --> PUBMED
    ARTICLES --> BIORXIV
    ARTICLES --> EUROPEPMC
    ARTICLES -.->|Gene enrichment| CBIO

    TRIALS --> CLINICALTRIALS

    VARIANTS --> MYVARIANT
    MYVARIANT --> TCGA
    MYVARIANT --> KG
    VARIANTS --> CBIO

    THINKING -->|Internal| THINKING

    classDef clientClass fill:#e1f5fe,stroke:#01579b,stroke-width:2px
    classDef serverClass fill:#f3e5f5,stroke:#4a148c,stroke-width:2px
    classDef domainClass fill:#e8f5e9,stroke:#1b5e20,stroke-width:2px
    classDef apiClass fill:#fff3e0,stroke:#e65100,stroke-width:2px

    class AI clientClass
    class MCP,SEARCH,FETCH serverClass
    class ARTICLES,TRIALS,VARIANTS,THINKING domainClass
    class PUBMED,BIORXIV,EUROPEPMC,CLINICALTRIALS,MYVARIANT,TCGA,KG,CBIO apiClass
```

## Detailed Search Flow

```mermaid
sequenceDiagram
    participant AI as AI Client
    participant MCP as MCP Server
    participant Router as Query Router
    participant Domain as Domain Handler
    participant API as External API

    AI->>MCP: search(query="gene:BRAF AND disease:melanoma")
    MCP->>Router: Parse & route query

    alt Unified Query
        Router->>Router: Parse field syntax
        Router->>Router: Create routing plan

        par Search Articles
            Router->>Domain: Search articles (BRAF, melanoma)
            Domain->>API: PubTator3 API call
            API-->>Domain: Article results
            Domain->>API: cBioPortal enrichment
            API-->>Domain: Mutation data
        and Search Trials
            Router->>Domain: Search trials (melanoma)
            Domain->>API: ClinicalTrials.gov API
            API-->>Domain: Trial results
        and Search Variants
            Router->>Domain: Search variants (BRAF)
            Domain->>API: MyVariant.info API
            API-->>Domain: Variant results
        end
    else Domain-specific
        Router->>Domain: Direct domain search
        Domain->>API: Single API call
        API-->>Domain: Domain results
    else Sequential Thinking
        Router->>Domain: Process thought
        Domain->>Domain: Update session state
        Domain-->>Router: Thought response
    end

    Domain-->>Router: Formatted results
    Router-->>MCP: Aggregated results
    MCP-->>AI: Standardized response
```

## Search Tool Parameters

```mermaid
graph LR
    subgraph "Search Tool Input"
        PARAMS[Parameters]
        QUERY[query: string]
        DOMAIN[domain: article/trial/variant/thinking]
        GENES[genes: list]
        DISEASES[diseases: list]
        CONDITIONS[conditions: list]
        LAT[lat/long: coordinates]
        THOUGHT[thought parameters]
    end

    subgraph "Search Modes"
        MODE1[Unified Query Mode<br/>Uses 'query' param]
        MODE2[Domain-Specific Mode<br/>Uses domain + params]
        MODE3[Thinking Mode<br/>Uses thought params]
    end

    PARAMS --> MODE1
    PARAMS --> MODE2
    PARAMS --> MODE3
```

## Domain-Specific Data Sources

```mermaid
graph TD
    subgraph "Articles Domain"
        A1[PubTator3/PubMed<br/>- Published articles<br/>- Annotations]
        A2[bioRxiv/medRxiv<br/>- Preprints<br/>- Early research]
        A3[Europe PMC<br/>- Open access<br/>- Full text]
        A4[cBioPortal Integration<br/>- Auto-enrichment when genes specified<br/>- Mutation summaries & hotspots]
    end

    subgraph "Trials Domain"
        T1[ClinicalTrials.gov<br/>- Active trials<br/>- Trial details<br/>- Location search]
    end

    subgraph "Variants Domain"
        V1[MyVariant.info<br/>- Variant annotations<br/>- Clinical significance]
        V2[TCGA<br/>- Cancer variants<br/>- Somatic mutations]
        V3[1000 Genomes<br/>- Population frequency<br/>- Allele data]
        V4[cBioPortal<br/>- Cancer mutations<br/>- Hotspots]
    end

    A1 -.->|When genes present| A4
    A2 -.->|When genes present| A4
    A3 -.->|When genes present| A4
```

## Unified Query Language

```mermaid
graph TD
    QUERY["Unified Query<br/>#quot;gene:BRAF AND disease:melanoma#quot;"]

    QUERY --> PARSE[Query Parser]

    PARSE --> F1[Field: gene<br/>Value: BRAF]
    PARSE --> F2[Field: disease<br/>Value: melanoma]

    F1 --> D1[Articles Domain]
    F1 --> D2[Variants Domain]
    F2 --> D1
    F2 --> D3[Trials Domain]

    D1 --> R1[PubMed Results]
    D2 --> R2[Variant Results]
    D3 --> R3[Trial Results]

    R1 --> AGG[Aggregated Results]
    R2 --> AGG
    R3 --> AGG
```

## Example: Location-Based Trial Search

```mermaid
sequenceDiagram
    participant User as User
    participant AI as AI Client
    participant MCP as BioMCP
    participant GEO as Geocoding Service
    participant CT as ClinicalTrials.gov

    User->>AI: Find active trials in Cleveland for NSCLC
    AI->>AI: Recognize location needs geocoding
    AI->>GEO: Geocode "Cleveland"
    GEO-->>AI: lat: 41.4993, long: -81.6944

    AI->>MCP: search(domain="trial",<br/>diseases=["NSCLC"],<br/>lat=41.4993,<br/>long=-81.6944,<br/>distance=50)

    MCP->>CT: API call with geo filter
    CT-->>MCP: Trials near Cleveland
    MCP-->>AI: Formatted trial results
    AI-->>User: Here are X active NSCLC trials in the Cleveland area
```

## Key Features

1. **Parallel Execution**: Multiple domains are searched simultaneously for unified queries
2. **Smart Enrichment**: Article searches automatically include cBioPortal mutation summaries when genes are specified, providing clinical context alongside literature results
3. **Location Awareness**: Trial searches support geographic filtering with lat/long coordinates
4. **Sequential Thinking**: Built-in reasoning system for complex biomedical questions
5. **Standardized Output**: All results follow OpenAI MCP format for consistency

## Response Format

All search results follow this standardized structure:

```json
{
  "results": [
    {
      "id": "PMID12345678",
      "title": "BRAF V600E mutation in melanoma",
      "text": "This study investigates BRAF mutations...",
      "url": "https://pubmed.ncbi.nlm.nih.gov/12345678"
    }
  ]
}
```

Fetch results include additional domain-specific metadata in the response.

```

--------------------------------------------------------------------------------
/src/biomcp/openfda/drug_labels_helpers.py:
--------------------------------------------------------------------------------

```python
"""
Helper functions for OpenFDA drug labels to reduce complexity.
"""

from typing import Any

from .input_validation import sanitize_input
from .utils import clean_text, extract_drug_names, truncate_text


def build_label_search_query(
    name: str | None,
    indication: str | None,
    boxed_warning: bool,
    section: str | None,
) -> str:
    """Build the search query for drug labels."""
    search_parts = []

    if name:
        # Sanitize input to prevent injection
        name = sanitize_input(name, max_length=100)

    if name:
        name_query = (
            f'(openfda.brand_name:"{name}" OR '
            f'openfda.generic_name:"{name}" OR '
            f'openfda.substance_name:"{name}")'
        )
        search_parts.append(name_query)

    if indication:
        # Sanitize indication input
        indication = sanitize_input(indication, max_length=200)
        if indication:
            search_parts.append(f'indications_and_usage:"{indication}"')

    if boxed_warning:
        search_parts.append("_exists_:boxed_warning")

    if section:
        # Map common section names to FDA fields
        section_map = {
            "indications": "indications_and_usage",
            "dosage": "dosage_and_administration",
            "contraindications": "contraindications",
            "warnings": "warnings_and_precautions",
            "adverse": "adverse_reactions",
            "interactions": "drug_interactions",
            "pregnancy": "pregnancy",
            "pediatric": "pediatric_use",
            "geriatric": "geriatric_use",
            "overdose": "overdosage",
        }
        field_name = section_map.get(section.lower(), section)
        search_parts.append(f"_exists_:{field_name}")

    return " AND ".join(search_parts)


def format_label_summary(result: dict[str, Any], index: int) -> list[str]:
    """Render one drug label search hit as a list of markdown lines.

    Args:
        result: A single label record.
        index: Number shown in front of the drug name in the heading.

    Returns:
        Markdown lines in this order: heading, alternate names, basic
        OpenFDA info, boxed warning, key sections, label ID, and a trailing
        empty string that separates consecutive summaries.
    """
    names = extract_drug_names(result)
    heading_name = names[0] if names else "Unknown Drug"
    lines = [f"#### {index}. {heading_name}"]

    # Any names beyond the first are listed as aliases.
    if len(names) > 1:
        lines.append(f"**Also known as**: {', '.join(names[1:])}")

    # Application number, manufacturer and route.
    lines += _format_label_basic_info(result.get("openfda", {}))

    # The boxed warning is shown as a short excerpt.
    if "boxed_warning" in result:
        boxed = clean_text(" ".join(result["boxed_warning"]))
        excerpt = truncate_text(boxed, 200)
        lines.append(f"\n⚠️ **BOXED WARNING**: {excerpt}")

    # Indications and contraindications.
    lines += _format_label_key_sections(result)

    # Identifier used to retrieve the full label later.
    if "set_id" in result:
        lines.append(f"\n*Label ID: {result['set_id']}*")

    lines.append("")
    return lines


def _format_label_basic_info(openfda: dict) -> list[str]:
    """Format basic label information from OpenFDA data."""
    output = []

    # Application number
    if app_numbers := openfda.get("application_number", []):
        output.append(f"**FDA Application**: {app_numbers[0]}")

    # Manufacturer
    if manufacturers := openfda.get("manufacturer_name", []):
        output.append(f"**Manufacturer**: {manufacturers[0]}")

    # Route
    if routes := openfda.get("route", []):
        output.append(f"**Route**: {', '.join(routes)}")

    return output


def _format_label_key_sections(result: dict) -> list[str]:
    """Format key label sections."""
    output = []

    # Indications
    if "indications_and_usage" in result:
        indications_text = clean_text(
            " ".join(result["indications_and_usage"])
        )
        output.append(
            f"\n**Indications**: {truncate_text(indications_text, 300)}"
        )

    # Contraindications
    if "contraindications" in result:
        contra_text = clean_text(" ".join(result["contraindications"]))
        output.append(
            f"\n**Contraindications**: {truncate_text(contra_text, 200)}"
        )

    return output


def format_label_header(result: dict[str, Any], set_id: str) -> list[str]:
    """Build the markdown header lines for a detailed drug label view.

    Args:
        result: The label record.
        set_id: Label identifier echoed at the end of the header.

    Returns:
        Title line, optional alternate names, OpenFDA metadata lines and
        the label ID line, in that order.
    """
    names = extract_drug_names(result)
    title_name = names[0] if names else "Unknown Drug"

    header = [f"## FDA Drug Label: {title_name}\n"]

    # Remaining names, if any, go on their own line.
    if len(names) > 1:
        header.append(f"**Other Names**: {', '.join(names[1:])}")

    header += _format_detailed_metadata(result.get("openfda", {}))
    header.append(f"**Label ID**: {set_id}\n")
    return header


def _format_detailed_metadata(openfda: dict) -> list[str]:
    """Format detailed metadata from OpenFDA."""
    output = []

    # FDA application numbers
    if app_numbers := openfda.get("application_number", []):
        output.append(f"**FDA Application**: {', '.join(app_numbers)}")

    # Manufacturers
    if manufacturers := openfda.get("manufacturer_name", []):
        output.append(f"**Manufacturer**: {', '.join(manufacturers)}")

    # Routes of administration
    if routes := openfda.get("route", []):
        output.append(f"**Route of Administration**: {', '.join(routes)}")

    # Pharmacologic class
    if pharm_classes := openfda.get("pharm_class_epc", []):
        output.append(f"**Pharmacologic Class**: {', '.join(pharm_classes)}")

    return output


def format_label_section(
    result: dict[str, Any], section: str, section_titles: dict[str, str]
) -> list[str]:
    """Render one section of a label as markdown lines.

    Args:
        result: The label record.
        section: Field name of the section to render.
        section_titles: Display titles keyed by field name; a field without
            an entry falls back to its upper-cased name with underscores
            replaced by spaces.

    Returns:
        An empty list when *section* is absent from *result*; otherwise the
        heading, the cleaned text (cut at 3000 characters with a truncation
        notice when longer) and a trailing empty string.
    """
    if section not in result:
        return []

    heading = section_titles.get(section, section.upper().replace("_", " "))
    lines: list[str] = [f"### {heading}\n"]

    # Section content may be a list of strings or a single string.
    raw = result[section]
    body = clean_text(" ".join(raw) if isinstance(raw, list) else raw)

    if len(body) > 3000:
        # Very long sections are shortened and flagged.
        lines.append(truncate_text(body, 3000))
        lines.append("\n*[Section truncated for brevity]*")
    else:
        lines.append(body)

    lines.append("")
    return lines


def get_default_sections() -> list[str]:
    """Return the label sections shown by default, in display order.

    A new list is built on every call, so callers may mutate the result.
    """
    display_order = (
        "indications_and_usage",
        "dosage_and_administration",
        "contraindications",
        "warnings_and_precautions",
        "adverse_reactions",
        "drug_interactions",
        "use_in_specific_populations",
        "clinical_pharmacology",
        "clinical_studies",
    )
    return list(display_order)


def get_section_titles() -> dict[str, str]:
    """Return display titles keyed by label section field name.

    Every title is the field name upper-cased with underscores replaced by
    spaces (e.g. ``how_supplied`` -> ``HOW SUPPLIED``).
    """
    titled_fields = (
        "indications_and_usage",
        "dosage_and_administration",
        "contraindications",
        "warnings_and_precautions",
        "adverse_reactions",
        "drug_interactions",
        "use_in_specific_populations",
        "clinical_pharmacology",
        "clinical_studies",
        "how_supplied",
        "storage_and_handling",
        "patient_counseling_information",
        "pregnancy",
        "nursing_mothers",
        "pediatric_use",
        "geriatric_use",
        "overdosage",
    )
    return {
        field: field.upper().replace("_", " ") for field in titled_fields
    }

```

--------------------------------------------------------------------------------
/tests/tdd/test_drug_shortages.py:
--------------------------------------------------------------------------------

```python
"""Tests for FDA drug shortages module."""

from datetime import datetime
from unittest.mock import AsyncMock, patch

import pytest

from biomcp.openfda.drug_shortages import (
    get_drug_shortage,
    search_drug_shortages,
)


class TestDrugShortages:
    """Exercise the public drug shortage functions against a stubbed cache loader."""

    # Dotted path of the cache loader that every test replaces.
    _CACHE_LOADER = "biomcp.openfda.drug_shortages._get_cached_shortage_data"

    @classmethod
    def _cache_returning(cls, payload):
        """Return a patcher that swaps the cache loader for an AsyncMock yielding *payload*."""
        return patch(
            cls._CACHE_LOADER,
            new_callable=AsyncMock,
            return_value=payload,
        )

    @staticmethod
    def _snapshot(*shortages):
        """Wrap shortage records in a cache payload stamped with the current time."""
        return {
            "_fetched_at": datetime.now().isoformat(),
            "shortages": list(shortages),
        }

    @pytest.mark.asyncio
    async def test_search_drug_shortages_no_data_available(self):
        """Searching with no FDA data yields the unavailability notice and fallback links."""
        with self._cache_returning(None):
            result = await search_drug_shortages(drug="cisplatin")

        expected_fragments = (
            "Drug Shortage Data Temporarily Unavailable",
            "FDA drug shortage database cannot be accessed",
            "https://www.accessdata.fda.gov/scripts/drugshortages/",
            "https://www.ashp.org/drug-shortages/current-shortages",
        )
        for fragment in expected_fragments:
            assert fragment in result

    @pytest.mark.asyncio
    async def test_get_drug_shortage_no_data_available(self):
        """Looking up one drug with no FDA data yields the unavailability notice."""
        with self._cache_returning(None):
            result = await get_drug_shortage("cisplatin")

        expected_fragments = (
            "Drug Shortage Data Temporarily Unavailable",
            "FDA drug shortage database cannot be accessed",
            "Alternative Options:",
        )
        for fragment in expected_fragments:
            assert fragment in result

    @pytest.mark.asyncio
    async def test_mock_data_not_used_in_production(self):
        """No placeholder drug names may appear when real data is missing."""
        # None stands for "cache miss and fetch failure".
        with self._cache_returning(None):
            result = await search_drug_shortages(drug="test")

        assert "Drug Shortage Data Temporarily Unavailable" in result
        # Names that mock data would have contained must be absent.
        for mock_only_name in ("Cisplatin Injection", "Methotrexate"):
            assert mock_only_name not in result

    # The cache functionality and cache expiry tests were removed: they
    # exercised private implementation details. Caching is not part of the
    # public API, which is covered through search_drug_shortages and
    # get_drug_shortage.

    @pytest.mark.asyncio
    async def test_search_with_filters(self):
        """Status, therapeutic category and drug name filters each narrow the results."""
        payload = self._snapshot(
            {
                "generic_name": "Drug A",
                "brand_names": ["Brand A"],
                "status": "Current Shortage",
                "therapeutic_category": "Oncology",
            },
            {
                "generic_name": "Drug B",
                "brand_names": ["Brand B"],
                "status": "Resolved",
                "therapeutic_category": "Cardiology",
            },
            {
                "generic_name": "Drug C",
                "brand_names": ["Brand C"],
                "status": "Current Shortage",
                "therapeutic_category": "Oncology",
            },
        )

        with self._cache_returning(payload):
            # Status filter keeps the two current shortages.
            by_status = await search_drug_shortages(status="current")
            assert "Drug A" in by_status
            assert "Drug C" in by_status
            assert "Drug B" not in by_status

            # Therapeutic category filter keeps the two oncology drugs.
            by_category = await search_drug_shortages(
                therapeutic_category="Oncology"
            )
            assert "Drug A" in by_category
            assert "Drug C" in by_category
            assert "Drug B" not in by_category

            # Drug name filter keeps only the named drug.
            by_name = await search_drug_shortages(drug="Drug B")
            assert "Drug B" in by_name
            assert "Drug A" not in by_name

    @pytest.mark.asyncio
    async def test_get_specific_drug_shortage(self):
        """A matching drug lookup reports the stored shortage details."""
        payload = self._snapshot(
            {
                "generic_name": "Cisplatin Injection",
                "brand_names": ["Platinol"],
                "status": "Current Shortage",
                "shortage_start_date": "2023-02-10",
                "estimated_resolution": "Q2 2024",
                "reason": "Manufacturing delays",
                "therapeutic_category": "Oncology",
                "notes": "Limited supplies available",
            },
        )

        with self._cache_returning(payload):
            result = await get_drug_shortage("cisplatin")

        expected_fragments = (
            "Cisplatin Injection",
            "Current Shortage",
            "Manufacturing delays",
            "Oncology",
            "Limited supplies available",
        )
        for fragment in expected_fragments:
            assert fragment in result

    @pytest.mark.asyncio
    async def test_get_drug_shortage_not_found(self):
        """An unknown drug produces a not-found message naming the drug."""
        payload = self._snapshot(
            {
                "generic_name": "Drug A",
                "status": "Current Shortage",
            },
        )

        with self._cache_returning(payload):
            result = await get_drug_shortage("nonexistent-drug")

        assert "No shortage information found" in result
        assert "nonexistent-drug" in result

    @pytest.mark.asyncio
    async def test_api_key_parameter_ignored(self):
        """The api_key argument is accepted but does not affect results (FDA limitation)."""
        payload = self._snapshot(
            {
                "generic_name": "Test Drug",
                "status": "Current Shortage",
                "therapeutic_category": "Test Category",
            }
        )

        with self._cache_returning(payload):
            # Passing a key must neither fail nor alter the output.
            result = await search_drug_shortages(
                drug="test",
                api_key="test-key",
            )

        # With data available the normal formatted report is produced.
        assert "FDA Drug Shortage Information" in result
        assert "Test Drug" in result

    # Mock data function has been removed - no longer needed

```

--------------------------------------------------------------------------------
/tests/tdd/thinking/test_sequential.py:
--------------------------------------------------------------------------------

```python
"""Tests for sequential thinking functionality."""

from datetime import datetime

import pytest

from biomcp.thinking import sequential
from biomcp.thinking.session import ThoughtEntry, _session_manager


@pytest.fixture(autouse=True)
def clear_thinking_state():
    """Clear all thinking sessions before and after each test.

    Declared ``autouse``, so it applies to every test in this module without
    being requested. Sessions held by ``_session_manager`` are cleared once
    before the test body runs and once more after it finishes, presumably so
    that state cannot leak between tests.
    """
    # Setup: start from an empty session manager.
    _session_manager.clear_all_sessions()
    # The test body runs while the fixture is suspended here.
    yield
    # Teardown: drop whatever sessions the test created.
    _session_manager.clear_all_sessions()


class TestSequentialThinking:
    """Behavioural tests for the sequential thinking MCP tool."""

    @pytest.mark.anyio
    async def test_basic_sequential_thinking(self):
        """Record one thought and check both the reply and the stored entry."""
        opening_text = "First step: analyze the problem"
        reply = await sequential._sequential_thinking(
            thought=opening_text,
            nextThoughtNeeded=True,
            thoughtNumber=1,
            totalThoughts=3,
        )

        for fragment in (
            "Added thought 1 to main sequence",
            "Progress: 1/3 thoughts",
            "Next thought needed",
        ):
            assert fragment in reply

        # The call above should have left a current session behind.
        active = _session_manager.get_session()
        assert active is not None
        assert len(active.thought_history) == 1

        # The stored entry mirrors the arguments passed to the tool.
        recorded = active.thought_history[0]
        assert recorded.thought == opening_text
        assert recorded.thought_number == 1
        assert recorded.total_thoughts == 3
        assert recorded.next_thought_needed is True
        assert recorded.is_revision is False

    @pytest.mark.anyio
    async def test_multiple_sequential_thoughts(self):
        """Three consecutive thoughts all land in the main history."""
        # The first two thoughts keep the sequence open.
        for number, text in ((1, "First step"), (2, "Second step")):
            await sequential._sequential_thinking(
                thought=text,
                nextThoughtNeeded=True,
                thoughtNumber=number,
                totalThoughts=3,
            )

        # The last thought closes it.
        reply = await sequential._sequential_thinking(
            thought="Final step",
            nextThoughtNeeded=False,
            thoughtNumber=3,
            totalThoughts=3,
        )

        assert "Added thought 3 to main sequence" in reply
        assert "Thinking sequence complete" in reply
        active = _session_manager.get_session()
        assert len(active.thought_history) == 3

    @pytest.mark.anyio
    async def test_thought_revision(self):
        """Revising thought 1 replaces it instead of appending a new entry."""
        shared_args = {
            "nextThoughtNeeded": True,
            "thoughtNumber": 1,
            "totalThoughts": 2,
        }

        # Original version of the thought.
        await sequential._sequential_thinking(
            thought="Initial analysis", **shared_args
        )

        # Same slot, flagged as a revision of thought 1.
        reply = await sequential._sequential_thinking(
            thought="Better analysis",
            isRevision=True,
            revisesThought=1,
            **shared_args,
        )

        assert "Revised thought 1" in reply
        history = _session_manager.get_session().thought_history
        assert len(history) == 1
        assert history[0].thought == "Better analysis"
        assert history[0].is_revision is True

    @pytest.mark.anyio
    async def test_branching_logic(self):
        """A thought with ``branchFromThought`` goes to a branch, not history."""
        # Build a two-thought main sequence to branch from.
        for number, text in ((1, "Main thought 1"), (2, "Main thought 2")):
            await sequential._sequential_thinking(
                thought=text,
                nextThoughtNeeded=True,
                thoughtNumber=number,
                totalThoughts=3,
            )

        # Fork off thought 2.
        reply = await sequential._sequential_thinking(
            thought="Alternative approach",
            nextThoughtNeeded=True,
            thoughtNumber=3,
            totalThoughts=3,
            branchFromThought=2,
        )

        assert "Added thought 3 to branch 'branch_2'" in reply
        active = _session_manager.get_session()
        assert len(active.thought_history) == 2
        branches = active.thought_branches
        assert len(branches) == 1
        assert "branch_2" in branches
        assert len(branches["branch_2"]) == 1

    @pytest.mark.anyio
    async def test_validation_errors(self):
        """Invalid arguments produce the matching validation message."""
        # (extra keyword arguments, message expected in the reply)
        invalid_cases = (
            (
                {"thoughtNumber": 0, "totalThoughts": 1},
                "thoughtNumber must be >= 1",
            ),
            (
                {"thoughtNumber": 1, "totalThoughts": 0},
                "totalThoughts must be >= 1",
            ),
            (
                {"thoughtNumber": 1, "totalThoughts": 1, "isRevision": True},
                "revisesThought must be specified when isRevision=True",
            ),
        )

        for extra_args, expected_message in invalid_cases:
            reply = await sequential._sequential_thinking(
                thought="Test",
                nextThoughtNeeded=False,
                **extra_args,
            )
            assert expected_message in reply

    @pytest.mark.anyio
    async def test_needs_more_thoughts(self):
        """``needsMoreThoughts`` is carried into the entry's metadata."""
        reply = await sequential._sequential_thinking(
            thought="This problem is more complex than expected",
            nextThoughtNeeded=True,
            thoughtNumber=3,
            totalThoughts=3,
            needsMoreThoughts=True,
        )

        assert "Added thought 3 to main sequence" in reply
        history = _session_manager.get_session().thought_history
        assert len(history) == 1
        flag = history[0].metadata.get("needsMoreThoughts")
        assert flag is True


class TestUtilityFunctions:
    """Tests for the timestamp helper and direct session manipulation."""

    def test_get_current_timestamp(self):
        """The timestamp is a string that parses as an ISO date-time."""
        stamp = sequential.get_current_timestamp()
        assert isinstance(stamp, str)

        # Normalise a trailing "Z" and the "T" separator, then keep only
        # the text before the first "." so fromisoformat() accepts it.
        normalised = stamp.replace("Z", "+00:00").replace("T", " ")
        before_first_dot = normalised.split(".")[0]
        assert isinstance(datetime.fromisoformat(before_first_dot), datetime)

    def test_session_management(self):
        """Sessions store main-line thoughts and branch thoughts separately."""
        # Start from a clean slate regardless of fixture ordering.
        _session_manager.clear_all_sessions()

        fresh = _session_manager.create_session()
        assert fresh is not None
        assert fresh.session_id is not None

        # A thought without a branch id goes into the main history.
        main_thought = ThoughtEntry(
            thought="Test thought",
            thought_number=1,
            total_thoughts=1,
            next_thought_needed=False,
        )
        fresh.add_thought(main_thought)
        assert len(fresh.thought_history) == 1
        assert fresh.thought_history[0].thought == "Test thought"

        # A thought carrying a branch id is filed under that branch.
        branched_thought = ThoughtEntry(
            thought="Branch thought",
            thought_number=2,
            total_thoughts=2,
            next_thought_needed=False,
            branch_id="test-branch",
            branch_from_thought=1,
        )
        fresh.add_thought(branched_thought)
        branches = fresh.thought_branches
        assert len(branches) == 1
        assert "test-branch" in branches
        assert len(branches["test-branch"]) == 1

```

--------------------------------------------------------------------------------
/tests/tdd/openfda/test_drug_labels.py:
--------------------------------------------------------------------------------

```python
"""
Unit tests for OpenFDA drug labels integration.
"""

from unittest.mock import patch

import pytest

from biomcp.openfda.drug_labels import get_drug_label, search_drug_labels


@pytest.mark.asyncio
async def test_search_drug_labels_by_name():
    """A name search sends the name in the query and renders the label."""
    label_record = {
        "set_id": "abc123",
        "openfda": {
            "brand_name": ["KEYTRUDA"],
            "generic_name": ["PEMBROLIZUMAB"],
            "application_number": ["BLA125514"],
            "manufacturer_name": ["MERCK"],
            "route": ["INTRAVENOUS"],
        },
        "indications_and_usage": [
            "KEYTRUDA is indicated for the treatment of patients with unresectable or metastatic melanoma."
        ],
        "boxed_warning": ["Immune-mediated adverse reactions can occur."],
    }
    api_payload = {
        "meta": {"results": {"total": 5}},
        "results": [label_record],
    }

    request_patch = patch("biomcp.openfda.drug_labels.make_openfda_request")
    with request_patch as fake_request:
        fake_request.return_value = (api_payload, None)
        rendered = await search_drug_labels(name="pembrolizumab", limit=10)

    # Exactly one request, with the drug name in its search expression.
    fake_request.assert_called_once()
    query_params = fake_request.call_args[0][1]
    assert "pembrolizumab" in query_params["search"].lower()

    # The rendered text exposes the key fields of the stubbed label.
    for expected in (
        "FDA Drug Labels",
        "KEYTRUDA",
        "PEMBROLIZUMAB",
        "melanoma",
        "BOXED WARNING",
        "Immune-mediated",
        "abc123",
    ):
        assert expected in rendered


@pytest.mark.asyncio
async def test_search_drug_labels_by_indication():
    """An indication search queries on the term and reports the total."""
    label_record = {
        "set_id": "xyz789",
        "openfda": {
            "brand_name": ["DRUG X"],
            "generic_name": ["GENERIC X"],
        },
        "indications_and_usage": ["Indicated for breast cancer treatment"],
    }
    api_payload = {
        "meta": {"results": {"total": 10}},
        "results": [label_record],
    }

    request_patch = patch("biomcp.openfda.drug_labels.make_openfda_request")
    with request_patch as fake_request:
        fake_request.return_value = (api_payload, None)
        rendered = await search_drug_labels(indication="breast cancer")

    # The indication must appear in the search expression that was sent.
    query_params = fake_request.call_args[0][1]
    assert "breast cancer" in query_params["search"].lower()

    # The output mentions the indication and the total from the metadata.
    assert "breast cancer" in rendered
    assert "10 labels" in rendered


@pytest.mark.asyncio
async def test_search_drug_labels_no_params():
    """Calling the search with no criteria yields a usage hint."""
    guidance = await search_drug_labels()

    for expected in (
        "Please specify",
        "drug name, indication, or label section",
        "Examples:",
    ):
        assert expected in guidance


@pytest.mark.asyncio
async def test_search_drug_labels_boxed_warning_filter():
    """``boxed_warning=True`` adds an existence filter to the query."""
    api_payload = {
        "meta": {"results": {"total": 3}},
        "results": [
            {
                "set_id": "warn123",
                "openfda": {"brand_name": ["WARNING DRUG"]},
                "boxed_warning": ["Serious warning text"],
            }
        ],
    }

    request_patch = patch("biomcp.openfda.drug_labels.make_openfda_request")
    with request_patch as fake_request:
        fake_request.return_value = (api_payload, None)
        rendered = await search_drug_labels(boxed_warning=True)

    # The filter is expressed as an "_exists_" clause in the search string.
    query_params = fake_request.call_args[0][1]
    assert "_exists_:boxed_warning" in query_params["search"]

    # The stubbed drug and its warning text are both rendered.
    assert "WARNING DRUG" in rendered
    assert "Serious warning" in rendered


@pytest.mark.asyncio
async def test_get_drug_label_detail():
    """Fetching one label by set id renders its identity and main sections."""
    label_record = {
        "set_id": "detail123",
        "openfda": {
            "brand_name": ["DETAILED DRUG"],
            "generic_name": ["GENERIC DETAILED"],
            "application_number": ["NDA123456"],
            "manufacturer_name": ["PHARMA CORP"],
            "route": ["ORAL"],
            "pharm_class_epc": ["KINASE INHIBITOR"],
        },
        "boxed_warning": ["Serious boxed warning"],
        "indications_and_usage": ["Indicated for cancer"],
        "dosage_and_administration": ["Take once daily"],
        "contraindications": ["Do not use if allergic"],
        "warnings_and_precautions": ["Monitor liver function"],
        "adverse_reactions": ["Common: nausea, fatigue"],
        "drug_interactions": ["Avoid with CYP3A4 inhibitors"],
        "clinical_pharmacology": ["Mechanism of action details"],
        "clinical_studies": ["Phase 3 trial results"],
    }
    api_payload = {"results": [label_record]}

    request_patch = patch("biomcp.openfda.drug_labels.make_openfda_request")
    with request_patch as fake_request:
        fake_request.return_value = (api_payload, None)
        rendered = await get_drug_label("detail123")

    # Exactly one request, with the set id in its search expression.
    fake_request.assert_called_once()
    query_params = fake_request.call_args[0][1]
    assert "detail123" in query_params["search"]

    # Identity fields, then section headings and sample section text.
    for expected in (
        "DETAILED DRUG",
        "GENERIC DETAILED",
        "NDA123456",
        "PHARMA CORP",
        "ORAL",
        "KINASE INHIBITOR",
        "BOXED WARNING",
        "Serious boxed warning",
        "INDICATIONS AND USAGE",
        "Indicated for cancer",
        "DOSAGE AND ADMINISTRATION",
        "Take once daily",
        "CONTRAINDICATIONS",
        "WARNINGS AND PRECAUTIONS",
        "ADVERSE REACTIONS",
        "DRUG INTERACTIONS",
    ):
        assert expected in rendered


@pytest.mark.asyncio
async def test_get_drug_label_specific_sections():
    """Only the sections that were asked for appear in the output."""
    api_payload = {
        "results": [
            {
                "set_id": "section123",
                "openfda": {"brand_name": ["SECTION DRUG"]},
                "indications_and_usage": ["Cancer indication"],
                "adverse_reactions": ["Side effects list"],
                "clinical_studies": ["Study data"],
            }
        ]
    }
    wanted_sections = ["indications_and_usage", "adverse_reactions"]

    request_patch = patch("biomcp.openfda.drug_labels.make_openfda_request")
    with request_patch as fake_request:
        fake_request.return_value = (api_payload, None)
        rendered = await get_drug_label("section123", wanted_sections)

    # Both requested sections are rendered with their content.
    for expected in (
        "INDICATIONS AND USAGE",
        "Cancer indication",
        "ADVERSE REACTIONS",
        "Side effects list",
    ):
        assert expected in rendered

    # Present in the payload but not requested, so it must be left out.
    assert "CLINICAL STUDIES" not in rendered


@pytest.mark.asyncio
async def test_get_drug_label_not_found():
    """An empty result list produces a 'not found' message naming the id."""
    empty_payload = {"results": []}

    request_patch = patch("biomcp.openfda.drug_labels.make_openfda_request")
    with request_patch as fake_request:
        fake_request.return_value = (empty_payload, None)
        message = await get_drug_label("NOTFOUND456")

    assert "NOTFOUND456" in message
    assert "not found" in message

```

--------------------------------------------------------------------------------
/docs/getting-started/03-authentication-and-api-keys.md:
--------------------------------------------------------------------------------

```markdown
# Authentication and API Keys

BioMCP integrates with multiple biomedical databases. Many features work without authentication, but some advanced capabilities require an API key.

## Overview of API Keys

| Service         | Required?  | Features Enabled                                  | Get Key                                                                |
| --------------- | ---------- | ------------------------------------------------- | ---------------------------------------------------------------------- |
| **NCI API**     | Optional   | Advanced clinical trial filters, biomarker search | [api.cancer.gov](https://api.cancer.gov)                               |
| **AlphaGenome** | Required\* | Variant effect predictions                        | [deepmind.google.com](https://deepmind.google.com/science/alphagenome) |
| **cBioPortal**  | Optional   | Enhanced cancer genomics queries                  | [cbioportal.org](https://www.cbioportal.org/webAPI)                    |

\*Required only when using AlphaGenome features

## Setting Up API Keys

### Method 1: Environment Variables (Recommended for Personal Use)

Set environment variables in your shell configuration:

```bash
# Add to ~/.bashrc, ~/.zshrc, or equivalent
export NCI_API_KEY="your-nci-api-key"
export ALPHAGENOME_API_KEY="your-alphagenome-key"
export CBIO_TOKEN="your-cbioportal-token"
```

### Method 2: Configuration Files

#### For Claude Desktop

Add keys to your Claude Desktop configuration:

```json
{
  "mcpServers": {
    "biomcp": {
      "command": "uv",
      "args": ["run", "--with", "biomcp-python", "biomcp", "run"],
      "env": {
        "NCI_API_KEY": "your-nci-api-key",
        "ALPHAGENOME_API_KEY": "your-alphagenome-key",
        "CBIO_TOKEN": "your-cbioportal-token"
      }
    }
  }
}
```

#### For Docker Deployments

Include in your Docker run command:

```bash
docker run -e NCI_API_KEY="your-key" \
           -e ALPHAGENOME_API_KEY="your-key" \
           -e CBIO_TOKEN="your-token" \
           biomcp:latest
```

### Method 3: Per-Request Keys (For Hosted Environments)

When using BioMCP through AI assistants or hosted services, provide keys in your request:

```
"Predict effects of BRAF V600E mutation. My AlphaGenome API key is YOUR_KEY_HERE"
```

The AI will recognize patterns like "My [service] API key is..." and use the key for that request only.

## Individual Service Setup

### NCI Clinical Trials API

The National Cancer Institute API provides advanced clinical trial search capabilities.

#### Getting Your Key

1. Visit [api.cancer.gov](https://api.cancer.gov)
2. Click "Get API Key"
3. Complete registration
4. Key is emailed immediately

#### Features Enabled

- Advanced biomarker-based trial search
- Organization and investigator lookups
- Intervention and disease vocabularies
- Higher rate limits (1000 requests/day vs 100)

#### Usage Example

```bash
# With API key set
export NCI_API_KEY="your-key"

# Search trials with biomarker criteria
biomcp trial search --condition melanoma --source nci \
  --required-mutations "BRAF V600E" --allow-brain-mets true
```

### AlphaGenome

Google DeepMind's AlphaGenome predicts variant effects on gene expression and chromatin accessibility.

#### Getting Your Key

1. Visit [AlphaGenome Portal](https://deepmind.google.com/science/alphagenome)
2. Register for non-commercial use
3. Receive API key via email
4. Accept terms of service

#### Features Enabled

- Gene expression predictions
- Chromatin accessibility analysis
- Splicing effect predictions
- Tissue-specific analyses

#### Usage Examples

**CLI with environment variable:**

```bash
export ALPHAGENOME_API_KEY="your-key"
biomcp variant predict chr7 140753336 A T
```

**CLI with per-request key:**

```bash
biomcp variant predict chr7 140753336 A T --api-key YOUR_KEY
```

**Through AI assistant:**

```
"Predict regulatory effects of BRAF V600E (chr7:140753336 A>T).
My AlphaGenome API key is YOUR_KEY_HERE"
```

### cBioPortal

The cBioPortal token enables enhanced cancer genomics queries.

#### Getting Your Token

1. Create account at [cbioportal.org](https://www.cbioportal.org)
2. Navigate to "Web API" section
3. Generate a personal access token
4. Copy the token (shown only once)

#### Features Enabled

- Higher API rate limits
- Access to private studies (if authorized)
- Batch query capabilities
- Extended timeout limits

#### Usage

cBioPortal integration is automatic when searching for genes. Export the token before running a gene search so that it is available to the cBioPortal integration:

```bash
# Enhanced gene search with cancer genomics
export CBIO_TOKEN="your-token"
biomcp article search --gene BRAF --disease melanoma
```

## Security Best Practices

### DO:

- Store keys in environment variables or secure config files
- Use per-request keys in shared/hosted environments
- Rotate keys periodically
- Use separate keys for development/production

### DON'T:

- Commit keys to version control
- Share keys with others
- Include keys in code or documentation
- Store keys in plain text files

### Git Security

Add to `.gitignore`:

```
.env
.env.local
*.key
config/secrets/
```

Use git-secrets to prevent accidental commits:

```bash
# Install git-secrets
brew install git-secrets  # macOS
# or follow instructions at github.com/awslabs/git-secrets

# Set up in your repo
git secrets --install
git secrets --register-aws  # Registers AWS credential patterns; use `git secrets --add <regex>` for other key formats
```

## Troubleshooting

### "API Key Required" Errors

**For AlphaGenome:**

- This service always requires a key
- Provide it via environment variable or per-request
- Check key spelling and format

**For NCI:**

- Basic search works without key
- Advanced features require authentication
- Verify key is active at api.cancer.gov

### "Invalid API Key" Errors

1. Check for extra spaces or quotes
2. Ensure key hasn't expired
3. Verify you're using the correct service's key
4. Test key directly with the service's API

### Rate Limit Errors

**Without API keys:**

- Public limits are restrictive (e.g., 100 requests/day)
- Add delays between requests
- Consider getting API keys

**With API keys:**

- Limits are much higher but still exist
- Implement exponential backoff
- Cache results when possible

## Testing Your Setup

### Check Environment Variables

```bash
# List the BioMCP-related API key variables that are set (note: this prints their values)
env | grep -E "(NCI_API_KEY|ALPHAGENOME_API_KEY|CBIO_TOKEN)"
```

### Test Each Service

```bash
# Test NCI API
biomcp trial search --condition cancer --source nci --limit 1

# Test AlphaGenome (requires key)
biomcp variant predict chr7 140753336 A T --limit 1

# Test cBioPortal integration
biomcp article search --gene TP53 --limit 1
```

## API Key Management Tools

For managing multiple API keys securely:

### 1. direnv (Recommended)

```bash
# Install direnv
brew install direnv  # macOS
# Add to shell: eval "$(direnv hook zsh)"

# Create .envrc in project
echo 'export NCI_API_KEY="your-key"' > .envrc
direnv allow
```

### 2. 1Password CLI

```bash
# Store in 1Password
op item create --category=password \
  --title="BioMCP API Keys" \
  --vault="Development" \
  NCI_API_KEY="your-key"

# Load in shell
export NCI_API_KEY=$(op read "op://Development/BioMCP API Keys/NCI_API_KEY")
```

### 3. AWS Secrets Manager

```bash
# Store secret
aws secretsmanager create-secret \
  --name biomcp/api-keys \
  --secret-string '{"NCI_API_KEY":"your-key"}'

# Retrieve in script
export NCI_API_KEY=$(aws secretsmanager get-secret-value \
  --secret-id biomcp/api-keys \
  --query SecretString \
  --output text | jq -r .NCI_API_KEY)
```

## Next Steps

Now that you have API keys configured:

1. Test each service to ensure keys work
2. Explore [How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md) for advanced features
3. Set up [logging and monitoring](../how-to-guides/05-logging-and-monitoring-with-bigquery.md)
4. Review [security policies](../policies.md) for your organization

```

--------------------------------------------------------------------------------
/docs/concepts/03-sequential-thinking-with-the-think-tool.md:
--------------------------------------------------------------------------------

```markdown
# Sequential Thinking with the Think Tool

## CRITICAL: The Think Tool is MANDATORY

**The 'think' tool must be your FIRST action when using BioMCP. This is not optional.**

For detailed technical documentation on the think tool parameters and usage, see the [MCP Tools Reference - Think Tool](../user-guides/02-mcp-tools-reference.md#3-think).

## Why Sequential Thinking?

Biomedical research is inherently complex, requiring systematic analysis of interconnected data from multiple sources. The think tool enforces a structured approach that:

- **Prevents Information Overload**: Breaks complex queries into manageable steps
- **Ensures Comprehensive Coverage**: Systematic thinking catches details that might be missed
- **Documents Reasoning**: Creates an audit trail of research decisions
- **Improves Accuracy**: Thoughtful planning leads to better search strategies

## Mandatory Usage Requirements

🚨 **REQUIRED USAGE:**

- You MUST call 'think' BEFORE any search or fetch operations
- EVERY biomedical research query requires thinking first
- ALL multi-step analyses must begin with the think tool
- ANY task using BioMCP tools requires prior planning with think

⚠️ **WARNING - Skipping the think tool will result in:**

- Incomplete analysis
- Poor search strategies
- Missing critical connections
- Suboptimal results
- Frustrated users

## How to Use the Think Tool

The think tool accepts these parameters:

```python
think(
    thought="Your reasoning about the current step",
    thoughtNumber=1,  # Sequential number starting from 1
    totalThoughts=5,  # Optional: estimated total thoughts needed
    nextThoughtNeeded=True  # Set to False only when analysis is complete
)
```

## Sequential Thinking Patterns

### Pattern 1: Initial Query Decomposition

Always start by breaking down the user's query:

```python
# User asks: "What are the treatment options for BRAF V600E melanoma?"

think(
    thought="Breaking down query: Need to find 1) BRAF V600E mutation significance in melanoma, 2) approved treatments for BRAF-mutant melanoma, 3) clinical trials for new therapies, 4) resistance mechanisms and combination strategies",
    thoughtNumber=1,
    nextThoughtNeeded=True
)
```

### Pattern 2: Search Strategy Planning

Plan your data collection approach:

```python
think(
    thought="Search strategy: First use gene_getter for BRAF context, then article_searcher for BRAF V600E melanoma treatments focusing on FDA-approved drugs, followed by trial_searcher for ongoing studies with BRAF inhibitors",
    thoughtNumber=2,
    nextThoughtNeeded=True
)
```

### Pattern 3: Progressive Refinement

Document findings and adjust strategy:

```python
think(
    thought="Found 3 FDA-approved BRAF inhibitors (vemurafenib, dabrafenib, encorafenib). Need to search for combination therapies with MEK inhibitors based on resistance patterns identified in literature",
    thoughtNumber=3,
    nextThoughtNeeded=True
)
```

### Pattern 4: Synthesis Planning

Before creating final output:

```python
think(
    thought="Ready to synthesize: Will organize findings into 1) First-line treatments (BRAF+MEK combos), 2) Second-line options (immunotherapy), 3) Emerging therapies from trials, 4) Resistance mechanisms to consider",
    thoughtNumber=4,
    nextThoughtNeeded=False  # Analysis complete
)
```

## Common Think Tool Workflows

### Literature Review Workflow

```python
# Step 1: Problem definition
think(thought="User wants comprehensive review of CDK4/6 inhibitors in breast cancer...", thoughtNumber=1)

# Step 2: Search parameters
think(thought="Will search for palbociclib, ribociclib, abemaciclib in HR+/HER2- breast cancer...", thoughtNumber=2)

# Step 3: Quality filtering
think(thought="Found 47 articles, filtering for Phase III trials and meta-analyses...", thoughtNumber=3)

# Step 4: Evidence synthesis
think(thought="Identified consistent PFS benefit across trials, now analyzing OS data...", thoughtNumber=4)
```

### Clinical Trial Analysis Workflow

```python
# Step 1: Criteria identification
think(thought="Patient has EGFR L858R lung cancer, progressed on osimertinib...", thoughtNumber=1)

# Step 2: Trial search strategy
think(thought="Searching for trials accepting EGFR-mutant NSCLC after TKI resistance...", thoughtNumber=2)

# Step 3: Eligibility assessment
think(thought="Found 12 trials, checking for brain metastases eligibility...", thoughtNumber=3)

# Step 4: Prioritization
think(thought="Ranking trials by proximity, novel mechanisms, and enrollment status...", thoughtNumber=4)
```

### Variant Interpretation Workflow

```python
# Step 1: Variant identification
think(thought="Analyzing TP53 R248Q mutation found in patient's tumor...", thoughtNumber=1)

# Step 2: Database queries
think(thought="Will check MyVariant for population frequency, cBioPortal for cancer prevalence...", thoughtNumber=2)

# Step 3: Functional assessment
think(thought="Variant is pathogenic, affects DNA binding domain, common in multiple cancers...", thoughtNumber=3)

# Step 4: Clinical implications
think(thought="Synthesizing prognostic impact and potential therapeutic vulnerabilities...", thoughtNumber=4)
```

## Think Tool Best Practices

### DO:

- Start EVERY BioMCP session with think
- Use sequential numbering (1, 2, 3...)
- Document key findings in each thought
- Adjust strategy based on intermediate results
- Use think to track progress through complex analyses

### DON'T:

- Skip think and jump to searches
- Use think only at the beginning
- Set nextThoughtNeeded=False prematurely
- Use generic thoughts without specific content
- Forget to document decision rationale

## Integration with Other Tools

Call the think tool before, and between, other tool calls:

```python
# CORRECT PATTERN
think(thought="Planning BRAF melanoma research...", thoughtNumber=1)
gene_info = gene_getter("BRAF")

think(thought="BRAF is a serine/threonine kinase, V600E creates constitutive activation. Searching for targeted therapies...", thoughtNumber=2)
articles = article_searcher(genes=["BRAF"], diseases=["melanoma"], keywords=["vemurafenib", "dabrafenib"])

think(thought="Found key trials showing BRAF+MEK combination superiority. Checking for active trials...", thoughtNumber=3)
trials = trial_searcher(conditions=["melanoma"], interventions=["BRAF inhibitor"])

# INCORRECT PATTERN - NO THINKING
gene_info = gene_getter("BRAF")  # ❌ Started without thinking
articles = article_searcher(...)  # ❌ No strategy planning
```

## Reminder System

BioMCP includes automatic reminders if you forget to use think:

- Search results will include a reminder message
- The reminder appears as a system message
- It prompts you to use think for better results
- This ensures consistent methodology

## Advanced Sequential Thinking

### Branching Logic

Use think to handle conditional paths:

```python
think(
    thought="No direct trials found for this rare mutation. Pivoting to search for basket trials and mutation-agnostic approaches...",
    thoughtNumber=5,
    nextThoughtNeeded=True
)
```

### Error Recovery

Document and adjust when searches fail:

```python
think(
    thought="MyVariant query failed for this structural variant. Will use article search to find functional studies instead...",
    thoughtNumber=6,
    nextThoughtNeeded=True
)
```

### Complex Integration

Coordinate multiple data sources:

```python
think(
    thought="Integrating findings: cBioPortal shows 15% frequency in lung adenocarcinoma, articles describe resistance mechanisms, trials testing combination strategies...",
    thoughtNumber=7,
    nextThoughtNeeded=True
)
```

## Conclusion

The think tool is not just a requirement—it's your research companion that ensures systematic, thorough, and reproducible biomedical research. By following sequential thinking patterns, you'll deliver comprehensive insights that address all aspects of complex biomedical queries.

Remember: **Always think first, then search. Document your reasoning. Only mark thinking complete when your analysis is truly finished.**

```

--------------------------------------------------------------------------------
/src/biomcp/retry.py:
--------------------------------------------------------------------------------

```python
"""Retry logic with exponential backoff for handling transient failures."""

import asyncio
import functools
import logging
import secrets
from collections.abc import Callable, Coroutine
from typing import Any, TypeVar

from .constants import (
    DEFAULT_EXPONENTIAL_BASE,
    DEFAULT_INITIAL_RETRY_DELAY,
    DEFAULT_MAX_RETRY_ATTEMPTS,
    DEFAULT_MAX_RETRY_DELAY,
    METRIC_JITTER_RANGE,
)

logger = logging.getLogger(__name__)

T = TypeVar("T")


class RetryConfig:
    """Bundle of settings that drive the retry helpers in this module."""

    def __init__(
        self,
        max_attempts: int = DEFAULT_MAX_RETRY_ATTEMPTS,
        initial_delay: float = DEFAULT_INITIAL_RETRY_DELAY,
        max_delay: float = DEFAULT_MAX_RETRY_DELAY,
        exponential_base: float = DEFAULT_EXPONENTIAL_BASE,
        jitter: bool = True,
        retryable_exceptions: tuple[type[Exception], ...] = (
            ConnectionError,
            TimeoutError,
            OSError,
        ),
        retryable_status_codes: tuple[int, ...] = (429, 502, 503, 504),
    ):
        """Store the retry policy on the instance.

        Args:
            max_attempts: Total number of times a call is tried; the retry
                loops in this module iterate ``range(max_attempts)``, so the
                first try is included in this count
            initial_delay: Delay in seconds used for the first retry
            max_delay: Upper bound in seconds applied to the backoff delay
            exponential_base: Base of the exponential backoff growth
            jitter: Whether a random offset is applied to each delay
            retryable_exceptions: Exception types that qualify for a retry
            retryable_status_codes: HTTP status codes that qualify for a retry
        """
        # Which failures qualify for another attempt
        self.retryable_exceptions = retryable_exceptions
        self.retryable_status_codes = retryable_status_codes

        # How many times to try
        self.max_attempts = max_attempts

        # How long to wait between tries
        self.initial_delay = initial_delay
        self.exponential_base = exponential_base
        self.max_delay = max_delay
        self.jitter = jitter


def calculate_delay(attempt: int, config: RetryConfig) -> float:
    """Calculate delay for the next retry attempt.

    The base delay is ``initial_delay * exponential_base ** attempt``, capped
    at ``max_delay``. When ``config.jitter`` is true a random offset of up to
    ``METRIC_JITTER_RANGE`` times the capped delay is added or subtracted, so
    the returned value can land slightly above ``max_delay``.

    Args:
        attempt: Current attempt number (0-based)
        config: Retry configuration

    Returns:
        Non-negative delay in seconds before the next retry
    """
    # Exponential backoff: delay = initial_delay * (base ^ attempt)
    try:
        delay = config.initial_delay * (config.exponential_base**attempt)
    except OverflowError:
        # Float exponentiation overflows for large attempt numbers. The value
        # would be capped below anyway, so fall back to the cap instead of
        # letting the backoff calculation itself fail.
        delay = config.max_delay

    # Cap at maximum delay
    delay = min(delay, config.max_delay)

    # Add jitter to prevent thundering herd
    if config.jitter:
        # Jitter width is a fraction of the capped delay (the original
        # comment describes METRIC_JITTER_RANGE as 10%)
        jitter_range = delay * METRIC_JITTER_RANGE
        # Use secrets for cryptographically secure randomness.
        # randbits(32) is in [0, 2**32 - 1]; rescale it to [-1, 1].
        random_factor = (secrets.randbits(32) / (2**32 - 1)) * 2 - 1
        delay += random_factor * jitter_range

    # Ensure non-negative, and keep the zero case a float
    return max(0.0, delay)


def is_retryable_exception(exc: Exception, config: RetryConfig) -> bool:
    """Tell whether ``exc`` is one of the configured retryable types.

    Args:
        exc: Exception raised by the attempted call
        config: Retry configuration holding ``retryable_exceptions``

    Returns:
        True when ``exc`` is an instance of any configured exception type
    """
    retryable_types = config.retryable_exceptions
    return isinstance(exc, retryable_types)


def is_retryable_status(status_code: int, config: RetryConfig) -> bool:
    """Tell whether ``status_code`` is one of the configured retryable codes.

    Args:
        status_code: HTTP status code to check
        config: Retry configuration holding ``retryable_status_codes``

    Returns:
        True when the code appears in the configured collection
    """
    allowed_codes = config.retryable_status_codes
    return status_code in allowed_codes


def with_retry(
    config: RetryConfig | None = None,
) -> Callable[
    [Callable[..., Coroutine[Any, Any, T]]],
    Callable[..., Coroutine[Any, Any, T]],
]:
    """Decorator to add retry logic to async functions.

    The wrapped coroutine function is awaited up to ``config.max_attempts``
    times. An exception that is not retryable according to
    ``is_retryable_exception`` is re-raised immediately; a retryable one is
    followed by a sleep of ``calculate_delay(attempt, config)`` seconds. The
    exception from the final attempt is always re-raised.

    Args:
        config: Retry configuration (uses defaults if not provided)

    Returns:
        Decorated function with retry logic
    """
    if config is None:
        config = RetryConfig()

    def decorator(
        func: Callable[..., Coroutine[Any, Any, T]],
    ) -> Callable[..., Coroutine[Any, Any, T]]:
        # functools.partial objects and callable instances have no
        # ``__name__``. Resolve a label once so that logging inside the
        # ``except`` block can never raise and mask the real exception.
        func_name = getattr(func, "__name__", repr(func))

        @functools.wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> T:
            last_exception = None

            for attempt in range(config.max_attempts):
                try:
                    return await func(*args, **kwargs)
                except Exception as exc:
                    last_exception = exc

                    # Check if this is the last attempt
                    if attempt == config.max_attempts - 1:
                        logger.error(
                            f"Max retry attempts ({config.max_attempts}) "
                            f"reached for {func_name}: {exc}"
                        )
                        raise

                    # Check if the exception is retryable
                    if not is_retryable_exception(exc, config):
                        logger.debug(
                            f"Non-retryable exception in {func_name}: {exc}"
                        )
                        raise

                    # Calculate delay for next attempt
                    delay = calculate_delay(attempt, config)
                    logger.warning(
                        f"Retry attempt {attempt + 1}/{config.max_attempts} "
                        f"for {func_name} after {delay:.2f}s delay. "
                        f"Error: {exc}"
                    )

                    # Wait before retrying
                    await asyncio.sleep(delay)

            # Only reachable when max_attempts <= 0, i.e. the loop body never
            # ran and the wrapped function was never called.
            if last_exception:
                raise last_exception
            raise RuntimeError("Unexpected retry loop exit")

        return wrapper

    return decorator


class RetryableHTTPError(Exception):
    """HTTP failure carrying the status code and message of a retryable error."""

    def __init__(self, status_code: int, message: str):
        """Build the exception text and keep both parts as attributes.

        Args:
            status_code: HTTP status code of the failed response
            message: Description of the failure
        """
        description = f"HTTP {status_code}: {message}"
        super().__init__(description)
        self.message = message
        self.status_code = status_code


async def retry_with_backoff(
    func: Callable[..., Coroutine[Any, Any, T]],
    *args: Any,
    config: RetryConfig | None = None,
    **kwargs: Any,
) -> T:
    """Execute a function with retry logic and exponential backoff.

    This is an alternative to the decorator for cases where you need
    more control over retry behavior. Note that ``config`` is consumed by
    this helper and is therefore never forwarded to ``func``.

    Args:
        func: Async function to execute
        *args: Positional arguments for the function
        config: Retry configuration (uses defaults if not provided)
        **kwargs: Keyword arguments for the function

    Returns:
        Result of the function call

    Raises:
        The last exception if all retries fail, or immediately the first
        exception that is not retryable according to ``config``
    """
    if config is None:
        config = RetryConfig()

    # functools.partial objects and callable instances have no ``__name__``.
    # Resolve a label once so that logging inside the ``except`` block can
    # never raise and mask the real exception.
    func_name = getattr(func, "__name__", repr(func))

    last_exception = None

    for attempt in range(config.max_attempts):
        try:
            return await func(*args, **kwargs)
        except Exception as exc:
            last_exception = exc

            # Check if this is the last attempt
            if attempt == config.max_attempts - 1:
                logger.error(
                    f"Max retry attempts ({config.max_attempts}) "
                    f"reached for {func_name}: {exc}"
                )
                raise

            # Check if the exception is retryable
            if not is_retryable_exception(exc, config):
                logger.debug(
                    f"Non-retryable exception in {func_name}: {exc}"
                )
                raise

            # Calculate delay for next attempt
            delay = calculate_delay(attempt, config)
            logger.warning(
                f"Retry attempt {attempt + 1}/{config.max_attempts} "
                f"for {func_name} after {delay:.2f}s delay. "
                f"Error: {exc}"
            )

            # Wait before retrying
            await asyncio.sleep(delay)

    # Only reachable when max_attempts <= 0, i.e. the loop body never ran and
    # ``func`` was never called.
    if last_exception:
        raise last_exception
    raise RuntimeError("Unexpected retry loop exit")

```

--------------------------------------------------------------------------------
/tests/integration/test_openfda_integration.py:
--------------------------------------------------------------------------------

```python
"""Integration tests for OpenFDA API.

These tests make real API calls to verify FDA integration works correctly.
They are marked with pytest.mark.integration and can be skipped with --ignore-integration.
"""

import os

import pytest

from biomcp.openfda.adverse_events import search_adverse_events
from biomcp.openfda.device_events import search_device_events
from biomcp.openfda.drug_approvals import search_drug_approvals
from biomcp.openfda.drug_labels import search_drug_labels
from biomcp.openfda.drug_recalls import search_drug_recalls


@pytest.mark.integration
class TestOpenFDAIntegration:
    """Integration tests for OpenFDA API endpoints.

    Every search helper is expected to return a formatted string. Content
    assertions are deliberately loose because the live API data changes.
    """

    @pytest.mark.asyncio
    async def test_adverse_events_real_api(self):
        """Test real adverse event API call."""
        result = await search_adverse_events(drug="aspirin", limit=5)

        # Should return formatted results
        assert isinstance(result, str)
        assert len(result) > 100  # Non-trivial response

        # Should contain disclaimer
        assert "FDA Data Notice" in result

        # Should have structure
        if "No adverse events found" not in result:
            assert (
                "Total Reports Found:" in result or "adverse" in result.lower()
            )

    @pytest.mark.asyncio
    async def test_drug_labels_real_api(self):
        """Test real drug label API call."""
        result = await search_drug_labels(name="ibuprofen", limit=5)

        # Should return formatted results
        assert isinstance(result, str)
        assert len(result) > 100

        # Should contain disclaimer
        assert "FDA Data Notice" in result

        # Should have label information
        if "No drug labels found" not in result:
            assert "Total Labels Found:" in result or "label" in result.lower()

    @pytest.mark.asyncio
    async def test_device_events_real_api(self):
        """Test real device event API call."""
        result = await search_device_events(device="insulin pump", limit=5)

        # Should return formatted results
        assert isinstance(result, str)
        assert len(result) > 100

        # Should contain disclaimer
        assert "FDA Data Notice" in result

        # Should have device information
        if "No device events found" not in result:
            assert (
                "Total Events Found:" in result or "device" in result.lower()
            )

    @pytest.mark.asyncio
    async def test_drug_approvals_real_api(self):
        """Test real drug approval API call."""
        result = await search_drug_approvals(drug="pembrolizumab", limit=5)

        # Should return formatted results
        assert isinstance(result, str)
        assert len(result) > 100

        # Should contain disclaimer
        assert "FDA Data Notice" in result

        # Pembrolizumab (Keytruda) should have results
        if "No drug approvals found" not in result:
            assert "KEYTRUDA" in result or "pembrolizumab" in result.lower()

    @pytest.mark.asyncio
    async def test_drug_recalls_real_api(self):
        """Test real drug recall API call."""
        # Use drug parameter which is more likely to return results
        result = await search_drug_recalls(drug="acetaminophen", limit=5)

        # Should return formatted results
        assert isinstance(result, str)
        assert len(result) > 100

        # Should contain disclaimer OR error message (API might return no results)
        assert "FDA Data Notice" in result or "Error" in result

        # Should have recall information if not an error
        if "Error" not in result and "No drug recalls found" not in result:
            assert "recall" in result.lower()

    @pytest.mark.asyncio
    async def test_rate_limiting_without_key(self, monkeypatch):
        """Test that rate limiting is handled gracefully without API key."""
        # Remove the API key for the duration of this test only; monkeypatch
        # restores the original value afterwards, even when the test fails.
        monkeypatch.delenv("OPENFDA_API_KEY", raising=False)

        # Make multiple rapid requests, one after another
        results = [
            await search_adverse_events(drug=f"drug{i}", limit=1)
            for i in range(5)
        ]

        # All should return strings (not crash)
        assert all(isinstance(r, str) for r in results)

    @pytest.mark.asyncio
    async def test_api_key_usage(self):
        """Test that API key is used when provided."""
        # This test only runs if API key is available
        if not os.environ.get("OPENFDA_API_KEY"):
            pytest.skip("OPENFDA_API_KEY not set")

        result = await search_adverse_events(drug="acetaminophen", limit=10)

        # With API key, should be able to get results
        assert isinstance(result, str)
        assert len(result) > 100

    @pytest.mark.asyncio
    async def test_error_handling_invalid_params(self):
        """Test graceful handling of invalid parameters."""
        # Search with invalid/nonsense parameters
        result = await search_adverse_events(
            drug="xyzabc123notarealdrugname999", limit=5
        )

        # Should handle gracefully
        assert isinstance(result, str)

        # Should either show no results or error message
        assert (
            "No adverse events found" in result
            or "Error" in result
            or "no results" in result.lower()
        )

    @pytest.mark.asyncio
    async def test_cross_domain_consistency(self):
        """Test that different FDA domains return consistent formats."""
        # Search for a common drug across domains
        drug_name = "aspirin"

        adverse_result = await search_adverse_events(drug=drug_name, limit=2)
        label_result = await search_drug_labels(name=drug_name, limit=2)

        # Both should be properly formatted strings
        assert isinstance(adverse_result, str)
        assert isinstance(label_result, str)

        # Both should have disclaimers
        assert "FDA Data Notice" in adverse_result
        assert "FDA Data Notice" in label_result

        # Both should mention the drug or indicate no results
        assert (
            drug_name in adverse_result.lower()
            or "no " in adverse_result.lower()
        )
        assert (
            drug_name in label_result.lower() or "no " in label_result.lower()
        )

    @pytest.mark.asyncio
    async def test_special_characters_handling(self):
        """Test handling of special characters in queries."""
        # Test with special characters
        result = await search_drug_labels(name="aspirin/dipyridamole", limit=5)

        # Should handle the forward slash gracefully. The API might return an
        # error or no results for complex drug names, so only verify that a
        # string response comes back.
        assert isinstance(result, str)

    @pytest.mark.asyncio
    async def test_large_result_handling(self):
        """Test handling of large result sets."""
        # Request maximum allowed results
        result = await search_adverse_events(
            drug="ibuprofen",  # Common drug with many reports
            limit=100,  # Maximum limit
        )

        # Should handle large results
        assert isinstance(result, str)
        assert len(result) > 500  # Should be substantial

        # Should still include disclaimer
        assert "FDA Data Notice" in result

    @pytest.mark.asyncio
    async def test_empty_query_handling(self):
        """Test handling of empty/missing query parameters."""
        # Search without specifying a drug
        result = await search_drug_recalls(
            limit=5  # Only limit, no other filters
        )

        # Should return recent recalls
        assert isinstance(result, str)
        assert len(result) > 100

        # Should have results (there are always some recalls)
        if "Error" not in result:
            assert "recall" in result.lower()

```

--------------------------------------------------------------------------------
/tests/tdd/test_metrics.py:
--------------------------------------------------------------------------------

```python
"""Tests for performance metrics collection."""

import asyncio
import time
from datetime import datetime
from unittest.mock import patch

import pytest

from biomcp.metrics import (
    MetricSample,
    MetricsCollector,
    MetricSummary,
    Timer,
    get_all_metrics,
    get_metric_summary,
    record_metric,
    track_performance,
)


@pytest.fixture(autouse=True)
def enable_metrics(monkeypatch):
    """Switch metrics on for every test in this module.

    NOTE(review): the module is not reloaded again on teardown, so the
    reloaded state may persist after these tests - confirm whether that
    matters for other test modules.
    """
    monkeypatch.setenv("BIOMCP_METRICS_ENABLED", "true")

    from importlib import reload

    import biomcp.metrics as metrics_module

    # Reload so the module picks up the environment variable set above
    reload(metrics_module)


def test_metric_sample():
    """MetricSample exposes the values it was constructed with."""
    fields = {
        "timestamp": datetime.now(),
        "duration": 1.5,
        "success": True,
        "error": None,
        "tags": {"domain": "article"},
    }
    sample = MetricSample(**fields)

    assert sample.tags["domain"] == "article"
    assert sample.error is None
    assert sample.success is True
    assert sample.duration == 1.5


def test_metric_summary_from_samples():
    """Test MetricSummary calculation from samples.

    Five samples with durations 0.1 through 0.5 are summarised; one of them
    is a failure. Derived floating-point values (sum, average, ratio, median)
    are compared with ``pytest.approx`` so the test does not depend on the
    exact order of floating-point operations inside the implementation.
    """
    now = datetime.now()
    samples = [
        MetricSample(timestamp=now, duration=0.1, success=True),
        MetricSample(timestamp=now, duration=0.2, success=True),
        MetricSample(
            timestamp=now, duration=0.3, success=False, error="timeout"
        ),
        MetricSample(timestamp=now, duration=0.4, success=True),
        MetricSample(timestamp=now, duration=0.5, success=True),
    ]

    summary = MetricSummary.from_samples("test_metric", samples)

    assert summary.name == "test_metric"
    assert summary.count == 5
    assert summary.success_count == 4
    assert summary.error_count == 1

    # min/max are values taken straight from the samples, so exact equality
    # is safe; the computed aggregates are not.
    assert summary.min_duration == 0.1
    assert summary.max_duration == 0.5
    assert summary.total_duration == pytest.approx(1.5)
    assert summary.avg_duration == pytest.approx(0.3)
    assert summary.error_rate == pytest.approx(0.2)  # 1/5

    # Check percentiles
    assert summary.p50_duration == pytest.approx(0.3)  # median
    assert 0.4 <= summary.p95_duration <= 0.5
    assert 0.4 <= summary.p99_duration <= 0.5


def test_metric_summary_empty():
    """Summarising an empty sample list gives zero counts, duration and rate."""
    empty = MetricSummary.from_samples("empty", [])

    counters = (empty.count, empty.success_count, empty.error_count)
    assert counters == (0, 0, 0)

    assert empty.total_duration == 0.0
    assert empty.error_rate == 0.0


@pytest.mark.asyncio
async def test_metrics_collector():
    """Exercise recording, the per-metric sample cap, and clearing."""
    metric = "api_call"
    collector = MetricsCollector(max_samples_per_metric=3)

    # Fill the collector exactly up to its cap: two successes, one failure
    await collector.record(metric, 0.1, success=True)
    await collector.record(metric, 0.2, success=True)
    await collector.record(metric, 0.3, success=False, error="timeout")

    stats = await collector.get_summary(metric)
    assert stats is not None
    assert stats.count == 3
    assert stats.success_count == 2
    assert stats.error_count == 1

    # Two more samples go past the cap
    for duration in (0.4, 0.5):
        await collector.record(metric, duration, success=True)

    stats = await collector.get_summary(metric)
    assert stats.count == 3  # cap still holds
    assert stats.min_duration == 0.3  # the 0.1 and 0.2 samples were dropped

    # Clearing removes the metric entirely
    await collector.clear(metric)
    assert await collector.get_summary(metric) is None


@pytest.mark.asyncio
async def test_global_metrics_functions():
    """Record through the module-level helpers and read the results back."""
    from biomcp.metrics import _metrics_collector

    # Start from a clean slate
    await _metrics_collector.clear()

    # One success and one failure for the same operation
    await record_metric("test_op", 0.5, success=True)
    await record_metric("test_op", 0.7, success=False, error="failed")

    op_summary = await get_metric_summary("test_op")
    assert op_summary is not None
    assert op_summary.count == 2
    assert op_summary.success_count == 1

    # The operation shows up in the full listing as well
    everything = await get_all_metrics()
    assert "test_op" in everything


@pytest.mark.asyncio
async def test_track_performance_decorator_async():
    """A decorated coroutine returns its value and records one success."""
    from biomcp.metrics import _metrics_collector

    await _metrics_collector.clear()

    @track_performance("test_async_func")
    async def slow_operation():
        await asyncio.sleep(0.1)
        return "done"

    assert await slow_operation() == "done"

    # Exactly one successful sample, lasting at least as long as the sleep
    stats = await get_metric_summary("test_async_func")
    assert stats is not None
    assert stats.count == 1
    assert stats.success_count == 1
    assert stats.min_duration >= 0.1


@pytest.mark.asyncio
async def test_track_performance_decorator_async_error():
    """A decorated coroutine that raises is recorded as a failed sample."""
    from biomcp.metrics import _metrics_collector

    await _metrics_collector.clear()

    @track_performance("test_async_error")
    async def failing_operation():
        await asyncio.sleep(0.05)
        raise ValueError("Test error")

    # The original exception still propagates to the caller
    with pytest.raises(ValueError, match="Test error"):
        await failing_operation()

    stats = await get_metric_summary("test_async_error")
    assert stats is not None
    assert stats.count == 1
    assert stats.error_count == 1
    assert stats.success_count == 0


def test_track_performance_decorator_sync():
    """A decorated sync function records a metric when called inside a loop."""

    @track_performance("test_sync_func")
    def fast_operation():
        time.sleep(0.05)
        return "done"

    async def scenario():
        from biomcp.metrics import _metrics_collector

        await _metrics_collector.clear()

        assert fast_operation() == "done"

        # Yield to the loop for a moment so the metric can be recorded
        await asyncio.sleep(0.1)

        stats = await get_metric_summary("test_sync_func")
        assert stats is not None
        assert stats.count == 1
        assert stats.success_count == 1

    # The sync function has to be called while an event loop is running
    asyncio.run(scenario())


@pytest.mark.asyncio
async def test_timer_context_manager():
    """Timer records a sample in both its async and its sync form."""
    from biomcp.metrics import _metrics_collector

    await _metrics_collector.clear()

    # Async form
    async with Timer("test_timer", tags={"operation": "test"}):
        await asyncio.sleep(0.1)

    async_stats = await get_metric_summary("test_timer")
    assert async_stats is not None
    assert async_stats.count == 1
    assert async_stats.success_count == 1
    assert async_stats.min_duration >= 0.1

    # Sync form, used while the event loop is running
    with Timer("test_sync_timer"):
        time.sleep(0.05)

    # Yield to the loop for a moment so the metric can be recorded
    await asyncio.sleep(0.1)

    sync_stats = await get_metric_summary("test_sync_timer")
    assert sync_stats is not None
    assert sync_stats.count == 1


@pytest.mark.asyncio
async def test_timer_with_exception():
    """An exception inside an async Timer block is recorded as a failure."""
    from biomcp.metrics import _metrics_collector

    await _metrics_collector.clear()

    # The exception must still propagate out of the timer
    with pytest.raises(ValueError):
        async with Timer("test_timer_error"):
            await asyncio.sleep(0.05)
            raise ValueError("Test error")

    stats = await get_metric_summary("test_timer_error")
    assert stats is not None
    assert stats.count == 1
    assert stats.error_count == 1
    assert stats.success_count == 0


def test_timer_without_event_loop():
    """Without a running loop, the sync Timer logs one debug message."""
    # No event loop is running here: this is a plain synchronous test
    with patch("biomcp.metrics.logger") as mock_logger:
        with Timer("test_no_loop"):
            time.sleep(0.01)

        # Exactly one debug call, naming the metric and its duration
        debug_mock = mock_logger.debug
        debug_mock.assert_called_once()
        message = debug_mock.call_args[0][0]
        assert "test_no_loop" in message
        assert "duration=" in message

```

--------------------------------------------------------------------------------
/src/biomcp/openfda/rate_limiter.py:
--------------------------------------------------------------------------------

```python
"""
Rate limiting and circuit breaker for OpenFDA API requests.

This module provides client-side rate limiting to prevent API quota exhaustion
and circuit breaker pattern to handle API failures gracefully.
"""

import asyncio
import logging
import os
import time
from collections.abc import Callable
from datetime import datetime
from enum import Enum
from typing import Any

logger = logging.getLogger(__name__)


class CircuitState(Enum):
    """The three states a circuit breaker can be in."""

    # Normal operation
    CLOSED = "closed"
    # Blocking requests
    OPEN = "open"
    # Testing recovery
    HALF_OPEN = "half_open"


class RateLimiter:
    """
    Token bucket rate limiter for FDA API requests.
    """

    def __init__(self, rate: int = 10, per: float = 1.0):
        """
        Initialize rate limiter.

        Args:
            rate: Number of requests allowed
            per: Time period in seconds
        """
        self.rate = rate
        self.per = per
        self.allowance = float(rate)
        self.last_check = time.monotonic()
        self._lock = asyncio.Lock()

    async def acquire(self) -> None:
        """
        Acquire permission to make a request.
        Blocks if rate limit would be exceeded.
        """
        async with self._lock:
            current = time.monotonic()
            time_passed = current - self.last_check
            self.last_check = current

            # Add tokens based on time passed
            self.allowance += time_passed * (self.rate / self.per)

            # Cap at maximum rate
            if self.allowance > self.rate:
                self.allowance = float(self.rate)

            # Check if we can proceed
            if self.allowance < 1.0:
                # Calculate wait time
                deficit = 1.0 - self.allowance
                wait_time = deficit * (self.per / self.rate)

                logger.debug(f"Rate limit: waiting {wait_time:.2f}s")
                await asyncio.sleep(wait_time)

                # Update allowance after waiting
                self.allowance = 0.0
            else:
                # Consume one token
                self.allowance -= 1.0


class CircuitBreaker:
    """
    Circuit breaker to prevent cascading failures.

    CLOSED passes calls through and counts consecutive failures. Once
    ``failure_threshold`` is reached the breaker becomes OPEN and rejects
    calls until ``recovery_timeout`` seconds have passed since the last
    failure. It then becomes HALF_OPEN and admits up to
    ``half_open_max_calls`` trial calls: a success closes the circuit, a
    failure reopens it.
    """

    def __init__(
        self,
        failure_threshold: int = 5,
        recovery_timeout: int = 60,
        half_open_max_calls: int = 3,
    ):
        """
        Initialize circuit breaker.

        Args:
            failure_threshold: Number of failures before opening circuit
            recovery_timeout: Seconds to wait before attempting recovery
            half_open_max_calls: Max calls allowed in half-open state
        """
        self.failure_threshold = failure_threshold
        self.recovery_timeout = recovery_timeout
        self.half_open_max_calls = half_open_max_calls

        self.failure_count = 0
        # Wall-clock timestamp (time.time()) of the most recent failure
        self.last_failure_time: float | None = None
        self.state = CircuitState.CLOSED
        self.half_open_calls = 0
        self._lock = asyncio.Lock()

    async def call(self, func: Callable, *args, **kwargs) -> Any:
        """
        Execute function through circuit breaker.

        Args:
            func: Async function to call
            *args: Function arguments
            **kwargs: Function keyword arguments

        Returns:
            Function result

        Raises:
            Exception: If circuit is open or function fails
        """
        # Only the state check is done under the lock; the call itself runs
        # outside it so that calls are not serialized.
        async with self._lock:
            # Check circuit state
            if self.state == CircuitState.OPEN:
                if self._should_attempt_reset():
                    self.state = CircuitState.HALF_OPEN
                    # The call that triggers the transition is itself the
                    # first trial call, so it counts against the limit.
                    self.half_open_calls = 1
                    logger.info(
                        "Circuit breaker: attempting recovery (half-open)"
                    )
                else:
                    if self.last_failure_time is not None:
                        time_left = self.recovery_timeout - (
                            time.time() - self.last_failure_time
                        )
                        raise Exception(
                            f"Circuit breaker is OPEN. Retry in {time_left:.0f} seconds"
                        )
                    else:
                        raise Exception("Circuit breaker is OPEN")

            elif self.state == CircuitState.HALF_OPEN:
                if self.half_open_calls >= self.half_open_max_calls:
                    # Don't allow more calls in half-open state
                    raise Exception(
                        "Circuit breaker is HALF_OPEN. Max test calls reached"
                    )
                self.half_open_calls += 1

        # Execute the function
        try:
            result = await func(*args, **kwargs)
            await self._on_success()
            return result
        except Exception:
            await self._on_failure()
            # Bare raise keeps the original traceback intact
            raise

    async def _on_success(self) -> None:
        """Handle successful call."""
        async with self._lock:
            if self.state == CircuitState.HALF_OPEN:
                # Recovery succeeded
                self.state = CircuitState.CLOSED
                self.failure_count = 0
                logger.info("Circuit breaker: recovered (closed)")
            else:
                # Reset failure count on success
                self.failure_count = 0

    async def _on_failure(self) -> None:
        """Handle failed call."""
        async with self._lock:
            self.failure_count += 1
            self.last_failure_time = time.time()

            if self.state == CircuitState.HALF_OPEN:
                # Recovery failed, reopen circuit
                self.state = CircuitState.OPEN
                logger.warning("Circuit breaker: recovery failed (open)")
            elif self.failure_count >= self.failure_threshold:
                # Too many failures, open circuit
                self.state = CircuitState.OPEN
                logger.warning(
                    f"Circuit breaker: opened after {self.failure_count} failures"
                )

    def _should_attempt_reset(self) -> bool:
        """Check if enough time has passed to attempt reset."""
        return (
            self.last_failure_time is not None
            and time.time() - self.last_failure_time >= self.recovery_timeout
        )

    @property
    def is_closed(self) -> bool:
        """Check if circuit is closed (normal operation)."""
        return self.state == CircuitState.CLOSED

    @property
    def is_open(self) -> bool:
        """Check if circuit is open (blocking requests)."""
        return self.state == CircuitState.OPEN

    def get_state(self) -> dict[str, Any]:
        """Get current circuit breaker state.

        Returns:
            Dict with the state value, the current failure count and the
            ISO-formatted time of the last failure (None if there was none)
        """
        return {
            "state": self.state.value,
            "failure_count": self.failure_count,
            "last_failure": (
                datetime.fromtimestamp(self.last_failure_time).isoformat()
                if self.last_failure_time is not None
                else None
            ),
        }


# Global instances, shared by every caller of rate_limited_request.
# The request quota is chosen once, at import time, based on whether the
# OPENFDA_API_KEY environment variable is set to a non-empty value.
_has_api_key = bool(os.environ.get("OPENFDA_API_KEY"))
_rate_limit = 240 if _has_api_key else 40  # requests per minute

# Create rate limiter: `_rate_limit` requests per 60-second period
# (the limiter itself derives the per-second refill rate from rate / per)
FDA_RATE_LIMITER = RateLimiter(rate=_rate_limit, per=60.0)

# Create circuit breaker: opens after 5 failures, waits 60 seconds before
# attempting recovery, half-open limit of 3 calls
FDA_CIRCUIT_BREAKER = CircuitBreaker(
    failure_threshold=5, recovery_timeout=60, half_open_max_calls=3
)

# Semaphore for concurrent request limiting
FDA_SEMAPHORE = asyncio.Semaphore(10)  # Max 10 concurrent requests


async def rate_limited_request(func: Callable, *args, **kwargs) -> Any:
    """
    Run an FDA API call behind the shared concurrency, rate and breaker guards.

    The guards are applied in this order: the global semaphore, then the
    global rate limiter, then the global circuit breaker, which performs
    the actual call.

    Args:
        func: Async function to call
        *args: Positional arguments forwarded to ``func``
        **kwargs: Keyword arguments forwarded to ``func``

    Returns:
        Whatever the circuit breaker's ``call`` returns for ``func``
    """
    async with FDA_SEMAPHORE:
        # Take a rate-limit slot before handing the call to the breaker.
        await FDA_RATE_LIMITER.acquire()
        outcome = await FDA_CIRCUIT_BREAKER.call(func, *args, **kwargs)
    return outcome

```

--------------------------------------------------------------------------------
/tests/tdd/test_nci_integration.py:
--------------------------------------------------------------------------------

```python
"""Unit tests for NCI CTS API integration."""

from unittest.mock import patch

import pytest

from biomcp.biomarkers import search_biomarkers
from biomcp.diseases.search import search_diseases
from biomcp.integrations.cts_api import CTSAPIError, make_cts_request
from biomcp.interventions import search_interventions
from biomcp.organizations import get_organization, search_organizations
from biomcp.trials.nci_getter import get_trial_nci
from biomcp.trials.nci_search import convert_query_to_nci, search_trials_nci
from biomcp.trials.search import TrialQuery


class TestCTSAPIIntegration:
    """Exercise the low-level ``make_cts_request`` helper."""

    @pytest.mark.asyncio
    async def test_make_cts_request_no_api_key(self):
        """A call made with an emptied environment raises ``CTSAPIError``."""
        # Clear the environment for the duration of the call.
        empty_env = patch.dict("os.environ", {}, clear=True)
        expect_error = pytest.raises(
            CTSAPIError, match="NCI API key required"
        )
        with empty_env, expect_error:
            await make_cts_request("https://example.com/api")

    @pytest.mark.asyncio
    async def test_make_cts_request_with_api_key(self):
        """With an explicit key the mocked payload is returned unchanged."""
        target = "biomcp.integrations.cts_api.request_api"
        with patch(target) as fake_request_api:
            fake_request_api.return_value = ({"data": "test"}, None)

            response = await make_cts_request(
                "https://example.com/api", api_key="test-key"
            )

            assert response == {"data": "test"}
            fake_request_api.assert_called_once()

            # The request passed downstream must contain a "_headers" entry.
            sent_request = fake_request_api.call_args.kwargs["request"]
            assert "_headers" in sent_request


class TestOrganizationsModule:
    """Cover the organization search and lookup helpers."""

    @pytest.mark.asyncio
    async def test_search_organizations(self):
        """Searching by name surfaces the mocked organization list."""
        canned_response = {
            "data": [{"id": "ORG001", "name": "Test Cancer Center"}],
            "total": 1,
        }
        target = "biomcp.organizations.search.make_cts_request"

        with patch(target) as fake_cts:
            fake_cts.return_value = canned_response
            outcome = await search_organizations(
                name="Cancer Center", api_key="test-key"
            )

            assert outcome["total"] == 1
            organizations = outcome["organizations"]
            assert len(organizations) == 1
            assert organizations[0]["name"] == "Test Cancer Center"

    @pytest.mark.asyncio
    async def test_get_organization(self):
        """Fetching one organization returns the fields from ``data``."""
        canned_response = {
            "data": {
                "id": "ORG001",
                "name": "Test Cancer Center",
                "type": "Academic",
            }
        }
        target = "biomcp.organizations.getter.make_cts_request"

        with patch(target) as fake_cts:
            fake_cts.return_value = canned_response
            organization = await get_organization(
                "ORG001", api_key="test-key"
            )

            assert organization["id"] == "ORG001"
            assert organization["name"] == "Test Cancer Center"
            assert organization["type"] == "Academic"


class TestInterventionsModule:
    """Cover the intervention search helper."""

    @pytest.mark.asyncio
    async def test_search_interventions(self):
        """Searching by name surfaces the mocked intervention list."""
        canned_response = {
            "data": [
                {"id": "INT001", "name": "Pembrolizumab", "type": "Drug"}
            ],
            "total": 1,
        }
        target = "biomcp.interventions.search.make_cts_request"

        with patch(target) as fake_cts:
            fake_cts.return_value = canned_response
            outcome = await search_interventions(
                name="Pembrolizumab", api_key="test-key"
            )

            assert outcome["total"] == 1
            interventions = outcome["interventions"]
            assert len(interventions) == 1
            assert interventions[0]["name"] == "Pembrolizumab"


class TestBiomarkersModule:
    """Cover the biomarker search helper."""

    @pytest.mark.asyncio
    async def test_search_biomarkers(self):
        """Searching by name surfaces the mocked biomarker list."""
        canned_response = {
            "data": [{"id": "BIO001", "name": "PD-L1", "gene": "CD274"}],
            "total": 1,
        }
        target = "biomcp.biomarkers.search.make_cts_request"

        with patch(target) as fake_cts:
            fake_cts.return_value = canned_response
            outcome = await search_biomarkers(
                name="PD-L1", api_key="test-key"
            )

            assert outcome["total"] == 1
            biomarkers = outcome["biomarkers"]
            assert len(biomarkers) == 1
            assert biomarkers[0]["name"] == "PD-L1"


class TestDiseasesModule:
    """Cover the disease search helper."""

    @pytest.mark.asyncio
    async def test_search_diseases_nci(self):
        """Searching by name surfaces the mocked disease list."""
        melanoma = {
            "id": "DIS001",
            "name": "Melanoma",
            "synonyms": ["Malignant Melanoma"],
        }
        target = "biomcp.diseases.search.make_cts_request"

        with patch(target) as fake_cts:
            fake_cts.return_value = {"data": [melanoma], "total": 1}
            outcome = await search_diseases(
                name="Melanoma", api_key="test-key"
            )

            assert outcome["total"] == 1
            diseases = outcome["diseases"]
            assert len(diseases) == 1
            assert diseases[0]["name"] == "Melanoma"


class TestNCITrialIntegration:
    """Cover NCI query conversion, trial search and trial lookup."""

    @pytest.mark.asyncio
    async def test_convert_query_to_nci(self):
        """A ``TrialQuery`` is translated into the expected NCI parameters."""
        trial_query = TrialQuery(
            conditions=["melanoma"],
            phase="PHASE2",
            recruiting_status="OPEN",
            allow_brain_mets=True,
        )
        disease_lookup = patch("biomcp.trials.nci_search.search_diseases")
        intervention_lookup = patch(
            "biomcp.trials.nci_search.search_interventions"
        )

        # Both lookups are stubbed to return empty result lists.
        with (
            disease_lookup as fake_diseases,
            intervention_lookup as fake_interventions,
        ):
            fake_diseases.return_value = {"diseases": []}
            fake_interventions.return_value = {"interventions": []}

            nci_params = await convert_query_to_nci(trial_query)

            assert nci_params["diseases"] == ["melanoma"]
            assert nci_params["phase"] == "II"
            expected_statuses = ["recruiting", "enrolling_by_invitation"]
            assert nci_params["recruitment_status"] == expected_statuses
            assert nci_params["accepts_brain_mets"] is True

    @pytest.mark.asyncio
    async def test_search_trials_nci(self):
        """The NCI search wraps the mocked response and tags its source."""
        trial_query = TrialQuery(conditions=["melanoma"])
        trial_record = {
            "nct_id": "NCT12345",
            "title": "Test Trial",
            "phase": "II",
        }

        with (
            patch(
                "biomcp.trials.nci_search.convert_query_to_nci"
            ) as fake_convert,
            patch("biomcp.trials.nci_search.make_cts_request") as fake_cts,
        ):
            fake_convert.return_value = {"diseases": ["melanoma"]}
            fake_cts.return_value = {"data": [trial_record], "total": 1}

            outcome = await search_trials_nci(
                trial_query, api_key="test-key"
            )

            assert outcome["total"] == 1
            assert outcome["source"] == "nci"
            trials = outcome["trials"]
            assert len(trials) == 1
            assert trials[0]["nct_id"] == "NCT12345"

    @pytest.mark.asyncio
    async def test_get_trial_nci(self):
        """Fetching one trial returns the fields from ``data``."""
        trial_record = {
            "nct_id": "NCT12345",
            "title": "Test Trial",
            "phase": "II",
            "overall_status": "Recruiting",
        }
        target = "biomcp.trials.nci_getter.make_cts_request"

        with patch(target) as fake_cts:
            fake_cts.return_value = {"data": trial_record}

            trial = await get_trial_nci("NCT12345", api_key="test-key")

            assert trial["nct_id"] == "NCT12345"
            assert trial["title"] == "Test Trial"
            assert trial["phase"] == "II"

```

--------------------------------------------------------------------------------
/src/biomcp/cli/variants.py:
--------------------------------------------------------------------------------

```python
"""BioMCP Command Line Interface for genetic variants."""

import asyncio
from typing import Annotated

import typer

from ..constants import DEFAULT_ASSEMBLY, SYSTEM_PAGE_SIZE
from ..variants import getter, search

# Typer sub-application on which the variant commands in this module are
# registered via ``@variant_app.command(...)``.
variant_app = typer.Typer(help="Search and get variants from MyVariant.info.")


@variant_app.command("get")
def get_variant(
    variant_id: Annotated[
        str,
        typer.Argument(
            help="rsID (rs456) or MyVariant ID (chr1:g.1234A>G)",
        ),
    ],
    output_json: Annotated[
        bool,
        typer.Option(
            "--json",
            "-j",
            help="Render in JSON format",
            case_sensitive=False,
        ),
    ] = False,
    include_external: Annotated[
        bool,
        typer.Option(
            "--include-external/--no-external",
            help="Include annotations from external sources (TCGA, 1000 Genomes, cBioPortal)",
        ),
    ] = True,
    assembly: Annotated[
        str,
        typer.Option(
            "--assembly",
            help="Genome assembly (hg19 or hg38)",
            case_sensitive=False,
        ),
    ] = DEFAULT_ASSEMBLY,
):
    """
    Get detailed information about a specific genetic variant.

    Supports HGVS identifiers (e.g., 'chr7:g.140453136A>T') or dbSNP rsIDs.

    Examples:
        Get by HGVS: biomcp variant get "chr7:g.140453136A>T"
        Get by rsID: biomcp variant get rs113488022
        Get as JSON: biomcp variant get rs113488022 --json
        Get without external annotations: biomcp variant get rs113488022 --no-external
        Get with hg38 assembly: biomcp variant get rs113488022 --assembly hg38
    """
    # An empty string can still be passed explicitly on the command line.
    if not variant_id:
        typer.echo("Error: A variant identifier must be provided.", err=True)
        raise typer.Exit(code=1)

    # `case_sensitive=False` only affects choice/Enum options, so a plain
    # string such as "HG38" arrives unchanged. Normalize it here so the
    # option is case-insensitive, as its declaration intends.
    normalized_assembly = assembly.lower()

    # Validate assembly value (the error echoes what the user typed).
    if normalized_assembly not in ("hg19", "hg38"):
        typer.echo(
            f"Error: Invalid assembly '{assembly}'. Must be 'hg19' or 'hg38'.",
            err=True,
        )
        raise typer.Exit(code=1)

    result = asyncio.run(
        getter.get_variant(
            variant_id,
            output_json=output_json,
            include_external=include_external,
            assembly=normalized_assembly,
        )
    )
    typer.echo(result)


@variant_app.command("search")
def search_variant_cmd(
    gene: Annotated[
        str | None,
        typer.Option(
            "--gene",
            help="Gene symbol (e.g., BRCA1)",
        ),
    ] = None,
    hgvsp: Annotated[
        str | None,
        typer.Option(
            "--hgvsp",
            help="Protein notation (e.g., p.Val600Glu).",
        ),
    ] = None,
    hgvsc: Annotated[
        str | None,
        typer.Option(
            "--hgvsc",
            help="cDNA notation (e.g., c.1799T>A).",
        ),
    ] = None,
    rsid: Annotated[
        str | None,
        typer.Option(
            "--rsid",
            help="dbSNP rsID (e.g., rs113488022)",
        ),
    ] = None,
    region: Annotated[
        str | None,
        typer.Option(
            "--region",
            help="Genomic region (e.g., chr1:69000-70000)",
        ),
    ] = None,
    significance: Annotated[
        search.ClinicalSignificance | None,
        typer.Option(
            "--significance",
            help="Clinical significance (e.g., pathogenic, likely benign)",
            case_sensitive=False,
        ),
    ] = None,
    min_frequency: Annotated[
        float | None,
        typer.Option(
            "--min-frequency",
            help="Minimum gnomAD exome allele frequency (0.0 to 1.0)",
            min=0.0,
            max=1.0,
        ),
    ] = None,
    max_frequency: Annotated[
        float | None,
        typer.Option(
            "--max-frequency",
            help="Maximum gnomAD exome allele frequency (0.0 to 1.0)",
            min=0.0,
            max=1.0,
        ),
    ] = None,
    cadd: Annotated[
        float | None,
        typer.Option(
            "--cadd",
            help="Minimum CADD phred score",
            min=0.0,
        ),
    ] = None,
    polyphen: Annotated[
        search.PolyPhenPrediction | None,
        typer.Option(
            "--polyphen",
            # The adjacent literals are concatenated, so the separating
            # space has to be part of one of them.
            help="PolyPhen-2 prediction: Probably damaging = D, "
            "Possibly damaging = P, Benign = B",
            case_sensitive=False,
        ),
    ] = None,
    sift: Annotated[
        search.SiftPrediction | None,
        typer.Option(
            "--sift",
            help="SIFT prediction: D = Deleterious, T = Tolerated",
            case_sensitive=False,
        ),
    ] = None,
    size: Annotated[
        int,
        typer.Option(
            "--size",
            help="Maximum number of results to return",
            min=1,
            max=100,
        ),
    ] = SYSTEM_PAGE_SIZE,
    sources: Annotated[
        str | None,
        typer.Option(
            "--sources",
            help="Specific sources to include in results (comma-separated)",
        ),
    ] = None,
    output_json: Annotated[
        bool,
        typer.Option(
            "--json",
            "-j",
            help="Render in JSON format",
            case_sensitive=False,
        ),
    ] = False,
):
    """
    Search for genetic variants using the given filters.

    Every option is optional; the supplied values are combined into a
    single variant query.

    Examples:
        Search by gene: biomcp variant search --gene BRAF
        Search by rsID as JSON: biomcp variant search --rsid rs113488022 --json
        Restrict sources: biomcp variant search --gene BRAF --sources "a, b"
    """
    # Tolerate whitespace around commas and skip empty entries so that
    # "a, b" and "a,,b" both yield ["a", "b"].
    source_list = (
        [name.strip() for name in sources.split(",") if name.strip()]
        if sources
        else []
    )

    query = search.VariantQuery(
        gene=gene,
        hgvsp=hgvsp,
        hgvsc=hgvsc,
        rsid=rsid,
        region=region,
        significance=significance,
        min_frequency=min_frequency,
        max_frequency=max_frequency,
        cadd=cadd,
        polyphen=polyphen,
        sift=sift,
        size=size,
        sources=source_list,
    )

    result = asyncio.run(search.search_variants(query, output_json))
    typer.echo(result)


@variant_app.command("predict")
def predict_variant_effects(
    chromosome: Annotated[
        str,
        typer.Argument(help="Chromosome (e.g., chr7, chrX)"),
    ],
    position: Annotated[
        int,
        typer.Argument(help="1-based genomic position"),
    ],
    reference: Annotated[
        str,
        typer.Argument(help="Reference allele(s) (e.g., A, ATG)"),
    ],
    alternate: Annotated[
        str,
        typer.Argument(help="Alternate allele(s) (e.g., T, A)"),
    ],
    interval_size: Annotated[
        int,
        typer.Option(
            "--interval",
            "-i",
            help="Analysis interval size in bp (max 1000000)",
            min=2000,
            max=1000000,
        ),
    ] = 131072,
    tissue: Annotated[
        list[str] | None,
        typer.Option(
            "--tissue",
            "-t",
            help="UBERON ontology terms for tissue-specific predictions",
        ),
    ] = None,
    threshold: Annotated[
        float,
        typer.Option(
            "--threshold",
            help="Significance threshold for log2 fold changes",
            min=0.0,
            max=5.0,
        ),
    ] = 0.5,
    api_key: Annotated[
        str | None,
        typer.Option(
            "--api-key",
            help="AlphaGenome API key (overrides ALPHAGENOME_API_KEY env var)",
            envvar="ALPHAGENOME_API_KEY",
        ),
    ] = None,
):
    """
    Predict variant effects using Google DeepMind's AlphaGenome:\n
    - Gene expression changes\n
    - Chromatin accessibility\n
    - Splicing alterations\n
    - Promoter activity\n
    \n
    Requires AlphaGenome API key via --api-key or ALPHAGENOME_API_KEY env var.\n
    \n
    Examples:
    \n\t# Predict BRAF V600E mutation
    \n\tbiomcp variant predict chr7 140753336 A T
    \n
    \n\t# With API key specified
    \n\tbiomcp variant predict chr7 140753336 A T --api-key YOUR_KEY
    \n
    \n\t# With tissue-specific predictions
    \n\tbiomcp variant predict chr7 140753336 A T --tissue UBERON:0002367
    \n
    \n\t# With larger analysis interval
    \n\tbiomcp variant predict chr7 140753336 A T --interval 500000
    """
    # Imported at call time under an alias so the library coroutine is not
    # confused with this CLI command, which shares its name.
    from ..variants.alphagenome import (
        predict_variant_effects as run_prediction,
    )

    # CLI option names are mapped onto the library's keyword names.
    prediction_coro = run_prediction(
        chromosome=chromosome,
        position=position,
        reference=reference,
        alternate=alternate,
        interval_size=interval_size,
        tissue_types=tissue,
        significance_threshold=threshold,
        api_key=api_key,
    )
    report = asyncio.run(prediction_coro)
    typer.echo(report)

```

--------------------------------------------------------------------------------
/tests/integration/test_variants_integration.py:
--------------------------------------------------------------------------------

```python
"""Integration tests for external variant data sources."""

import asyncio

import pytest

from biomcp.variants.external import (
    ExternalVariantAggregator,
    TCGAClient,
    ThousandGenomesClient,
)
from biomcp.variants.getter import get_variant


class TestTCGAIntegration:
    """Integration tests for TCGA/GDC API."""

    # Marked as integration, like the assembly tests in this module, because
    # it calls the client without any mocking.
    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_tcga_real_variant(self):
        """Query TCGA for a known variant and print whatever comes back.

        This is a smoke test: it makes no assertions about the response, so
        it only fails if the client call raises.
        """
        client = TCGAClient()

        # Try with BRAF V600E - a well-known cancer mutation
        # TCGA can search by gene AA change format
        result = await client.get_variant_data("BRAF V600E")

        print(f"TCGA result: {result}")

        if result:
            print(f"COSMIC ID: {result.cosmic_id}")
            print(f"Tumor types: {result.tumor_types}")
            print(f"Affected cases: {result.affected_cases}")
            print(f"Consequence: {result.consequence_type}")
        else:
            print("No TCGA data found for this variant")


class TestThousandGenomesIntegration:
    """Integration tests for 1000 Genomes via Ensembl."""

    # Marked as integration, like the assembly tests in this module, because
    # it calls the client without any mocking.
    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_1000g_real_variant(self):
        """Query 1000 Genomes for a known rsID and check its frequency data.

        The global MAF assertion only runs when the client returns a
        result; an empty result is reported but does not fail the test.
        """
        client = ThousandGenomesClient()

        # Try with a known rsID
        result = await client.get_variant_data("rs7412")  # APOE variant

        print(f"1000 Genomes result: {result}")

        if result:
            print(f"Global MAF: {result.global_maf}")
            print(f"EUR MAF: {result.eur_maf}")
            print(f"AFR MAF: {result.afr_maf}")
            print(f"Consequence: {result.most_severe_consequence}")
            print(f"Ancestral allele: {result.ancestral_allele}")

            # This variant should have frequency data
            assert result.global_maf is not None
        else:
            print("No 1000 Genomes data found")


class TestExternalAggregatorIntegration:
    """Integration tests for the aggregator."""

    # Both tests are marked as integration, like the assembly tests in this
    # module, because they call the aggregator without any mocking.
    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_aggregator_basic(self):
        """Aggregate annotations for a known rsID and check the echoed ID."""
        aggregator = ExternalVariantAggregator()

        # Test with a known variant
        result = await aggregator.get_enhanced_annotations(
            "rs7412",  # APOE variant
            include_tcga=True,
            include_1000g=True,
        )

        print(f"Variant ID: {result.variant_id}")
        print(f"TCGA data: {'Present' if result.tcga else 'Not found'}")
        print(
            f"1000G data: {'Present' if result.thousand_genomes else 'Not found'}"
        )
        print(f"Errors: {result.error_sources}")

        # Only the variant ID is asserted; per-source data is just printed.
        assert result.variant_id == "rs7412"

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_aggregator_partial_failures(self):
        """Test aggregator handles partial failures gracefully."""
        aggregator = ExternalVariantAggregator()

        # Use a variant that might not be in all databases
        result = await aggregator.get_enhanced_annotations(
            "chr1:g.12345678A>G",  # Arbitrary variant
            include_tcga=True,
            include_1000g=True,
        )

        print("Results for arbitrary variant:")
        print(f"- TCGA: {'Found' if result.tcga else 'Not found'}")
        print(
            f"- 1000G: {'Found' if result.thousand_genomes else 'Not found'}"
        )
        print(f"- Errors: {result.error_sources}")

        # Should complete without crashing
        assert result.variant_id == "chr1:g.12345678A>G"


class TestAssemblyParameter:
    """Check the ``assembly`` argument of ``get_variant`` end to end."""

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_get_variant_hg19_assembly(self):
        """Requesting hg19 yields hg19 start/end coordinates when present."""
        import json

        # rs113488022 is BRAF V600E, a well-known variant.
        raw = await get_variant(
            "rs113488022",
            output_json=True,
            include_external=False,
            assembly="hg19",
        )

        # A non-empty JSON payload is expected.
        assert raw is not None
        assert len(raw) > 0

        records = json.loads(raw)
        if not records:
            pytest.skip("No data returned from API")

        first = records[0]
        if "hg19" not in first:
            pytest.skip("hg19 data not available in API response")

        print(f"hg19 coordinates: {first['hg19']}")
        assert "start" in first["hg19"]
        assert "end" in first["hg19"]

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_get_variant_hg38_assembly(self):
        """Requesting hg38 yields hg38 start/end coordinates when present."""
        import json

        # Same variant as the hg19 test, different assembly.
        raw = await get_variant(
            "rs113488022",
            output_json=True,
            include_external=False,
            assembly="hg38",
        )

        # A non-empty JSON payload is expected.
        assert raw is not None
        assert len(raw) > 0

        records = json.loads(raw)
        if not records:
            pytest.skip("No data returned from API")

        first = records[0]
        if "hg38" not in first:
            pytest.skip("hg38 data not available in API response")

        print(f"hg38 coordinates: {first['hg38']}")
        assert "start" in first["hg38"]
        assert "end" in first["hg38"]

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_assembly_coordinate_differences(self):
        """The same variant has different start positions in hg19 and hg38."""
        import json

        rsid = "rs113488022"  # BRAF V600E

        raw_hg19 = await get_variant(
            rsid,
            output_json=True,
            include_external=False,
            assembly="hg19",
        )
        raw_hg38 = await get_variant(
            rsid,
            output_json=True,
            include_external=False,
            assembly="hg38",
        )

        records_hg19 = json.loads(raw_hg19)
        records_hg38 = json.loads(raw_hg38)

        # Nothing to compare unless both assemblies produced data.
        if not (records_hg19 and records_hg38):
            pytest.skip("API did not return data for both assemblies")

        v19 = records_hg19[0]
        v38 = records_hg38[0]

        # BRAF V600E has different coordinates in hg19 vs hg38
        # hg19: chr7:140453136
        # hg38: chr7:140753336
        if "hg19" not in v19 or "hg38" not in v38:
            pytest.skip("Assembly-specific coordinates not in response")

        print(f"hg19 start: {v19['hg19']['start']}")
        print(f"hg38 start: {v38['hg38']['start']}")

        # The start positions must differ between the two assemblies.
        assert v19["hg19"]["start"] != v38["hg38"]["start"]


if __name__ == "__main__":
    # Manual runner: each entry pairs a banner with the test coroutines to
    # execute under it. Every coroutine runs on a fresh test-class instance.
    divider = "\n" + "=" * 50 + "\n"
    sections = [
        (
            "Testing TCGA/GDC...",
            [lambda: TestTCGAIntegration().test_tcga_real_variant()],
        ),
        (
            "Testing 1000 Genomes...",
            [
                lambda: (
                    TestThousandGenomesIntegration().test_1000g_real_variant()
                )
            ],
        ),
        (
            "Testing aggregator...",
            [
                lambda: (
                    TestExternalAggregatorIntegration().test_aggregator_basic()
                )
            ],
        ),
        (
            "Testing aggregator with partial failures...",
            [
                lambda: (
                    TestExternalAggregatorIntegration().test_aggregator_partial_failures()
                )
            ],
        ),
        (
            "Testing assembly parameter...",
            [
                lambda: (
                    TestAssemblyParameter().test_get_variant_hg19_assembly()
                ),
                lambda: (
                    TestAssemblyParameter().test_get_variant_hg38_assembly()
                ),
                lambda: (
                    TestAssemblyParameter().test_assembly_coordinate_differences()
                ),
            ],
        ),
    ]

    for index, (banner, coroutine_factories) in enumerate(sections):
        # A divider separates sections, so none is printed before the first.
        if index:
            print(divider)
        print(banner)
        for make_coroutine in coroutine_factories:
            asyncio.run(make_coroutine())

```

--------------------------------------------------------------------------------
/tests/tdd/trials/test_backward_compatibility.py:
--------------------------------------------------------------------------------

```python
"""Test backward compatibility for trial search and getter functions."""

from unittest.mock import patch

import pytest

from biomcp.trials.getter import Module, get_trial, get_trial_unified
from biomcp.trials.search import (
    TrialQuery,
    search_trials,
    search_trials_unified,
)


class TestTrialSearchBackwardCompatibility:
    """Guard the pre-existing behavior of the trial search functions."""

    @pytest.mark.asyncio
    async def test_search_trials_defaults_to_clinicaltrials(self):
        """``search_trials`` sends its request to a clinicaltrials.gov URL."""
        trial_query = TrialQuery(conditions=["diabetes"])
        study = {
            "protocolSection": {
                "identificationModule": {"nctId": "NCT12345"}
            }
        }

        with patch("biomcp.http_client.request_api") as fake_api:
            fake_api.return_value = ({"studies": [study]}, None)

            await search_trials(trial_query, output_json=True)

            # The request must have gone out with a ClinicalTrials.gov URL.
            assert fake_api.called
            requested_url = fake_api.call_args.kwargs.get("url")
            assert requested_url is not None
            assert "clinicaltrials.gov" in requested_url

    @pytest.mark.asyncio
    async def test_search_trials_no_source_parameter(self):
        """``search_trials`` can still be called with only a query."""
        trial_query = TrialQuery(conditions=["cancer"])

        with patch("biomcp.http_client.request_api") as fake_api:
            fake_api.return_value = ({"studies": []}, None)

            # A changed signature would surface here as a TypeError.
            await search_trials(trial_query)
            assert fake_api.called

    @pytest.mark.asyncio
    async def test_search_trials_unified_with_source(self):
        """``search_trials_unified`` dispatches on the ``source`` argument."""
        trial_query = TrialQuery(conditions=["melanoma"])

        # source="clinicaltrials" delegates to search_trials.
        with patch("biomcp.trials.search.search_trials") as fake_ct_search:
            fake_ct_search.return_value = "CT results"

            ct_outcome = await search_trials_unified(
                trial_query, source="clinicaltrials"
            )
            assert ct_outcome == "CT results"
            fake_ct_search.assert_called_once_with(trial_query, False)

        # source="nci" delegates to the NCI search and its formatter.
        nci_search = patch("biomcp.trials.nci_search.search_trials_nci")
        nci_formatter = patch(
            "biomcp.trials.nci_search.format_nci_trial_results"
        )
        with nci_search as fake_nci_search, nci_formatter as fake_format:
            fake_nci_search.return_value = {"source": "nci", "trials": []}
            fake_format.return_value = "NCI formatted results"

            nci_outcome = await search_trials_unified(
                trial_query, source="nci", api_key="test-key"
            )
            assert nci_outcome == "NCI formatted results"
            fake_nci_search.assert_called_once_with(trial_query, "test-key")


class TestTrialGetterBackwardCompatibility:
    """Guard the pre-existing behavior of the trial getter functions."""

    @staticmethod
    def _protocol_response(nct_id):
        """Build a fresh mocked ``request_api`` return value for ``nct_id``."""
        body = {
            "protocolSection": {"identificationModule": {"nctId": nct_id}}
        }
        return (body, None)

    @pytest.mark.asyncio
    async def test_get_trial_defaults_to_clinicaltrials(self):
        """``get_trial`` sends its request to a clinicaltrials.gov URL."""
        with patch("biomcp.http_client.request_api") as fake_api:
            fake_api.return_value = self._protocol_response("NCT12345")

            await get_trial("NCT12345", Module.PROTOCOL)

            # The request must have gone out with a ClinicalTrials.gov URL.
            # (The NCT ID would be in the request params, not the URL.)
            assert fake_api.called
            requested_url = fake_api.call_args.kwargs.get("url")
            assert requested_url is not None
            assert "clinicaltrials.gov" in requested_url

    @pytest.mark.asyncio
    async def test_get_trial_no_source_parameter(self):
        """``get_trial`` still accepts its original argument set."""
        with patch("biomcp.http_client.request_api") as fake_api:
            fake_api.return_value = self._protocol_response("NCT99999")

            # A changed signature would surface here as a TypeError.
            await get_trial("NCT99999", Module.PROTOCOL, output_json=True)
            assert fake_api.called

    @pytest.mark.asyncio
    async def test_get_trial_unified_with_source(self):
        """``get_trial_unified`` dispatches on the ``source`` argument."""
        # source="clinicaltrials" goes through the private protocol helper.
        with patch("biomcp.trials.getter._trial_protocol") as fake_protocol:
            fake_protocol.return_value = "CT trial details"

            ct_outcome = await get_trial_unified(
                "NCT12345", source="clinicaltrials", sections=["protocol"]
            )
            assert ct_outcome == "CT trial details"
            fake_protocol.assert_called_once_with(
                nct_id="NCT12345",
                call_benefit="Getting protocol information for trial NCT12345",
            )

        # source="nci" goes through the NCI getter and its formatter.
        nci_getter = patch("biomcp.trials.nci_getter.get_trial_nci")
        nci_formatter = patch(
            "biomcp.trials.nci_getter.format_nci_trial_details"
        )
        with nci_getter as fake_nci_getter, nci_formatter as fake_format:
            fake_nci_getter.return_value = {
                "nct_id": "NCT12345",
                "source": "nci",
            }
            fake_format.return_value = "NCI formatted trial"

            nci_outcome = await get_trial_unified(
                "NCT12345", source="nci", api_key="test-key"
            )
            assert nci_outcome == "NCI formatted trial"
            fake_nci_getter.assert_called_once_with("NCT12345", "test-key")

    @pytest.mark.asyncio
    async def test_get_trial_all_modules_still_work(self):
        """Each listed ``Module`` value results in an API request."""
        supported_modules = (
            Module.PROTOCOL,
            Module.LOCATIONS,
            Module.REFERENCES,
            Module.OUTCOMES,
        )

        for module in supported_modules:
            with patch("biomcp.http_client.request_api") as fake_api:
                fake_api.return_value = self._protocol_response("NCT12345")

                await get_trial("NCT12345", module)
            assert fake_api.called
            # Reset for next iteration
            fake_api.reset_mock()


class TestCLIBackwardCompatibility:
    """Checks that the trial CLI entry points stay importable and usable."""

    def test_cli_imports_exist(self):
        """Both trial CLI callables can still be imported by name."""
        # Imported inside the test so a missing name fails this test only.
        from biomcp.cli.trials import get_trial_cli, search_trials_cli

        for cli_callable in (search_trials_cli, get_trial_cli):
            assert cli_callable is not None

    def test_search_defaults_without_source(self):
        """``trial search`` exits with code 0 when ``--source`` is omitted."""
        from typer.testing import CliRunner

        from biomcp.cli.main import app

        cli_runner = CliRunner()
        # Deliberately no --source option in the argument list.
        argv = ["trial", "search", "--condition", "diabetes"]

        # asyncio.run (resolved through biomcp.cli.trials) is replaced by a
        # mock that returns None, so whatever is handed to it is not run.
        with patch("biomcp.cli.trials.asyncio.run") as fake_run:
            fake_run.return_value = None

            outcome = cli_runner.invoke(app, argv)
            assert outcome.exit_code == 0

            # Inspect the first positional argument given to asyncio.run.
            # Only a loose shape check is made: it must expose either
            # ``__name__`` or ``func``.
            fake_run.assert_called()
            target = fake_run.call_args.args[0]
            assert hasattr(target, "__name__") or hasattr(target, "func")

```

--------------------------------------------------------------------------------
/docs/reference/architecture-diagrams.md:
--------------------------------------------------------------------------------

```markdown
# BioMCP Architecture Diagrams

This page describes BioMCP's architecture, data flows, and workflows.

## System Architecture Overview

BioMCP consists of three main layers:

### Client Layer

- **CLI Interface**: Command-line tool for direct interaction
- **Claude Desktop**: AI assistant integration via the Model Context Protocol (MCP)
- **Python SDK**: Programmatic access for custom applications
- **Custom MCP Clients**: Any MCP-compatible client

### BioMCP Core

- **MCP Server**: Handles protocol communication
- **Request Router**: Directs queries to appropriate handlers
- **Cache Layer**: Intelligent caching for API responses
- **Domain Handlers**: Specialized processors for each data type
  - Articles Handler (PubMed/PubTator3)
  - Trials Handler (ClinicalTrials.gov, NCI)
  - Variants Handler (MyVariant.info)
  - Genes Handler (MyGene.info)

### External APIs

- **PubMed/PubTator3**: Biomedical literature
- **ClinicalTrials.gov**: US clinical trials registry
- **NCI CTS API**: National Cancer Institute trials
- **MyVariant.info**: Genetic variant annotations
- **MyGene.info**: Gene information
- **cBioPortal**: Cancer genomics data
- **AlphaGenome**: Variant effect predictions

## Data Flow Architecture

1. **User Request**: Query submitted via CLI, Claude, or SDK
2. **Cache Check**: System checks for cached results
3. **API Request**: If cache miss, fetch from external API
4. **Result Processing**: Normalize and enrich data
5. **Cache Storage**: Store results for future use
6. **Response Delivery**: Return formatted results to user

## Key Workflows

### Search Workflow

1. **Think Tool**: Plan search strategy
2. **Execute Search**: Query relevant data sources
3. **Enrich Results**: Add contextual information
4. **Combine Data**: Merge results from multiple sources
5. **Format Output**: Present in user-friendly format

### Article Search Pipeline

1. **Query Processing**: Parse user input
2. **Entity Recognition**: Normalize gene/disease names
3. **PubTator3 Search**: Query literature database
4. **Preprint Integration**: Include bioRxiv/medRxiv if enabled
5. **cBioPortal Enrichment**: Add cancer genomics data for genes
6. **Result Merging**: Combine all data sources

### Clinical Trial Matching

1. **Patient Profile**: Parse eligibility criteria
2. **Location Filter**: Geographic constraints
3. **Molecular Profile**: Mutation requirements
4. **Prior Treatments**: Treatment history matching
5. **Scoring Algorithm**: Rank trials by relevance
6. **Contact Extraction**: Retrieve site information

### Variant Interpretation

1. **Input Parsing**: Process VCF/MAF files
2. **Batch Processing**: Group variants efficiently
3. **Annotation Gathering**:
   - Clinical significance from MyVariant.info
   - Population frequency data
   - In silico predictions
   - Literature evidence
   - Clinical trial associations
4. **AlphaGenome Integration**: Regulatory predictions (optional)
5. **Tier Classification**: Categorize by clinical relevance
6. **Report Generation**: Create interpretation summary

## Architecture Patterns

### Caching Strategy

- **Multi-tier Cache**: Memory → Disk → External
- **Smart TTL**: Domain-specific expiration times
- **Cache Key Generation**: Include all query parameters
- **Invalidation Logic**: Clear on errors or updates

### Error Handling

- **Retry Logic**: Exponential backoff for transient errors
- **Rate Limiting**: Respect API limits with queuing
- **Graceful Degradation**: Return partial results when possible
- **Clear Error Messages**: Help users troubleshoot issues

### Authentication Flow

1. Check for user-provided API key
2. Fall back to environment variable
3. Use public access if no key available
4. Handle authentication errors gracefully

### Performance Optimization

- **Request Batching**: Combine multiple queries
- **Parallel Execution**: Concurrent API calls
- **Connection Pooling**: Reuse HTTP connections
- **Result Streaming**: Return data as available

## Deployment Options

### Local Development

- Single process with in-memory cache
- Direct file system access
- Simple configuration

### Docker Deployment

- Containerized application
- Volume-mounted cache
- Environment-based configuration

### Cloud Deployment

- Load-balanced instances
- Shared Redis cache
- Auto-scaling capabilities
- Monitoring integration

## Creating Documentation Diagrams

For visual diagrams, we recommend:

1. **ASCII Art**: Universal compatibility

   - Use tools like asciiflow.com
   - Store in `docs/assets/` directory

2. **Screenshots**: For complex UIs

   - Annotate with arrows/labels
   - Save as PNG in `docs/assets/`

3. **External Tools**:
   - draw.io for flowcharts
   - Lucidchart for professional diagrams
   - Export as static images

## ASCII System Architecture

```
┌─────────────────────────────────────────────────────────────────────────┐
│                              USER INTERFACES                             │
├────────────────┬───────────────────┬───────────────┬───────────────────┤
│                │                   │               │                   │
│   CLI Tool     │  Claude Desktop   │  Python SDK   │   Custom Client   │
│  (biomcp)      │   (MCP Client)    │   (async)     │    (your app)     │
│                │                   │               │                   │
└────────┬───────┴─────────┬─────────┴───────┬───────┴───────────┬───────┘
         │                 │                 │                   │
         └─────────────────┴─────────────────┴───────────────────┘
                                    │
                                    ▼
┌─────────────────────────────────────────────────────────────────────────┐
│                            BioMCP CORE SERVER                            │
├─────────────────────────────────────────────────────────────────────────┤
│                                                                         │
│  ┌─────────────┐  ┌──────────────┐  ┌──────────────┐  ┌────────────┐  │
│  │   Router    │  │ Rate Limiter │  │ Cache Manager│  │   Logger   │  │
│  │             │  │              │  │              │  │            │  │
│  └──────┬──────┘  └──────────────┘  └──────────────┘  └────────────┘  │
│         │                                                               │
│         ▼                                                               │
│  ┌─────────────────────────────────────────────────────────────────┐   │
│  │                      Domain Handlers                             │   │
│  ├─────────────┬─────────────┬─────────────┬──────────────────────┤   │
│  │  Articles   │   Trials    │  Variants   │  Genes/Drugs/Disease │   │
│  │  Handler    │   Handler   │  Handler    │      Handler         │   │
│  └──────┬──────┴──────┬──────┴──────┬──────┴──────────┬───────────┘   │
│         │             │             │                 │                 │
└─────────┼─────────────┼─────────────┼─────────────────┼─────────────────┘
          │             │             │                 │
          ▼             ▼             ▼                 ▼
┌─────────────────────────────────────────────────────────────────────────┐
│                          EXTERNAL DATA SOURCES                           │
├─────────────┬─────────────┬─────────────┬──────────────────────────────┤
│             │             │             │                              │
│  PubMed/    │ Clinical    │ MyVariant   │        BioThings Suite       │
│  PubTator3  │ Trials.gov  │   .info     │  (MyGene/MyDisease/MyChem)  │
│             │    + NCI    │             │                              │
│             │             │             │                              │
├─────────────┴─────────────┴─────────────┴──────────────────────────────┤
│                                                                         │
│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐                 │
│  │  cBioPortal  │  │  AlphaGenome │  │  Europe PMC  │                 │
│  │   (Cancer)   │  │ (Predictions)│  │  (Preprints) │                 │
│  └──────────────┘  └──────────────┘  └──────────────┘                 │
│                                                                         │
└─────────────────────────────────────────────────────────────────────────┘
```

See also: [Quick Architecture Reference](quick-architecture.md)

## Next Steps

- View the [Quick Architecture Guide](quick-architecture.md) for a concise overview
- Check the [Developer Guides](../developer-guides/01-server-deployment.md) for implementation details
- See the [API Reference](../apis/overview.md) for detailed specifications

```

--------------------------------------------------------------------------------
/tests/tdd/test_circuit_breaker.py:
--------------------------------------------------------------------------------

```python
"""Tests for circuit breaker pattern."""

import asyncio

import pytest

from biomcp.circuit_breaker import (
    CircuitBreaker,
    CircuitBreakerConfig,
    CircuitBreakerError,
    CircuitState,
    circuit_breaker,
    get_circuit_breaker,
)


class CircuitBreakerTestException(Exception):
    """Deliberate failure raised by the test functions in this module."""


class IgnoredException(Exception):
    """Exception type passed as ``exclude_exceptions`` in these tests."""


@pytest.mark.asyncio
async def test_circuit_breaker_closed_state():
    """A newly constructed breaker is closed and forwards the call."""
    invocations = 0

    async def probe():
        nonlocal invocations
        invocations += 1
        return "success"

    breaker = CircuitBreaker("test_closed")

    # No configuration and no prior calls: the breaker reports closed.
    assert breaker.is_closed

    # The wrapped coroutine runs exactly once and its value is passed back.
    outcome = await breaker.call(probe)
    assert outcome == "success"
    assert invocations == 1


@pytest.mark.asyncio
async def test_circuit_breaker_opens_on_threshold():
    """The breaker opens once ``failure_threshold`` failures occur."""
    breaker = CircuitBreaker(
        "test_threshold",
        CircuitBreakerConfig(
            failure_threshold=3,
            expected_exception=CircuitBreakerTestException,
        ),
    )

    async def always_raises():
        raise CircuitBreakerTestException("Test failure")

    async def expect_failure():
        # The original exception must surface from ``breaker.call``.
        with pytest.raises(CircuitBreakerTestException):
            await breaker.call(always_raises)

    # Failures one and two propagate but leave the circuit closed.
    for _attempt in range(2):
        await expect_failure()
        assert breaker.is_closed

    # The third failure reaches the threshold and opens the circuit.
    await expect_failure()
    assert breaker.is_open

    # While open, the breaker raises its own error type instead.
    with pytest.raises(CircuitBreakerError):
        await breaker.call(always_raises)


@pytest.mark.asyncio
async def test_circuit_breaker_half_open_recovery():
    """An open breaker recovers through ``HALF_OPEN`` back to closed.

    With ``success_threshold=2`` the first success after the recovery
    timeout leaves the breaker in ``HALF_OPEN``; the second one closes it.
    """
    breaker = CircuitBreaker(
        "test_recovery",
        CircuitBreakerConfig(
            failure_threshold=2,
            recovery_timeout=0.1,  # 100 ms, kept short for testing
            success_threshold=2,
        ),
    )

    invocations = 0
    failing = True

    async def flaky():
        nonlocal invocations
        invocations += 1
        # ``failing`` is read from the enclosing scope at call time, so
        # flipping it below changes this function's behavior.
        if failing:
            raise CircuitBreakerTestException("Failure")
        return "success"

    # Two failures reach the threshold and open the circuit.
    for _attempt in range(2):
        with pytest.raises(CircuitBreakerTestException):
            await breaker.call(flaky)
    assert breaker.is_open

    # Sleep a little longer than recovery_timeout.
    await asyncio.sleep(0.15)

    # First success after the timeout: the call is let through and the
    # breaker reports HALF_OPEN.
    failing = False
    first = await breaker.call(flaky)
    assert first == "success"
    assert breaker.state == CircuitState.HALF_OPEN

    # Second success satisfies success_threshold and closes the circuit.
    second = await breaker.call(flaky)
    assert second == "success"
    assert breaker.is_closed


@pytest.mark.asyncio
async def test_circuit_breaker_half_open_failure():
    """A failure after the recovery timeout reopens the breaker."""
    breaker = CircuitBreaker(
        "test_half_open_fail",
        CircuitBreakerConfig(failure_threshold=2, recovery_timeout=0.1),
    )

    async def always_raises():
        raise CircuitBreakerTestException("Failure")

    async def expect_failure():
        # The wrapped function's own exception must surface.
        with pytest.raises(CircuitBreakerTestException):
            await breaker.call(always_raises)

    # Two failures reach the threshold and open the circuit.
    for _attempt in range(2):
        await expect_failure()
    assert breaker.is_open

    # Sleep a little longer than recovery_timeout.
    await asyncio.sleep(0.15)

    # The next call is attempted (the original exception is raised, not
    # CircuitBreakerError) and its failure leaves the circuit open again.
    await expect_failure()
    assert breaker.is_open


@pytest.mark.asyncio
async def test_circuit_breaker_ignored_exceptions():
    """Excluded exception types propagate without opening the breaker."""
    breaker = CircuitBreaker(
        "test_ignored",
        CircuitBreakerConfig(
            failure_threshold=2,
            expected_exception=Exception,
            exclude_exceptions=(IgnoredException,),
        ),
    )

    async def raises_excluded():
        raise IgnoredException("Should be ignored")

    # Five raises, well past failure_threshold=2: each one still reaches
    # the caller, and the circuit is closed after every attempt.
    for _attempt in range(5):
        with pytest.raises(IgnoredException):
            await breaker.call(raises_excluded)
        assert breaker.is_closed


@pytest.mark.asyncio
async def test_circuit_breaker_reset():
    """``reset()`` closes an open breaker so calls go through again."""
    breaker = CircuitBreaker(
        "test_reset", CircuitBreakerConfig(failure_threshold=1)
    )

    async def always_raises():
        raise CircuitBreakerTestException("Failure")

    async def always_succeeds():
        return "success"

    # With failure_threshold=1 a single failure opens the circuit.
    with pytest.raises(CircuitBreakerTestException):
        await breaker.call(always_raises)
    assert breaker.is_open

    # reset() is a coroutine here; after awaiting it the breaker is closed.
    await breaker.reset()
    assert breaker.is_closed

    # A call made after the reset is executed and returns its value.
    outcome = await breaker.call(always_succeeds)
    assert outcome == "success"


@pytest.mark.asyncio
async def test_circuit_breaker_decorator():
    """Test the ``circuit_breaker`` decorator, including fail-fast when open.

    The decorated coroutine counts its own invocations. The test checks
    that a successful call returns normally, that two failures (matching
    ``failure_threshold=2``) propagate to the caller, and that the next
    call raises ``CircuitBreakerError`` without the wrapped function being
    invoked again.
    """
    call_count = 0

    @circuit_breaker(
        "test_decorator", CircuitBreakerConfig(failure_threshold=2)
    )
    async def decorated_func(should_fail=False):
        nonlocal call_count
        call_count += 1
        if should_fail:
            raise CircuitBreakerTestException("Failure")
        return "success"

    # A successful call passes straight through the decorator.
    result = await decorated_func()
    assert result == "success"
    assert call_count == 1

    # Open the circuit: both failures are executed and propagate.
    for _ in range(2):
        with pytest.raises(CircuitBreakerTestException):
            await decorated_func(should_fail=True)
    assert call_count == 3

    # Circuit should be open: the call is rejected with the breaker's error.
    with pytest.raises(CircuitBreakerError):
        await decorated_func()
    # The counter is unchanged, so the wrapped function was not invoked.
    assert call_count == 3


def test_get_circuit_breaker():
    """``get_circuit_breaker`` returns one shared instance per name."""
    name = "test_registry"

    # The first lookup yields a breaker carrying the requested name.
    created = get_circuit_breaker(name)
    assert created.name == name

    # A second lookup with the same name yields the identical object.
    fetched_again = get_circuit_breaker(name)
    assert created is fetched_again

    # A different name yields a distinct object.
    other = get_circuit_breaker("test_registry_2")
    assert other is not created


@pytest.mark.asyncio
async def test_circuit_breaker_concurrent_calls():
    """Ten gathered calls: some fail normally, the rest are rejected.

    The wrapped function raises on its first five invocations and would
    succeed afterwards. With ``failure_threshold=5`` the test expects at
    least five ``CircuitBreakerTestException`` results, at least one
    ``CircuitBreakerError`` and an open circuit at the end.
    """
    config = CircuitBreakerConfig(
        failure_threshold=5,
        expected_exception=CircuitBreakerTestException,
    )
    breaker = CircuitBreaker("test_concurrent", config)

    attempts = 0

    async def fails_first_five():
        nonlocal attempts
        attempts += 1
        if attempts > 5:
            return "success"
        raise CircuitBreakerTestException("Failure")

    # Create all ten call coroutines up front, then run them together;
    # return_exceptions=True collects raised exceptions as results.
    pending = [breaker.call(fails_first_five) for _ in range(10)]
    outcomes = await asyncio.gather(*pending, return_exceptions=True)

    def count_of(exc_type):
        # Number of gathered results that are instances of ``exc_type``.
        return sum(1 for item in outcomes if isinstance(item, exc_type))

    raised = count_of(CircuitBreakerTestException)
    rejected = count_of(CircuitBreakerError)

    # At least failure_threshold calls failed with the function's error.
    assert raised >= config.failure_threshold
    # At least one call was rejected by the breaker itself.
    assert rejected > 0
    # The circuit ends up open.
    assert breaker.is_open


@pytest.mark.asyncio
async def test_circuit_breaker_success_resets_failures():
    """A success in the closed state clears the accumulated failures."""
    breaker = CircuitBreaker(
        "test_success_reset", CircuitBreakerConfig(failure_threshold=3)
    )

    async def maybe_fail(fail=False):
        if fail:
            raise CircuitBreakerTestException("Failure")
        return "success"

    async def fail_twice():
        # ``fail=True`` is forwarded by ``breaker.call`` to ``maybe_fail``.
        for _attempt in range(2):
            with pytest.raises(CircuitBreakerTestException):
                await breaker.call(maybe_fail, fail=True)

    # Two failures: one short of failure_threshold=3.
    await fail_twice()

    # A success in between; the circuit is still closed afterwards.
    outcome = await breaker.call(maybe_fail, fail=False)
    assert outcome == "success"
    assert breaker.is_closed

    # Two further failures would total four without a reset, yet the
    # circuit is expected to remain closed.
    await fail_twice()
    assert breaker.is_closed

```
Page 4/15FirstPrevNextLast