This is page 10 of 19. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── actions │ │ └── setup-python-env │ │ └── action.yml │ ├── dependabot.yml │ └── workflows │ ├── ci.yml │ ├── deploy-docs.yml │ ├── main.yml.disabled │ ├── on-release-main.yml │ └── validate-codecov-config.yml ├── .gitignore ├── .pre-commit-config.yaml ├── BIOMCP_DATA_FLOW.md ├── CHANGELOG.md ├── CNAME ├── codecov.yaml ├── docker-compose.yml ├── Dockerfile ├── docs │ ├── apis │ │ ├── error-codes.md │ │ ├── overview.md │ │ └── python-sdk.md │ ├── assets │ │ ├── biomcp-cursor-locations.png │ │ ├── favicon.ico │ │ ├── icon.png │ │ ├── logo.png │ │ ├── mcp_architecture.txt │ │ └── remote-connection │ │ ├── 00_connectors.png │ │ ├── 01_add_custom_connector.png │ │ ├── 02_connector_enabled.png │ │ ├── 03_connect_to_biomcp.png │ │ ├── 04_select_google_oauth.png │ │ └── 05_success_connect.png │ ├── backend-services-reference │ │ ├── 01-overview.md │ │ ├── 02-biothings-suite.md │ │ ├── 03-cbioportal.md │ │ ├── 04-clinicaltrials-gov.md │ │ ├── 05-nci-cts-api.md │ │ ├── 06-pubtator3.md │ │ └── 07-alphagenome.md │ ├── blog │ │ ├── ai-assisted-clinical-trial-search-analysis.md │ │ ├── images │ │ │ ├── deep-researcher-video.png │ │ │ ├── researcher-announce.png │ │ │ ├── researcher-drop-down.png │ │ │ ├── researcher-prompt.png │ │ │ ├── trial-search-assistant.png │ │ │ └── what_is_biomcp_thumbnail.png │ │ └── researcher-persona-resource.md │ ├── changelog.md │ ├── CNAME │ ├── concepts │ │ ├── 01-what-is-biomcp.md │ │ ├── 02-the-deep-researcher-persona.md │ │ └── 03-sequential-thinking-with-the-think-tool.md │ ├── developer-guides │ │ ├── 01-server-deployment.md │ │ ├── 02-contributing-and-testing.md │ │ ├── 03-third-party-endpoints.md │ │ ├── 04-transport-protocol.md │ │ ├── 05-error-handling.md │ │ ├── 06-http-client-and-caching.md │ │ ├── 07-performance-optimizations.md │ │ └── generate_endpoints.py │ ├── faq-condensed.md │ ├── FDA_SECURITY.md │ ├── genomoncology.md │ ├── getting-started │ │ ├── 01-quickstart-cli.md │ │ ├── 02-claude-desktop-integration.md │ │ └── 03-authentication-and-api-keys.md │ ├── how-to-guides │ │ ├── 01-find-articles-and-cbioportal-data.md │ │ ├── 02-find-trials-with-nci-and-biothings.md │ │ ├── 03-get-comprehensive-variant-annotations.md │ │ ├── 04-predict-variant-effects-with-alphagenome.md │ │ ├── 05-logging-and-monitoring-with-bigquery.md │ │ └── 06-search-nci-organizations-and-interventions.md │ ├── index.md │ ├── policies.md │ ├── reference │ │ ├── architecture-diagrams.md │ │ ├── quick-architecture.md │ │ ├── quick-reference.md │ │ └── visual-architecture.md │ ├── robots.txt │ ├── stylesheets │ │ ├── announcement.css │ │ └── extra.css │ ├── troubleshooting.md │ ├── tutorials │ │ ├── biothings-prompts.md │ │ ├── claude-code-biomcp-alphagenome.md │ │ ├── nci-prompts.md │ │ ├── openfda-integration.md │ │ ├── openfda-prompts.md │ │ ├── pydantic-ai-integration.md │ │ └── remote-connection.md │ ├── user-guides │ │ ├── 01-command-line-interface.md │ │ ├── 02-mcp-tools-reference.md │ │ └── 03-integrating-with-ides-and-clients.md │ └── workflows │ └── all-workflows.md ├── example_scripts │ ├── mcp_integration.py │ └── python_sdk.py ├── glama.json ├── LICENSE ├── lzyank.toml ├── Makefile ├── mkdocs.yml ├── package-lock.json ├── package.json ├── pyproject.toml ├── README.md ├── scripts │ ├── check_docs_in_mkdocs.py │ ├── check_http_imports.py │ └── generate_endpoints_doc.py ├── smithery.yaml ├── src │ └── biomcp │ ├── 
__init__.py │ ├── __main__.py │ ├── articles │ │ ├── __init__.py │ │ ├── autocomplete.py │ │ ├── fetch.py │ │ ├── preprints.py │ │ ├── search_optimized.py │ │ ├── search.py │ │ └── unified.py │ ├── biomarkers │ │ ├── __init__.py │ │ └── search.py │ ├── cbioportal_helper.py │ ├── circuit_breaker.py │ ├── cli │ │ ├── __init__.py │ │ ├── articles.py │ │ ├── biomarkers.py │ │ ├── diseases.py │ │ ├── health.py │ │ ├── interventions.py │ │ ├── main.py │ │ ├── openfda.py │ │ ├── organizations.py │ │ ├── server.py │ │ ├── trials.py │ │ └── variants.py │ ├── connection_pool.py │ ├── constants.py │ ├── core.py │ ├── diseases │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── domain_handlers.py │ ├── drugs │ │ ├── __init__.py │ │ └── getter.py │ ├── exceptions.py │ ├── genes │ │ ├── __init__.py │ │ └── getter.py │ ├── http_client_simple.py │ ├── http_client.py │ ├── individual_tools.py │ ├── integrations │ │ ├── __init__.py │ │ ├── biothings_client.py │ │ └── cts_api.py │ ├── interventions │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── logging_filter.py │ ├── metrics_handler.py │ ├── metrics.py │ ├── openfda │ │ ├── __init__.py │ │ ├── adverse_events_helpers.py │ │ ├── adverse_events.py │ │ ├── cache.py │ │ ├── constants.py │ │ ├── device_events_helpers.py │ │ ├── device_events.py │ │ ├── drug_approvals.py │ │ ├── drug_labels_helpers.py │ │ ├── drug_labels.py │ │ ├── drug_recalls_helpers.py │ │ ├── drug_recalls.py │ │ ├── drug_shortages_detail_helpers.py │ │ ├── drug_shortages_helpers.py │ │ ├── drug_shortages.py │ │ ├── exceptions.py │ │ ├── input_validation.py │ │ ├── rate_limiter.py │ │ ├── utils.py │ │ └── validation.py │ ├── organizations │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── parameter_parser.py │ ├── prefetch.py │ ├── query_parser.py │ ├── query_router.py │ ├── rate_limiter.py │ ├── render.py │ ├── request_batcher.py │ ├── resources │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── instructions.md │ │ └── researcher.md │ ├── retry.py │ ├── router_handlers.py │ ├── router.py │ ├── shared_context.py │ ├── thinking │ │ ├── __init__.py │ │ ├── sequential.py │ │ └── session.py │ ├── thinking_tool.py │ ├── thinking_tracker.py │ ├── trials │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── nci_getter.py │ │ ├── nci_search.py │ │ └── search.py │ ├── utils │ │ ├── __init__.py │ │ ├── cancer_types_api.py │ │ ├── cbio_http_adapter.py │ │ ├── endpoint_registry.py │ │ ├── gene_validator.py │ │ ├── metrics.py │ │ ├── mutation_filter.py │ │ ├── query_utils.py │ │ ├── rate_limiter.py │ │ └── request_cache.py │ ├── variants │ │ ├── __init__.py │ │ ├── alphagenome.py │ │ ├── cancer_types.py │ │ ├── cbio_external_client.py │ │ ├── cbioportal_mutations.py │ │ ├── cbioportal_search_helpers.py │ │ ├── cbioportal_search.py │ │ ├── constants.py │ │ ├── external.py │ │ ├── filters.py │ │ ├── getter.py │ │ ├── links.py │ │ └── search.py │ └── workers │ ├── __init__.py │ ├── worker_entry_stytch.js │ ├── worker_entry.js │ └── worker.py ├── tests │ ├── bdd │ │ ├── cli_help │ │ │ ├── help.feature │ │ │ └── test_help.py │ │ ├── conftest.py │ │ ├── features │ │ │ └── alphagenome_integration.feature │ │ ├── fetch_articles │ │ │ ├── fetch.feature │ │ │ └── test_fetch.py │ │ ├── get_trials │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── get_variants │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── search_articles │ │ │ ├── autocomplete.feature │ │ │ ├── search.feature │ │ │ ├── test_autocomplete.py │ │ │ └── test_search.py │ │ ├── search_trials │ │ │ ├── search.feature │ │ │ └── 
test_search.py │ │ ├── search_variants │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ └── steps │ │ └── test_alphagenome_steps.py │ ├── config │ │ └── test_smithery_config.py │ ├── conftest.py │ ├── data │ │ ├── ct_gov │ │ │ ├── clinical_trials_api_v2.yaml │ │ │ ├── trials_NCT04280705.json │ │ │ └── trials_NCT04280705.txt │ │ ├── myvariant │ │ │ ├── myvariant_api.yaml │ │ │ ├── myvariant_field_descriptions.csv │ │ │ ├── variants_full_braf_v600e.json │ │ │ ├── variants_full_braf_v600e.txt │ │ │ └── variants_part_braf_v600_multiple.json │ │ ├── openfda │ │ │ ├── drugsfda_detail.json │ │ │ ├── drugsfda_search.json │ │ │ ├── enforcement_detail.json │ │ │ └── enforcement_search.json │ │ └── pubtator │ │ ├── pubtator_autocomplete.json │ │ └── pubtator3_paper.txt │ ├── integration │ │ ├── test_openfda_integration.py │ │ ├── test_preprints_integration.py │ │ ├── test_simple.py │ │ └── test_variants_integration.py │ ├── tdd │ │ ├── articles │ │ │ ├── test_autocomplete.py │ │ │ ├── test_cbioportal_integration.py │ │ │ ├── test_fetch.py │ │ │ ├── test_preprints.py │ │ │ ├── test_search.py │ │ │ └── test_unified.py │ │ ├── conftest.py │ │ ├── drugs │ │ │ ├── __init__.py │ │ │ └── test_drug_getter.py │ │ ├── openfda │ │ │ ├── __init__.py │ │ │ ├── test_adverse_events.py │ │ │ ├── test_device_events.py │ │ │ ├── test_drug_approvals.py │ │ │ ├── test_drug_labels.py │ │ │ ├── test_drug_recalls.py │ │ │ ├── test_drug_shortages.py │ │ │ └── test_security.py │ │ ├── test_biothings_integration_real.py │ │ ├── test_biothings_integration.py │ │ ├── test_circuit_breaker.py │ │ ├── test_concurrent_requests.py │ │ ├── test_connection_pool.py │ │ ├── test_domain_handlers.py │ │ ├── test_drug_approvals.py │ │ ├── test_drug_recalls.py │ │ ├── test_drug_shortages.py │ │ ├── test_endpoint_documentation.py │ │ ├── test_error_scenarios.py │ │ ├── test_europe_pmc_fetch.py │ │ ├── test_mcp_integration.py │ │ ├── test_mcp_tools.py │ │ ├── test_metrics.py │ │ ├── test_nci_integration.py │ │ ├── test_nci_mcp_tools.py │ │ ├── test_network_policies.py │ │ ├── test_offline_mode.py │ │ ├── test_openfda_unified.py │ │ ├── test_pten_r173_search.py │ │ ├── test_render.py │ │ ├── test_request_batcher.py.disabled │ │ ├── test_retry.py │ │ ├── test_router.py │ │ ├── test_shared_context.py.disabled │ │ ├── test_unified_biothings.py │ │ ├── thinking │ │ │ ├── __init__.py │ │ │ └── test_sequential.py │ │ ├── trials │ │ │ ├── test_backward_compatibility.py │ │ │ ├── test_getter.py │ │ │ └── test_search.py │ │ ├── utils │ │ │ ├── test_gene_validator.py │ │ │ ├── test_mutation_filter.py │ │ │ ├── test_rate_limiter.py │ │ │ └── test_request_cache.py │ │ ├── variants │ │ │ ├── constants.py │ │ │ ├── test_alphagenome_api_key.py │ │ │ ├── test_alphagenome_comprehensive.py │ │ │ ├── test_alphagenome.py │ │ │ ├── test_cbioportal_mutations.py │ │ │ ├── test_cbioportal_search.py │ │ │ ├── test_external_integration.py │ │ │ ├── test_external.py │ │ │ ├── test_extract_gene_aa_change.py │ │ │ ├── test_filters.py │ │ │ ├── test_getter.py │ │ │ ├── test_links.py │ │ │ └── test_search.py │ │ └── workers │ │ └── test_worker_sanitization.js │ └── test_pydantic_ai_integration.py ├── THIRD_PARTY_ENDPOINTS.md ├── tox.ini ├── uv.lock └── wrangler.toml ``` # Files -------------------------------------------------------------------------------- /src/biomcp/resources/researcher.md: -------------------------------------------------------------------------------- ```markdown 1 | # BioMCP Biomedical Research Assistant 2 | 3 | ## Goals & Personality 4 | 5 | - 
**Mission:** Produce rigorous, source-grounded biomedical research briefs using the BioMCP tool suite. 6 | - **Voice:** Professional, concise, transparent; always cites evidence. 7 | - **Key Traits:** 8 | _Agentic_: autonomously plans, executes, and critiques. 9 | _Self-critical_: checks findings for gaps, bias, and stale or low-quality sources. 10 | _Interactive_: provides clear updates on progress through the steps. 11 | _Safety-first_: never invents data; flags uncertainty and unsupported claims. 12 | 13 | **Default recency horizon:** Review evidence published ≤5 years unless user specifies otherwise. 14 | 15 | ## Available Tools 16 | 17 | | Category | Tool | Purpose | 18 | | -------------- | ------------------------- | -------------------------------------------- | 19 | | **Trials** | `trial_searcher` | Find trials by advanced search | 20 | | | `trial_protocol_getter` | Retrieve full study design details | 21 | | | `trial_locations_getter` | List recruiting sites | 22 | | | `trial_outcomes_getter` | Fetch results & endpoints (if available) | 23 | | | `trial_references_getter` | Get linked publications for a trial | 24 | | **Literature** | `article_searcher` | Query biomedical papers (PubMed + preprints) | 25 | | | `article_getter` | Full metadata & abstracts/full text | 26 | | **Genomics** | `variant_searcher` | Locate variants with filters | 27 | | | `variant_getter` | Comprehensive annotations | 28 | | **Planning** | `think` | Structured think-plan-reflect steps | 29 | | **Unified** | `search` | Cross-domain search with query language | 30 | | | `fetch` | Retrieve detailed records from any domain | 31 | | **Generic** | `web_search` | For initial scoping & term discovery | 32 | | **Artifacts** | `artifacts` | For creating final research briefs | 33 | 34 | ## MANDATORY: Use the 'think' Tool for ALL Research Tasks 35 | 36 | **CRITICAL REQUIREMENT:** You MUST use the `think` tool as your PRIMARY reasoning mechanism throughout ALL biomedical research tasks. This is NOT optional. 37 | 38 | 🚨 **ENFORCEMENT RULES:** 39 | 40 | - **Start IMMEDIATELY:** You MUST call 'think' BEFORE any other BioMCP tool 41 | - **Use CONTINUOUSLY:** Invoke 'think' before, during, and after each tool call 42 | - **Track EVERYTHING:** Document findings, reasoning, and synthesis in sequential thoughts 43 | - **Only STOP when complete:** Set nextThoughtNeeded=false only after full analysis 44 | 45 | ⚠️ **WARNING:** Failure to use 'think' first will compromise research quality! 46 | 47 | ## Sequential Thinking - 10-Step Process 48 | 49 | You **MUST** invoke the `think` tool for the entire workflow and progress through all 10 steps in sequential order. Each step should involve multiple 'think' calls. If the user explicitly requests to skip tool use (e.g., "Don't search"), adapt the process accordingly. 50 | 51 | ### Step 1: Topic Scoping & Domain Framework 52 | 53 | Goal: Create a comprehensive framework to ensure complete coverage of all relevant aspects.
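Before the scoping steps below, a minimal sketch of the think-first pattern mandated above. This is illustrative only: the topic is a placeholder, and every field name except `nextThoughtNeeded` (quoted in the enforcement rules) is an assumption rather than the tools' documented schema.

    # Hypothetical think-first sequence, expressed as an ordered list of tool invocations
    plan = [
        {"tool": "think", "thought": "Scope: adjuvant therapy in EGFR-mutant NSCLC; draft domain checklist + PICO.",
         "thoughtNumber": 1, "nextThoughtNeeded": True},
        {"tool": "think", "thought": "Plan: one web_search per domain, then trial/article retrieval on gaps.",
         "thoughtNumber": 2, "nextThoughtNeeded": True},
        # Domain tools are queued only after the plan exists in 'think'
        {"tool": "trial_searcher", "conditions": ["non-small cell lung cancer"],
         "interventions": ["osimertinib"]},
    ]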
54 | 55 | - Identify domains relevant to the topic (e.g., therapeutic modalities, diagnostic approaches, risk factors) based on the user's query 56 | - Aim for 4-8 domains unless topic complexity justifies more 57 | - Consider including a "Contextual Factors" domain for health economics, patient-reported outcomes, or health-systems impact when relevant 58 | - Identify appropriate subdivisions (e.g., subtypes, patient cohorts, disease stages) based on the user's query 59 | - Use brainstorming + quick web searches (e.g., "[topic] categories," "[topic] taxonomy") to draft a "Domain Checklist" 60 | - Create a Domain × Subdivision matrix of appropriate size to track evidence coverage 61 | - Initialize an **internal coverage matrix** in your sequential_thinking thoughts. Update that matrix in Steps 6, 7, and 8 62 | - Define your task-specific research framework based on the clinical question type: 63 | - Therapeutic questions: Use PICO (Population, Intervention, Comparator, Outcome) 64 | - Diagnostic questions: Use PIRD (Population, Index test, Reference standard, Diagnosis) 65 | - Prognostic questions: Use PECO (Population, Exposure, Comparator, Outcome) 66 | - Epidemiological questions: Use PIRT (Population, Indicator, Reference, Time) 67 | - Define initial research plan, todo list, and success criteria checklist 68 | - Determine appropriate tool selection based on question type: 69 | - `trial_*` tools: For therapeutic or interventional questions 70 | - `article_*` tools: For all questions 71 | - `variant_*` tools: Only when the query involves genetic or biomarker questions 72 | 73 | ### Step 2: Initial Information Gathering 74 | 75 | Goal: Establish baseline terminology, modalities, and recent developments. 76 | 77 | - Run at least one targeted `web_search` per domain on your Domain × Subdivision matrix 78 | - If matrix is large, batch searches by grouping similar domains or prioritize by relevance 79 | - Generate domain-specific search strings appropriate to the topic 80 | - Invoke regulatory searches only when the user explicitly requests approval or guideline information or when the topic focuses on therapeutic interventions 81 | - Maintain an **internal Regulatory Log** in your sequential_thinking thoughts if relevant to the query 82 | - Prioritize authoritative sources but don't exclude other relevant sources 83 | - Include relevant regulatory and guideline updates from the past 24 months if applicable 84 | 85 | ### Step 3: Focused & Frontier Retrieval 86 | 87 | Goal: Fill knowledge gaps and identify cutting-edge developments. 88 | 89 | - Run targeted `web_search` calls for any empty cells in your Domain × Subdivision matrix 90 | - Conduct subdivision-focused searches for each identified classification 91 | - Document high-value URLs and sources 92 | - Identify specific gaps requiring specialized database searches 93 | - Simultaneously conduct frontier scan: 94 | - Run targeted searches restricted to past 12 months with keywords: "emerging," "novel," "breakthrough," "future directions" + topic 95 | - Include appropriate site filters for the domain and topic 96 | - Search for conference proceedings, pre-prints, and non-peer-reviewed sources for very recent developments 97 | - Document these findings separately, clearly labeled as early-stage evidence 98 | 99 | ### Step 4: Primary Trials Analysis 100 | 101 | Goal: Identify and analyze key clinical trials. 
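For orientation only, the internal Domain × Subdivision coverage matrix described in Steps 1-3 could be tracked as a simple nested mapping inside the 'think' thoughts; the domains, subdivisions, and status labels below are invented placeholders, since the matrix has no fixed schema.

    # Hypothetical internal coverage matrix (all names are placeholders)
    coverage = {
        "Targeted therapy":   {"First line": "searched", "Second line": "gap"},
        "Immunotherapy":      {"First line": "searched", "Second line": "searched"},
        "Contextual factors": {"First line": "gap",      "Second line": "gap"},
    }
    # Cells still marked "gap" drive the focused retrieval in Step 3 and the gap-filling pass in Step 7.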
102 | 103 | - For therapeutic or interventional questions, run `trial_searcher` with filters based on Step 3 gaps 104 | - For other question types, skip to Step 5 or use `trial_searcher` only if directly relevant 105 | - Select a manageable number of trials per major domain (typically 3-5), adjusting as needed for question complexity 106 | - Retrieve full details using appropriate trial tools 107 | - For each trial, capture relevant metadata and outcomes based on the research question 108 | - Create structured evidence table with appropriate framework elements and results 109 | 110 | ### Step 5: Primary Literature Analysis 111 | 112 | Goal: Identify and analyze pivotal publications. 113 | 114 | - Run `article_searcher` for recent reviews, meta-analyses, and guidelines relevant to the topic 115 | - **TIP:** Use OR logic with pipe separators for variant notations: `keywords=["R173|Arg173|p.R173"]` 116 | - **TIP:** Combine synonyms for better coverage: `keywords=["immunotherapy|checkpoint inhibitor|PD-1"]` 117 | - **NOTE:** Preprints from bioRxiv/medRxiv are included by default 118 | - **NOTE:** cBioPortal cancer genomics data is automatically included for gene-based searches 119 | - Select highest-quality sources and retrieve full details using `article_details` 120 | - For each source, capture appropriate metadata and findings relevant to the research question 121 | - Extract study designs, cohort sizes, outcomes, and limitations as appropriate 122 | - Create evidence table for articles with relevant identifiers and key findings 123 | 124 | ### Step 6: Initial Evidence Synthesis 125 | 126 | Goal: Create preliminary framework of findings and identify gaps. 127 | 128 | - Merge trial and article evidence tables 129 | - Check WIP findings against initial plan and success criteria checklist 130 | - Categorize findings by domains from your matrix 131 | - Apply CRAAP assessment to each source 132 | - Flag any claim that relies solely on grey literature; mark with '[GL]' in evidence table 133 | - Identify contradictions and knowledge gaps 134 | - Draft evidence matrix with categorization 135 | - For each domain/finding, categorize as: Established, Emerging, Experimental, Theoretical, or Retired (for approaches shown ineffective) 136 | - Update the internal coverage matrix in your thoughts; ensure those indicators appear in the Findings tables 137 | - Create gap analysis for further searches 138 | 139 | ### Step 7: Integrated Gap-Filling 140 | 141 | Goal: Address identified knowledge gaps in a single integrated pass. 142 | 143 | - Run additional database queries for missing categories as needed 144 | - Conduct additional searches to capture recent developments or resolve conflicts 145 | - Retrieve full details for new sources identified 146 | - Extract key data from all source types 147 | - Add column `Source Type` (Peer-review / Conf-abstract / Press-release / Preprint) 148 | - Integrate new findings into existing evidence tables 149 | - Update the internal coverage matrix in your thoughts 150 | - Update documentation of very recent developments 151 | 152 | ### Step 8: Comprehensive Evidence Synthesis 153 | 154 | Goal: Create final integrated framework of findings with quality assessment. 
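Purely as a sketch of the evidence tables assembled in Steps 4-7, one unified matrix row might carry the fields named above; every value here is a placeholder, and the field names are assumptions rather than a prescribed format.

    # Hypothetical unified evidence-matrix row (placeholder values)
    row = {
        "domain": "Targeted therapy",
        "finding": "Agent X improves PFS in second line",
        "source_id": "NCT01234567",      # or PMID / DOI
        "source_type": "Peer-review",    # Peer-review / Conf-abstract / Press-release / Preprint (Step 7)
        "category": "Emerging",          # Established / Emerging / Experimental / Theoretical / Retired (Step 6)
        "grade": "Moderate",             # GRADE anchor assigned in this step
        "grey_literature": False,        # '[GL]' flag from Step 6
    }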
155 | 156 | - Merge all evidence into a unified matrix 157 | - Grade evidence strength using GRADE anchors appropriate to the research question: 158 | - High = Multiple high-quality studies or meta-analyses 159 | - Moderate = Well-designed controlled studies without randomization 160 | - Low = Observational studies 161 | - Very Low = Case reports, expert opinion, pre-clinical studies 162 | - Draft conclusions for each domain with supporting evidence 163 | - Tag each domain with appropriate classification and recency information 164 | - Identify contradictory findings and limitations 165 | - Update the internal coverage matrix in your thoughts 166 | - Update claim-to-evidence mapping with confidence levels 167 | - Produce quantitative outcome summaries appropriate to the research question 168 | 169 | ### Step 9: Self-Critique and Verification 170 | 171 | Goal: Rigorously assess the quality and comprehensiveness of the analysis. 172 | 173 | - Perform a systematic gap analysis: 174 | - Check each Domain × Subdivision cell for evidence coverage 175 | - Ensure recent developments are captured for each major domain 176 | - Verify all key metrics and quantitative data are extracted where available 177 | - Identify any conflicting evidence or perspectives 178 | - Document at least 3 concrete gaps or weaknesses in the current evidence 179 | - Conduct verification searches to ensure no breaking news was missed 180 | - Assess potential biases in the analysis 181 | - Update final confidence assessments for key claims 182 | - Update documented limitations and potential biases 183 | - Update verification statement of currency 184 | 185 | ### Step 10: Research Brief Creation 186 | 187 | Goal: Produce the final deliverable with all required elements. 188 | 189 | 1. Create a new _Research Brief_ artifact using the `artifacts` tool 190 | 2. Structure the Findings section to highlight novel developments first, organized by innovation level 191 | 3. Include inline citations linked to comprehensive reference list 192 | 4. Embed necessary tables (coverage matrix, regulatory log if applicable, quantitative outcomes) directly in the Markdown Research Brief 193 | 194 | ## Final Research Brief Requirements 195 | 196 | The final research brief must include: 197 | 198 | - Executive summary ≤ 120 words (hard cap) with main conclusions and confidence levels 199 | - Background providing context and current standards 200 | - Methodology section detailing research approach 201 | - Findings section with properly cited evidence, organized by themes and innovation levels (Established, Emerging, Experimental, Theoretical, Retired) 202 | - Clear delineation of established facts vs. 
emerging concepts 203 | - Limitations section incorporating self-critique results 204 | - Future directions and implications section 205 | - Regulatory/approval status table where applicable (or state: "Not applicable to this topic") 206 | - Comprehensive reference list using Vancouver numeric style for inline citations; list sources in order of appearance 207 | - Domain × Subdivision Coverage Matrix (showing evidence density across domains) 208 | - Quantitative Outcomes Table for key sources (including Source Type column to maintain provenance visibility) 209 | ``` -------------------------------------------------------------------------------- /src/biomcp/trials/nci_getter.py: -------------------------------------------------------------------------------- ```python 1 | """NCI Clinical Trials Search API integration for getting trial details.""" 2 | 3 | import logging 4 | from typing import Any 5 | 6 | from ..constants import NCI_TRIALS_URL 7 | from ..integrations.cts_api import CTSAPIError, make_cts_request 8 | from ..organizations.getter import get_organization 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | async def get_trial_nci( 14 | nct_id: str, 15 | api_key: str | None = None, 16 | ) -> dict[str, Any]: 17 | """ 18 | Get detailed trial information from NCI CTS API. 19 | 20 | Args: 21 | nct_id: NCT identifier (e.g., "NCT04280705") 22 | api_key: Optional API key 23 | 24 | Returns: 25 | Dictionary with trial details 26 | """ 27 | try: 28 | # Make API request 29 | url = f"{NCI_TRIALS_URL}/{nct_id}" 30 | response = await make_cts_request( 31 | url=url, 32 | api_key=api_key, 33 | ) 34 | 35 | # Return the trial data 36 | if "data" in response: 37 | return response["data"] 38 | elif "trial" in response: 39 | return response["trial"] 40 | else: 41 | return response 42 | 43 | except CTSAPIError: 44 | raise 45 | except Exception as e: 46 | logger.error(f"Failed to get NCI trial {nct_id}: {e}") 47 | raise CTSAPIError(f"Failed to retrieve trial: {e!s}") from e 48 | 49 | 50 | def _format_trial_header(trial: dict[str, Any]) -> list[str]: 51 | """Format trial header section.""" 52 | nct_id = trial.get("nct_id", trial.get("protocol_id", "Unknown")) 53 | title = trial.get("official_title", trial.get("title", "Untitled")) 54 | brief_title = trial.get("brief_title", "") 55 | 56 | lines = [ 57 | f"# Clinical Trial: {nct_id}", 58 | "", 59 | f"## {title}", 60 | "", 61 | ] 62 | 63 | if brief_title and brief_title != title: 64 | lines.append(f"**Brief Title**: {brief_title}") 65 | lines.append("") 66 | 67 | return lines 68 | 69 | 70 | def _format_protocol_section(trial: dict[str, Any]) -> list[str]: 71 | """Format protocol information section.""" 72 | lines = [ 73 | "## Protocol Information", 74 | "", 75 | f"- **NCT ID**: {trial.get('nct_id', trial.get('protocol_id', 'Unknown'))}", 76 | f"- **Phase**: {trial.get('phase', 'Not specified')}", 77 | f"- **Status**: {trial.get('overall_status', 'Unknown')}", 78 | f"- **Study Type**: {trial.get('study_type', 'Not specified')}", 79 | ] 80 | 81 | if trial.get("primary_purpose"): 82 | lines.append(f"- **Primary Purpose**: {trial['primary_purpose']}") 83 | 84 | if trial.get("study_design"): 85 | design = trial["study_design"] 86 | if isinstance(design, dict): 87 | if design.get("allocation"): 88 | lines.append(f"- **Allocation**: {design['allocation']}") 89 | if design.get("masking"): 90 | lines.append(f"- **Masking**: {design['masking']}") 91 | if design.get("intervention_model"): 92 | lines.append( 93 | f"- **Intervention Model**: 
{design['intervention_model']}" 94 | ) 95 | else: 96 | lines.append(f"- **Study Design**: {design}") 97 | 98 | if trial.get("start_date"): 99 | lines.append(f"- **Start Date**: {trial['start_date']}") 100 | if trial.get("completion_date"): 101 | lines.append(f"- **Completion Date**: {trial['completion_date']}") 102 | 103 | lines.append("") 104 | return lines 105 | 106 | 107 | def _format_summary_section(trial: dict[str, Any]) -> list[str]: 108 | """Format summary section.""" 109 | lines = [] 110 | if trial.get("brief_summary") or trial.get("description"): 111 | lines.extend([ 112 | "## Summary", 113 | "", 114 | trial.get("brief_summary", trial.get("description", "")), 115 | "", 116 | ]) 117 | return lines 118 | 119 | 120 | def _format_conditions_section(trial: dict[str, Any]) -> list[str]: 121 | """Format conditions/diseases section.""" 122 | conditions = trial.get("diseases", trial.get("conditions", [])) 123 | if not conditions: 124 | return [] 125 | 126 | lines = ["## Conditions", ""] 127 | if isinstance(conditions, list): 128 | for condition in conditions: 129 | lines.append(f"- {condition}") 130 | else: 131 | lines.append(f"- {conditions}") 132 | lines.append("") 133 | return lines 134 | 135 | 136 | def _format_interventions_section(trial: dict[str, Any]) -> list[str]: 137 | """Format interventions section.""" 138 | interventions = trial.get("interventions", []) 139 | if not interventions: 140 | return [] 141 | 142 | lines = ["## Interventions", ""] 143 | for intervention in interventions: 144 | if isinstance(intervention, dict): 145 | name = intervention.get("name", "Unknown") 146 | int_type = intervention.get("type", "") 147 | desc = intervention.get("description", "") 148 | 149 | if int_type: 150 | lines.append(f"### {name} ({int_type})") 151 | else: 152 | lines.append(f"### {name}") 153 | 154 | if desc: 155 | lines.append(desc) 156 | lines.append("") 157 | else: 158 | lines.append(f"- {intervention}") 159 | return lines 160 | 161 | 162 | def _format_eligibility_section(trial: dict[str, Any]) -> list[str]: 163 | """Format eligibility criteria section.""" 164 | eligibility = trial.get("eligibility", {}) 165 | if not eligibility: 166 | return [] 167 | 168 | lines = ["## Eligibility Criteria", ""] 169 | 170 | # Basic eligibility info 171 | min_age = eligibility.get("minimum_age") 172 | max_age = eligibility.get("maximum_age") 173 | if min_age or max_age: 174 | age_str = [] 175 | if min_age: 176 | age_str.append(f"Minimum: {min_age}") 177 | if max_age: 178 | age_str.append(f"Maximum: {max_age}") 179 | lines.append(f"**Age**: {' | '.join(age_str)}") 180 | 181 | if eligibility.get("gender"): 182 | lines.append(f"**Gender**: {eligibility['gender']}") 183 | 184 | if "accepts_healthy_volunteers" in eligibility: 185 | accepts = "Yes" if eligibility["accepts_healthy_volunteers"] else "No" 186 | lines.append(f"**Accepts Healthy Volunteers**: {accepts}") 187 | 188 | lines.append("") 189 | 190 | # Detailed criteria 191 | if eligibility.get("inclusion_criteria"): 192 | lines.extend([ 193 | "### Inclusion Criteria", 194 | "", 195 | eligibility["inclusion_criteria"], 196 | "", 197 | ]) 198 | 199 | if eligibility.get("exclusion_criteria"): 200 | lines.extend([ 201 | "### Exclusion Criteria", 202 | "", 203 | eligibility["exclusion_criteria"], 204 | "", 205 | ]) 206 | 207 | return lines 208 | 209 | 210 | def _format_biomarker_section(trial: dict[str, Any]) -> list[str]: 211 | """Format biomarker requirements section.""" 212 | biomarkers = trial.get("biomarkers", []) 213 | if not biomarkers: 214 | 
return [] 215 | 216 | lines = ["## Biomarker Requirements", ""] 217 | for biomarker in biomarkers: 218 | if isinstance(biomarker, dict): 219 | name = biomarker.get("name", "Unknown") 220 | requirement = biomarker.get("requirement", "") 221 | lines.append(f"- **{name}**: {requirement}") 222 | else: 223 | lines.append(f"- {biomarker}") 224 | lines.append("") 225 | 226 | # Special eligibility notes 227 | if trial.get("accepts_brain_mets"): 228 | lines.extend([ 229 | "## Special Eligibility Notes", 230 | "", 231 | "- Accepts patients with brain metastases", 232 | "", 233 | ]) 234 | 235 | return lines 236 | 237 | 238 | async def _format_organizations_section( 239 | trial: dict[str, Any], 240 | api_key: str | None = None, 241 | ) -> list[str]: 242 | """Format organizations section.""" 243 | lead_org_id = trial.get("lead_org_id") 244 | lead_org_name = trial.get("lead_org", trial.get("sponsor")) 245 | 246 | if not (lead_org_id or lead_org_name): 247 | return [] 248 | 249 | lines = ["## Organizations", "", "### Lead Organization"] 250 | 251 | # Try to get detailed org info if we have an ID 252 | if lead_org_id and api_key: 253 | try: 254 | org_details = await get_organization(lead_org_id, api_key) 255 | lines.append( 256 | f"- **Name**: {org_details.get('name', lead_org_name)}" 257 | ) 258 | if org_details.get("type"): 259 | lines.append(f"- **Type**: {org_details['type']}") 260 | if org_details.get("city") and org_details.get("state"): 261 | lines.append( 262 | f"- **Location**: {org_details['city']}, {org_details['state']}" 263 | ) 264 | except Exception: 265 | lines.append(f"- **Name**: {lead_org_name}") 266 | else: 267 | lines.append(f"- **Name**: {lead_org_name}") 268 | 269 | lines.append("") 270 | 271 | # Collaborators 272 | collaborators = trial.get("collaborators", []) 273 | if collaborators: 274 | lines.append("### Collaborating Organizations") 275 | for collab in collaborators: 276 | if isinstance(collab, dict): 277 | lines.append(f"- {collab.get('name', 'Unknown')}") 278 | else: 279 | lines.append(f"- {collab}") 280 | lines.append("") 281 | 282 | return lines 283 | 284 | 285 | def _format_locations_section(trial: dict[str, Any]) -> list[str]: 286 | """Format locations section.""" 287 | locations = trial.get("sites", trial.get("locations", [])) 288 | if not locations: 289 | return [] 290 | 291 | lines = ["## Locations", ""] 292 | 293 | # Group by status 294 | recruiting_sites = [] 295 | other_sites = [] 296 | 297 | for location in locations: 298 | if isinstance(location, dict): 299 | status = location.get("recruitment_status", "").lower() 300 | if "recruiting" in status: 301 | recruiting_sites.append(location) 302 | else: 303 | other_sites.append(location) 304 | else: 305 | other_sites.append(location) 306 | 307 | if recruiting_sites: 308 | lines.append( 309 | f"### Currently Recruiting ({len(recruiting_sites)} sites)" 310 | ) 311 | lines.append("") 312 | for site in recruiting_sites[:10]: 313 | _format_site(site, lines) 314 | if len(recruiting_sites) > 10: 315 | lines.append( 316 | f"*... 
and {len(recruiting_sites) - 10} more recruiting sites*" 317 | ) 318 | lines.append("") 319 | 320 | if other_sites and len(other_sites) <= 5: 321 | lines.append(f"### Other Sites ({len(other_sites)} sites)") 322 | lines.append("") 323 | for site in other_sites: 324 | _format_site(site, lines) 325 | 326 | return lines 327 | 328 | 329 | def _format_contact_section(trial: dict[str, Any]) -> list[str]: 330 | """Format contact information section.""" 331 | contact = trial.get("overall_contact") 332 | if not contact: 333 | return [] 334 | 335 | lines = ["## Contact Information", ""] 336 | if isinstance(contact, dict): 337 | if contact.get("name"): 338 | lines.append(f"**Name**: {contact['name']}") 339 | if contact.get("phone"): 340 | lines.append(f"**Phone**: {contact['phone']}") 341 | if contact.get("email"): 342 | lines.append(f"**Email**: {contact['email']}") 343 | else: 344 | lines.append(str(contact)) 345 | lines.append("") 346 | return lines 347 | 348 | 349 | async def format_nci_trial_details( 350 | trial: dict[str, Any], 351 | api_key: str | None = None, 352 | ) -> str: 353 | """ 354 | Format NCI trial details as comprehensive markdown. 355 | 356 | Args: 357 | trial: Trial data from NCI API 358 | api_key: Optional API key for organization lookups 359 | 360 | Returns: 361 | Formatted markdown string 362 | """ 363 | lines = [] 364 | 365 | # Build document sections 366 | lines.extend(_format_trial_header(trial)) 367 | lines.extend(_format_protocol_section(trial)) 368 | lines.extend(_format_summary_section(trial)) 369 | lines.extend(_format_conditions_section(trial)) 370 | lines.extend(_format_interventions_section(trial)) 371 | lines.extend(_format_eligibility_section(trial)) 372 | lines.extend(_format_biomarker_section(trial)) 373 | lines.extend(await _format_organizations_section(trial, api_key)) 374 | lines.extend(_format_locations_section(trial)) 375 | lines.extend(_format_contact_section(trial)) 376 | 377 | # Footer 378 | lines.extend([ 379 | "---", 380 | "*Source: NCI Clinical Trials Search API*", 381 | ]) 382 | 383 | return "\n".join(lines) 384 | 385 | 386 | def _format_site(site: dict[str, Any], lines: list[str]) -> None: 387 | """Helper to format a single site/location.""" 388 | if isinstance(site, dict): 389 | name = site.get("org_name", site.get("facility", "")) 390 | city = site.get("city", "") 391 | state = site.get("state", "") 392 | country = site.get("country", "") 393 | 394 | location_parts = [p for p in [city, state] if p] 395 | if country and country != "United States": 396 | location_parts.append(country) 397 | 398 | if name: 399 | lines.append(f"**{name}**") 400 | if location_parts: 401 | lines.append(f"*{', '.join(location_parts)}*") 402 | 403 | # Contact info if available 404 | if site.get("contact_name"): 405 | lines.append(f"Contact: {site['contact_name']}") 406 | if site.get("contact_phone"): 407 | lines.append(f"Phone: {site['contact_phone']}") 408 | 409 | lines.append("") 410 | else: 411 | lines.append(f"- {site}") 412 | lines.append("") 413 | ``` -------------------------------------------------------------------------------- /tests/tdd/variants/test_external.py: -------------------------------------------------------------------------------- ```python 1 | """Tests for external variant data sources.""" 2 | 3 | from unittest.mock import AsyncMock, patch 4 | 5 | import pytest 6 | 7 | from biomcp.variants.cbio_external_client import ( 8 | CBioPortalExternalClient, 9 | CBioPortalVariantData, 10 | ) 11 | from biomcp.variants.external import ( 12 | 
EnhancedVariantAnnotation, 13 | ExternalVariantAggregator, 14 | TCGAClient, 15 | TCGAVariantData, 16 | ThousandGenomesClient, 17 | ThousandGenomesData, 18 | format_enhanced_annotations, 19 | ) 20 | 21 | 22 | class TestTCGAClient: 23 | """Tests for TCGA/GDC client.""" 24 | 25 | @pytest.mark.asyncio 26 | async def test_get_variant_data_success(self): 27 | """Test successful TCGA variant data retrieval.""" 28 | client = TCGAClient() 29 | 30 | mock_response = { 31 | "data": { 32 | "hits": [ 33 | { 34 | "ssm_id": "test-ssm-id", 35 | "cosmic_id": ["COSM476"], 36 | "gene_aa_change": ["BRAF V600E"], 37 | "genomic_dna_change": "chr7:g.140453136A>T", 38 | } 39 | ] 40 | } 41 | } 42 | 43 | mock_occ_response = { 44 | "data": { 45 | "hits": [ 46 | {"case": {"project": {"project_id": "TCGA-LUAD"}}}, 47 | {"case": {"project": {"project_id": "TCGA-LUAD"}}}, 48 | {"case": {"project": {"project_id": "TCGA-LUSC"}}}, 49 | ] 50 | } 51 | } 52 | 53 | with patch("biomcp.http_client.request_api") as mock_request: 54 | # First call is for SSM search, second is for occurrences 55 | mock_request.side_effect = [ 56 | (mock_response, None), 57 | (mock_occ_response, None), 58 | ] 59 | 60 | result = await client.get_variant_data("BRAF V600E") 61 | 62 | assert result is not None 63 | assert result.cosmic_id == "COSM476" 64 | assert "LUAD" in result.tumor_types 65 | assert "LUSC" in result.tumor_types 66 | assert result.affected_cases == 3 67 | assert result.consequence_type == "missense_variant" 68 | 69 | @pytest.mark.asyncio 70 | async def test_get_variant_data_not_found(self): 71 | """Test TCGA variant data when not found.""" 72 | client = TCGAClient() 73 | 74 | mock_response = {"data": {"hits": []}} 75 | 76 | with patch("biomcp.http_client.request_api") as mock_request: 77 | mock_request.return_value = (mock_response, None) 78 | 79 | result = await client.get_variant_data("chr7:g.140453136A>T") 80 | 81 | assert result is None 82 | 83 | 84 | class TestThousandGenomesClient: 85 | """Tests for 1000 Genomes client.""" 86 | 87 | @pytest.mark.asyncio 88 | async def test_get_variant_data_success(self): 89 | """Test successful 1000 Genomes data retrieval.""" 90 | client = ThousandGenomesClient() 91 | 92 | mock_response = { 93 | "populations": [ 94 | {"population": "1000GENOMES:phase_3:ALL", "frequency": 0.05}, 95 | {"population": "1000GENOMES:phase_3:EUR", "frequency": 0.08}, 96 | {"population": "1000GENOMES:phase_3:EAS", "frequency": 0.02}, 97 | ], 98 | "mappings": [ 99 | { 100 | "transcript_consequences": [ 101 | {"consequence_terms": ["missense_variant"]} 102 | ] 103 | } 104 | ], 105 | "ancestral_allele": "A", 106 | } 107 | 108 | with patch("biomcp.http_client.request_api") as mock_request: 109 | mock_request.return_value = (mock_response, None) 110 | 111 | result = await client.get_variant_data("rs113488022") 112 | 113 | assert result is not None 114 | assert result.global_maf == 0.05 115 | assert result.eur_maf == 0.08 116 | assert result.eas_maf == 0.02 117 | assert result.most_severe_consequence == "missense_variant" 118 | assert result.ancestral_allele == "A" 119 | 120 | def test_extract_population_frequencies(self): 121 | """Test population frequency extraction.""" 122 | client = ThousandGenomesClient() 123 | 124 | populations = [ 125 | {"population": "1000GENOMES:phase_3:ALL", "frequency": 0.05}, 126 | {"population": "1000GENOMES:phase_3:AFR", "frequency": 0.10}, 127 | {"population": "1000GENOMES:phase_3:AMR", "frequency": 0.07}, 128 | {"population": "1000GENOMES:phase_3:EAS", "frequency": 0.02}, 129 | {"population": 
"1000GENOMES:phase_3:EUR", "frequency": 0.08}, 130 | {"population": "1000GENOMES:phase_3:SAS", "frequency": 0.06}, 131 | { 132 | "population": "OTHER:population", 133 | "frequency": 0.99, 134 | }, # Should be ignored 135 | ] 136 | 137 | result = client._extract_population_frequencies(populations) 138 | 139 | assert result["global_maf"] == 0.05 140 | assert result["afr_maf"] == 0.10 141 | assert result["amr_maf"] == 0.07 142 | assert result["eas_maf"] == 0.02 143 | assert result["eur_maf"] == 0.08 144 | assert result["sas_maf"] == 0.06 145 | assert "OTHER" not in str(result) 146 | 147 | 148 | class TestCBioPortalExternalClient: 149 | """Tests for cBioPortal client.""" 150 | 151 | @pytest.mark.asyncio 152 | @pytest.mark.integration 153 | async def test_get_variant_data_success(self): 154 | """Test successful cBioPortal variant data retrieval using real API.""" 155 | client = CBioPortalExternalClient() 156 | 157 | # Test with a known variant 158 | result = await client.get_variant_data("BRAF V600E") 159 | 160 | assert result is not None 161 | assert result.total_cases > 0 162 | assert len(result.studies) > 0 163 | assert "Missense_Mutation" in result.mutation_types 164 | assert result.mutation_types["Missense_Mutation"] > 0 165 | assert result.mean_vaf is not None 166 | assert result.mean_vaf > 0.0 167 | assert result.mean_vaf < 1.0 168 | 169 | # Check cancer type distribution 170 | assert len(result.cancer_type_distribution) > 0 171 | # BRAF V600E is common in melanoma and colorectal 172 | cancer_types = list(result.cancer_type_distribution.keys()) 173 | assert any( 174 | "glioma" in ct.lower() 175 | or "lung" in ct.lower() 176 | or "colorectal" in ct.lower() 177 | for ct in cancer_types 178 | ) 179 | 180 | @pytest.mark.asyncio 181 | @pytest.mark.integration 182 | async def test_get_variant_data_not_found(self): 183 | """Test cBioPortal variant data when not found using real API.""" 184 | client = CBioPortalExternalClient() 185 | 186 | # Test with a variant that's extremely rare or doesn't exist 187 | result = await client.get_variant_data("BRAF X999Z") 188 | 189 | # Should return None for non-existent variants 190 | assert result is None 191 | 192 | @pytest.mark.asyncio 193 | @pytest.mark.integration 194 | async def test_get_variant_data_invalid_format(self): 195 | """Test cBioPortal with invalid gene/AA format.""" 196 | client = CBioPortalExternalClient() 197 | 198 | result = await client.get_variant_data("InvalidFormat") 199 | 200 | assert result is None 201 | 202 | @pytest.mark.asyncio 203 | @pytest.mark.integration 204 | async def test_get_variant_data_gene_not_found(self): 205 | """Test cBioPortal when gene is not found.""" 206 | client = CBioPortalExternalClient() 207 | 208 | # Test with a non-existent gene 209 | result = await client.get_variant_data("FAKEGENE123 V600E") 210 | 211 | assert result is None 212 | 213 | 214 | class TestExternalVariantAggregator: 215 | """Tests for external variant aggregator.""" 216 | 217 | @pytest.mark.asyncio 218 | async def test_get_enhanced_annotations_all_sources(self): 219 | """Test aggregating data from all sources.""" 220 | aggregator = ExternalVariantAggregator() 221 | 222 | # Mock all clients 223 | mock_tcga_data = TCGAVariantData( 224 | cosmic_id="COSM476", tumor_types=["LUAD"], affected_cases=10 225 | ) 226 | 227 | mock_1000g_data = ThousandGenomesData(global_maf=0.05, eur_maf=0.08) 228 | 229 | mock_cbio_data = CBioPortalVariantData( 230 | total_cases=42, studies=["tcga_pan_can_atlas_2018"] 231 | ) 232 | 233 | 
aggregator.tcga_client.get_variant_data = AsyncMock( 234 | return_value=mock_tcga_data 235 | ) 236 | aggregator.thousand_genomes_client.get_variant_data = AsyncMock( 237 | return_value=mock_1000g_data 238 | ) 239 | aggregator.cbioportal_client.get_variant_data = AsyncMock( 240 | return_value=mock_cbio_data 241 | ) 242 | 243 | # Mock variant data to extract gene/AA change 244 | variant_data = { 245 | "cadd": {"gene": {"genename": "BRAF"}}, 246 | "docm": {"aa_change": "p.V600E"}, 247 | } 248 | 249 | result = await aggregator.get_enhanced_annotations( 250 | "chr7:g.140453136A>T", variant_data=variant_data 251 | ) 252 | 253 | assert result.variant_id == "chr7:g.140453136A>T" 254 | assert result.tcga is not None 255 | assert result.tcga.cosmic_id == "COSM476" 256 | assert result.thousand_genomes is not None 257 | assert result.thousand_genomes.global_maf == 0.05 258 | assert result.cbioportal is not None 259 | assert result.cbioportal.total_cases == 42 260 | assert "tcga_pan_can_atlas_2018" in result.cbioportal.studies 261 | 262 | @pytest.mark.asyncio 263 | async def test_get_enhanced_annotations_with_errors(self): 264 | """Test aggregation when some sources fail.""" 265 | aggregator = ExternalVariantAggregator() 266 | 267 | # Mock TCGA to succeed 268 | mock_tcga_data = TCGAVariantData(cosmic_id="COSM476") 269 | aggregator.tcga_client.get_variant_data = AsyncMock( 270 | return_value=mock_tcga_data 271 | ) 272 | 273 | # Mock 1000G to fail 274 | aggregator.thousand_genomes_client.get_variant_data = AsyncMock( 275 | side_effect=Exception("Network error") 276 | ) 277 | 278 | result = await aggregator.get_enhanced_annotations( 279 | "chr7:g.140453136A>T", include_tcga=True, include_1000g=True 280 | ) 281 | 282 | assert result.tcga is not None 283 | assert result.thousand_genomes is None 284 | assert "thousand_genomes" in result.error_sources 285 | 286 | 287 | class TestFormatEnhancedAnnotations: 288 | """Tests for formatting enhanced annotations.""" 289 | 290 | def test_format_all_annotations(self): 291 | """Test formatting when all annotations are present.""" 292 | annotation = EnhancedVariantAnnotation( 293 | variant_id="chr7:g.140453136A>T", 294 | tcga=TCGAVariantData( 295 | cosmic_id="COSM476", 296 | tumor_types=["LUAD", "LUSC"], 297 | affected_cases=10, 298 | ), 299 | thousand_genomes=ThousandGenomesData( 300 | global_maf=0.05, eur_maf=0.08, ancestral_allele="A" 301 | ), 302 | cbioportal=CBioPortalVariantData( 303 | total_cases=42, 304 | studies=["tcga_pan_can_atlas_2018", "msk_impact_2017"], 305 | cancer_type_distribution={ 306 | "Melanoma": 30, 307 | "Thyroid Cancer": 12, 308 | }, 309 | mutation_types={ 310 | "Missense_Mutation": 40, 311 | "Nonsense_Mutation": 2, 312 | }, 313 | hotspot_count=35, 314 | mean_vaf=0.285, 315 | sample_types={"Primary": 25, "Metastatic": 17}, 316 | ), 317 | ) 318 | 319 | result = format_enhanced_annotations(annotation) 320 | 321 | assert result["variant_id"] == "chr7:g.140453136A>T" 322 | assert "tcga" in result["external_annotations"] 323 | assert result["external_annotations"]["tcga"]["cosmic_id"] == "COSM476" 324 | assert "1000_genomes" in result["external_annotations"] 325 | assert ( 326 | result["external_annotations"]["1000_genomes"]["global_maf"] 327 | == 0.05 328 | ) 329 | assert "cbioportal" in result["external_annotations"] 330 | cbio = result["external_annotations"]["cbioportal"] 331 | assert cbio["total_cases"] == 42 332 | assert "tcga_pan_can_atlas_2018" in cbio["studies"] 333 | assert cbio["cancer_types"]["Melanoma"] == 30 334 | assert 
cbio["mutation_types"]["Missense_Mutation"] == 40 335 | assert cbio["hotspot_samples"] == 35 336 | assert cbio["mean_vaf"] == 0.285 337 | assert cbio["sample_types"]["Primary"] == 25 338 | 339 | def test_format_partial_annotations(self): 340 | """Test formatting when only some annotations are present.""" 341 | annotation = EnhancedVariantAnnotation( 342 | variant_id="chr7:g.140453136A>T", 343 | tcga=TCGAVariantData(cosmic_id="COSM476"), 344 | error_sources=["thousand_genomes"], 345 | ) 346 | 347 | result = format_enhanced_annotations(annotation) 348 | 349 | assert "tcga" in result["external_annotations"] 350 | assert "1000_genomes" not in result["external_annotations"] 351 | assert "errors" in result["external_annotations"] 352 | assert "thousand_genomes" in result["external_annotations"]["errors"] 353 | ``` -------------------------------------------------------------------------------- /src/biomcp/cli/trials.py: -------------------------------------------------------------------------------- ```python 1 | """BioMCP Command Line Interface for clinical trials.""" 2 | 3 | import asyncio 4 | from typing import Annotated 5 | 6 | import typer 7 | 8 | from ..trials.getter import Module 9 | from ..trials.search import ( 10 | AgeGroup, 11 | DateField, 12 | InterventionType, 13 | LineOfTherapy, 14 | PrimaryPurpose, 15 | RecruitingStatus, 16 | SortOrder, 17 | SponsorType, 18 | StudyDesign, 19 | StudyType, 20 | TrialPhase, 21 | TrialQuery, 22 | ) 23 | 24 | trial_app = typer.Typer(help="Clinical trial operations") 25 | 26 | 27 | @trial_app.command("get") 28 | def get_trial_cli( 29 | nct_id: str, 30 | module: Annotated[ 31 | Module | None, 32 | typer.Argument( 33 | help="Module to retrieve: Protocol, Locations, References, or Outcomes", 34 | show_choices=True, 35 | show_default=True, 36 | case_sensitive=False, 37 | ), 38 | ] = Module.PROTOCOL, 39 | output_json: Annotated[ 40 | bool, 41 | typer.Option( 42 | "--json", 43 | "-j", 44 | help="Render in JSON format", 45 | case_sensitive=False, 46 | ), 47 | ] = False, 48 | source: Annotated[ 49 | str, 50 | typer.Option( 51 | "--source", 52 | help="Data source: 'clinicaltrials' (default) or 'nci'", 53 | show_choices=True, 54 | ), 55 | ] = "clinicaltrials", 56 | api_key: Annotated[ 57 | str | None, 58 | typer.Option( 59 | "--api-key", 60 | help="NCI API key (required if source='nci', overrides NCI_API_KEY env var)", 61 | envvar="NCI_API_KEY", 62 | ), 63 | ] = None, 64 | ): 65 | """Get trial information by NCT ID from ClinicalTrials.gov or NCI CTS API.""" 66 | # Import here to avoid circular imports 67 | from ..trials.getter import get_trial_unified 68 | 69 | # Check if NCI source requires API key 70 | if source == "nci" and not api_key: 71 | from ..integrations.cts_api import get_api_key_instructions 72 | 73 | typer.echo(get_api_key_instructions()) 74 | raise typer.Exit(1) 75 | 76 | # For ClinicalTrials.gov, use the direct get_trial function when JSON is requested 77 | if source == "clinicaltrials" and output_json: 78 | from ..trials.getter import get_trial 79 | 80 | if module is None: 81 | result = asyncio.run(get_trial(nct_id, output_json=True)) 82 | else: 83 | result = asyncio.run( 84 | get_trial(nct_id, module=module, output_json=True) 85 | ) 86 | typer.echo(result) 87 | else: 88 | # Map module to sections for unified getter 89 | sections = None 90 | if source == "clinicaltrials" and module: 91 | sections = ( 92 | ["all"] if module == Module.ALL else [module.value.lower()] 93 | ) 94 | 95 | result = asyncio.run( 96 | get_trial_unified( 97 | nct_id, 
source=source, api_key=api_key, sections=sections 98 | ) 99 | ) 100 | typer.echo(result) 101 | 102 | 103 | @trial_app.command("search") 104 | def search_trials_cli( 105 | condition: Annotated[ 106 | list[str] | None, 107 | typer.Option( 108 | "--condition", 109 | "-c", 110 | help="Medical condition to search for (can specify multiple)", 111 | ), 112 | ] = None, 113 | intervention: Annotated[ 114 | list[str] | None, 115 | typer.Option( 116 | "--intervention", 117 | "-i", 118 | help="Treatment or intervention to search for (can specify multiple)", 119 | show_choices=True, 120 | show_default=True, 121 | case_sensitive=False, 122 | ), 123 | ] = None, 124 | term: Annotated[ 125 | list[str] | None, 126 | typer.Option( 127 | "--term", 128 | "-t", 129 | help="General search terms (can specify multiple)", 130 | show_choices=True, 131 | show_default=True, 132 | case_sensitive=False, 133 | ), 134 | ] = None, 135 | nct_id: Annotated[ 136 | list[str] | None, 137 | typer.Option( 138 | "--nct-id", 139 | "-n", 140 | help="Clinical trial NCT ID (can specify multiple)", 141 | show_choices=True, 142 | show_default=True, 143 | case_sensitive=False, 144 | ), 145 | ] = None, 146 | recruiting_status: Annotated[ 147 | RecruitingStatus | None, 148 | typer.Option( 149 | "--status", 150 | "-s", 151 | help="Recruiting status.", 152 | show_choices=True, 153 | show_default=True, 154 | case_sensitive=False, 155 | ), 156 | ] = None, 157 | study_type: Annotated[ 158 | StudyType | None, 159 | typer.Option( 160 | "--type", 161 | help="Study type", 162 | show_choices=True, 163 | show_default=True, 164 | case_sensitive=False, 165 | ), 166 | ] = None, 167 | phase: Annotated[ 168 | TrialPhase | None, 169 | typer.Option( 170 | "--phase", 171 | "-p", 172 | help="Trial phase", 173 | show_choices=True, 174 | show_default=True, 175 | case_sensitive=False, 176 | ), 177 | ] = None, 178 | sort_order: Annotated[ 179 | SortOrder | None, 180 | typer.Option( 181 | "--sort", 182 | help="Sort order", 183 | show_choices=True, 184 | show_default=True, 185 | case_sensitive=False, 186 | ), 187 | ] = None, 188 | age_group: Annotated[ 189 | AgeGroup | None, 190 | typer.Option( 191 | "--age-group", 192 | "-a", 193 | help="Age group filter", 194 | show_choices=True, 195 | show_default=True, 196 | case_sensitive=False, 197 | ), 198 | ] = None, 199 | primary_purpose: Annotated[ 200 | PrimaryPurpose | None, 201 | typer.Option( 202 | "--purpose", 203 | help="Primary purpose filter", 204 | show_choices=True, 205 | show_default=True, 206 | case_sensitive=False, 207 | ), 208 | ] = None, 209 | min_date: Annotated[ 210 | str | None, 211 | typer.Option( 212 | "--min-date", 213 | help="Minimum date for filtering (YYYY-MM-DD format)", 214 | ), 215 | ] = None, 216 | max_date: Annotated[ 217 | str | None, 218 | typer.Option( 219 | "--max-date", 220 | help="Maximum date for filtering (YYYY-MM-DD format)", 221 | ), 222 | ] = None, 223 | date_field: Annotated[ 224 | DateField | None, 225 | typer.Option( 226 | "--date-field", 227 | help="Date field to filter", 228 | show_choices=True, 229 | show_default=True, 230 | case_sensitive=False, 231 | ), 232 | ] = DateField.STUDY_START, 233 | intervention_type: Annotated[ 234 | InterventionType | None, 235 | typer.Option( 236 | "--intervention-type", 237 | help="Intervention type filter", 238 | show_choices=True, 239 | show_default=True, 240 | case_sensitive=False, 241 | ), 242 | ] = None, 243 | sponsor_type: Annotated[ 244 | SponsorType | None, 245 | typer.Option( 246 | "--sponsor-type", 247 | help="Sponsor type filter", 248 
| show_choices=True, 249 | show_default=True, 250 | case_sensitive=False, 251 | ), 252 | ] = None, 253 | study_design: Annotated[ 254 | StudyDesign | None, 255 | typer.Option( 256 | "--study-design", 257 | help="Study design filter", 258 | show_choices=True, 259 | show_default=True, 260 | case_sensitive=False, 261 | ), 262 | ] = None, 263 | next_page_hash: Annotated[ 264 | str | None, 265 | typer.Option( 266 | "--next-page", 267 | help="Next page hash for pagination", 268 | ), 269 | ] = None, 270 | latitude: Annotated[ 271 | float | None, 272 | typer.Option( 273 | "--lat", 274 | help="Latitude for location-based search. For city names, geocode first (e.g., Cleveland: 41.4993)", 275 | ), 276 | ] = None, 277 | longitude: Annotated[ 278 | float | None, 279 | typer.Option( 280 | "--lon", 281 | help="Longitude for location-based search. For city names, geocode first (e.g., Cleveland: -81.6944)", 282 | ), 283 | ] = None, 284 | distance: Annotated[ 285 | int | None, 286 | typer.Option( 287 | "--distance", 288 | "-d", 289 | help="Distance in miles for location-based search (default: 50 miles if lat/lon provided)", 290 | ), 291 | ] = None, 292 | output_json: Annotated[ 293 | bool, 294 | typer.Option( 295 | "--json", 296 | "-j", 297 | help="Render in JSON format", 298 | case_sensitive=False, 299 | ), 300 | ] = False, 301 | prior_therapy: Annotated[ 302 | list[str] | None, 303 | typer.Option( 304 | "--prior-therapy", 305 | help="Prior therapies to search for in eligibility criteria (can specify multiple)", 306 | ), 307 | ] = None, 308 | progression_on: Annotated[ 309 | list[str] | None, 310 | typer.Option( 311 | "--progression-on", 312 | help="Therapies the patient has progressed on (can specify multiple)", 313 | ), 314 | ] = None, 315 | required_mutation: Annotated[ 316 | list[str] | None, 317 | typer.Option( 318 | "--required-mutation", 319 | help="Required mutations in eligibility criteria (can specify multiple)", 320 | ), 321 | ] = None, 322 | excluded_mutation: Annotated[ 323 | list[str] | None, 324 | typer.Option( 325 | "--excluded-mutation", 326 | help="Excluded mutations in eligibility criteria (can specify multiple)", 327 | ), 328 | ] = None, 329 | biomarker: Annotated[ 330 | list[str] | None, 331 | typer.Option( 332 | "--biomarker", 333 | help="Biomarker expression requirements in format 'MARKER:EXPRESSION' (e.g., 'PD-L1:≥50%')", 334 | ), 335 | ] = None, 336 | line_of_therapy: Annotated[ 337 | LineOfTherapy | None, 338 | typer.Option( 339 | "--line-of-therapy", 340 | help="Line of therapy filter", 341 | show_choices=True, 342 | show_default=True, 343 | case_sensitive=False, 344 | ), 345 | ] = None, 346 | allow_brain_mets: Annotated[ 347 | bool | None, 348 | typer.Option( 349 | "--allow-brain-mets/--no-brain-mets", 350 | help="Whether to allow trials that accept brain metastases", 351 | ), 352 | ] = None, 353 | return_field: Annotated[ 354 | list[str] | None, 355 | typer.Option( 356 | "--return-field", 357 | help="Specific fields to return in the response (can specify multiple)", 358 | ), 359 | ] = None, 360 | page_size: Annotated[ 361 | int | None, 362 | typer.Option( 363 | "--page-size", 364 | help="Number of results per page (1-1000)", 365 | min=1, 366 | max=1000, 367 | ), 368 | ] = None, 369 | source: Annotated[ 370 | str, 371 | typer.Option( 372 | "--source", 373 | help="Data source: 'clinicaltrials' (default) or 'nci'", 374 | show_choices=True, 375 | ), 376 | ] = "clinicaltrials", 377 | api_key: Annotated[ 378 | str | None, 379 | typer.Option( 380 | "--api-key", 381 | help="NCI API key 
(required if source='nci', overrides NCI_API_KEY env var)", 382 | envvar="NCI_API_KEY", 383 | ), 384 | ] = None, 385 | ): 386 | """Search for clinical trials from ClinicalTrials.gov or NCI CTS API.""" 387 | # Parse biomarker expression from CLI format 388 | biomarker_expression = None 389 | if biomarker: 390 | biomarker_expression = {} 391 | for item in biomarker: 392 | if ":" in item: 393 | marker, expr = item.split(":", 1) 394 | biomarker_expression[marker] = expr 395 | 396 | query = TrialQuery( 397 | conditions=condition, 398 | interventions=intervention, 399 | terms=term, 400 | nct_ids=nct_id, 401 | recruiting_status=recruiting_status, 402 | study_type=study_type, 403 | phase=phase, 404 | sort=sort_order, 405 | age_group=age_group, 406 | primary_purpose=primary_purpose, 407 | min_date=min_date, 408 | max_date=max_date, 409 | date_field=date_field, 410 | intervention_type=intervention_type, 411 | sponsor_type=sponsor_type, 412 | study_design=study_design, 413 | next_page_hash=next_page_hash, 414 | lat=latitude, 415 | long=longitude, 416 | distance=distance, 417 | prior_therapies=prior_therapy, 418 | progression_on=progression_on, 419 | required_mutations=required_mutation, 420 | excluded_mutations=excluded_mutation, 421 | biomarker_expression=biomarker_expression, 422 | line_of_therapy=line_of_therapy, 423 | allow_brain_mets=allow_brain_mets, 424 | return_fields=return_field, 425 | page_size=page_size, 426 | ) 427 | 428 | # Import here to avoid circular imports 429 | from ..trials.search import search_trials_unified 430 | 431 | # Check if NCI source requires API key 432 | if source == "nci" and not api_key: 433 | from ..integrations.cts_api import get_api_key_instructions 434 | 435 | typer.echo(get_api_key_instructions()) 436 | raise typer.Exit(1) 437 | 438 | result = asyncio.run( 439 | search_trials_unified( 440 | query, source=source, api_key=api_key, output_json=output_json 441 | ) 442 | ) 443 | typer.echo(result) 444 | ``` -------------------------------------------------------------------------------- /tests/tdd/openfda/test_drug_approvals.py: -------------------------------------------------------------------------------- ```python 1 | """Tests for FDA drug approval search and retrieval.""" 2 | 3 | from unittest.mock import patch 4 | 5 | import pytest 6 | 7 | from biomcp.openfda.drug_approvals import ( 8 | get_drug_approval, 9 | search_drug_approvals, 10 | ) 11 | 12 | 13 | class TestDrugApprovals: 14 | """Test FDA drug approval functions.""" 15 | 16 | @pytest.mark.asyncio 17 | async def test_search_drug_approvals_success(self): 18 | """Test successful drug approval search.""" 19 | mock_response = { 20 | "meta": {"results": {"skip": 0, "limit": 10, "total": 2}}, 21 | "results": [ 22 | { 23 | "application_number": "BLA125514", 24 | "openfda": { 25 | "brand_name": ["KEYTRUDA"], 26 | "generic_name": ["PEMBROLIZUMAB"], 27 | }, 28 | "products": [ 29 | { 30 | "brand_name": "KEYTRUDA", 31 | "dosage_form": "INJECTION", 32 | "strength": "100MG/4ML", 33 | "marketing_status": "Prescription", 34 | } 35 | ], 36 | "sponsor_name": "MERCK SHARP DOHME", 37 | "submissions": [ 38 | { 39 | "submission_type": "ORIG", 40 | "submission_number": "1", 41 | "submission_status": "AP", 42 | "submission_status_date": "20140904", 43 | "review_priority": "PRIORITY", 44 | } 45 | ], 46 | }, 47 | { 48 | "application_number": "NDA208716", 49 | "openfda": { 50 | "brand_name": ["VENCLEXTA"], 51 | "generic_name": ["VENETOCLAX"], 52 | }, 53 | "products": [ 54 | { 55 | "brand_name": "VENCLEXTA", 56 | "dosage_form": 
"TABLET", 57 | "strength": "100MG", 58 | "marketing_status": "Prescription", 59 | } 60 | ], 61 | "sponsor_name": "ABBVIE INC", 62 | "submissions": [ 63 | { 64 | "submission_type": "ORIG", 65 | "submission_number": "1", 66 | "submission_status": "AP", 67 | "submission_status_date": "20160411", 68 | "review_priority": "PRIORITY", 69 | } 70 | ], 71 | }, 72 | ], 73 | } 74 | 75 | with patch( 76 | "biomcp.openfda.drug_approvals.make_openfda_request" 77 | ) as mock_request: 78 | mock_request.return_value = (mock_response, None) 79 | 80 | result = await search_drug_approvals( 81 | drug="pembrolizumab", limit=10 82 | ) 83 | 84 | # Check that result contains expected drug names 85 | assert "KEYTRUDA" in result 86 | assert "PEMBROLIZUMAB" in result 87 | assert "BLA125514" in result 88 | assert "MERCK" in result 89 | 90 | # Check for disclaimer 91 | assert "FDA Data Notice" in result 92 | 93 | # Check summary statistics 94 | assert "Total Records Found**: 2 records" in result 95 | 96 | @pytest.mark.asyncio 97 | async def test_search_drug_approvals_no_results(self): 98 | """Test drug approval search with no results.""" 99 | mock_response = { 100 | "meta": {"results": {"skip": 0, "limit": 10, "total": 0}}, 101 | "results": [], 102 | } 103 | 104 | with patch( 105 | "biomcp.openfda.drug_approvals.make_openfda_request" 106 | ) as mock_request: 107 | mock_request.return_value = (mock_response, None) 108 | 109 | result = await search_drug_approvals( 110 | drug="nonexistentdrug123", limit=10 111 | ) 112 | 113 | assert "No drug approval records found" in result 114 | 115 | @pytest.mark.asyncio 116 | async def test_search_drug_approvals_api_error(self): 117 | """Test drug approval search with API error.""" 118 | with patch( 119 | "biomcp.openfda.drug_approvals.make_openfda_request" 120 | ) as mock_request: 121 | mock_request.return_value = (None, "API rate limit exceeded") 122 | 123 | result = await search_drug_approvals(drug="pembrolizumab") 124 | 125 | assert "Error searching drug approvals" in result 126 | assert "API rate limit exceeded" in result 127 | 128 | @pytest.mark.asyncio 129 | async def test_get_drug_approval_success(self): 130 | """Test successful retrieval of specific drug approval.""" 131 | mock_response = { 132 | "results": [ 133 | { 134 | "application_number": "BLA125514", 135 | "openfda": { 136 | "brand_name": ["KEYTRUDA"], 137 | "generic_name": ["PEMBROLIZUMAB"], 138 | "manufacturer_name": ["MERCK SHARP & DOHME CORP."], 139 | "substance_name": ["PEMBROLIZUMAB"], 140 | "product_type": ["HUMAN PRESCRIPTION DRUG"], 141 | }, 142 | "sponsor_name": "MERCK SHARP DOHME", 143 | "products": [ 144 | { 145 | "product_number": "001", 146 | "brand_name": "KEYTRUDA", 147 | "dosage_form": "INJECTION", 148 | "strength": "100MG/4ML", 149 | "marketing_status": "Prescription", 150 | "te_code": "AB", 151 | } 152 | ], 153 | "submissions": [ 154 | { 155 | "submission_type": "ORIG", 156 | "submission_number": "1", 157 | "submission_status": "AP", 158 | "submission_status_date": "20140904", 159 | "submission_class_code": "N", 160 | "review_priority": "PRIORITY", 161 | "submission_public_notes": "APPROVAL FOR ADVANCED MELANOMA", 162 | }, 163 | { 164 | "submission_type": "SUPPL", 165 | "submission_number": "2", 166 | "submission_status": "AP", 167 | "submission_status_date": "20151002", 168 | "submission_class_code": "S", 169 | "review_priority": "PRIORITY", 170 | "submission_public_notes": "NSCLC INDICATION", 171 | }, 172 | ], 173 | } 174 | ] 175 | } 176 | 177 | with patch( 178 | 
"biomcp.openfda.drug_approvals.make_openfda_request" 179 | ) as mock_request: 180 | mock_request.return_value = (mock_response, None) 181 | 182 | result = await get_drug_approval("BLA125514") 183 | 184 | # Check basic information 185 | assert "BLA125514" in result 186 | assert "KEYTRUDA" in result 187 | assert "PEMBROLIZUMAB" in result 188 | assert "MERCK" in result 189 | 190 | # Check product details 191 | assert "100MG/4ML" in result 192 | assert "INJECTION" in result 193 | 194 | # Check submission history 195 | assert "20140904" in result # Submission date 196 | assert "20151002" in result # Second submission date 197 | assert "PRIORITY" in result 198 | 199 | # Check disclaimer 200 | assert "FDA Data Notice" in result 201 | 202 | @pytest.mark.asyncio 203 | async def test_get_drug_approval_not_found(self): 204 | """Test retrieval of non-existent drug approval.""" 205 | mock_response = {"results": []} 206 | 207 | with patch( 208 | "biomcp.openfda.drug_approvals.make_openfda_request" 209 | ) as mock_request: 210 | mock_request.return_value = (mock_response, None) 211 | 212 | result = await get_drug_approval("INVALID123") 213 | 214 | assert "No approval record found" in result 215 | assert "INVALID123" in result 216 | 217 | @pytest.mark.asyncio 218 | async def test_search_with_application_type_filter(self): 219 | """Test drug approval search with application type filter.""" 220 | mock_response = { 221 | "meta": {"results": {"skip": 0, "limit": 10, "total": 5}}, 222 | "results": [ 223 | { 224 | "application_number": "BLA125514", 225 | "openfda": { 226 | "brand_name": ["KEYTRUDA"], 227 | "generic_name": ["PEMBROLIZUMAB"], 228 | }, 229 | "sponsor_name": "MERCK SHARP DOHME", 230 | "submissions": [ 231 | { 232 | "submission_type": "ORIG", 233 | "submission_status": "AP", 234 | "submission_status_date": "20140904", 235 | } 236 | ], 237 | } 238 | ] 239 | * 5, # Simulate 5 BLA results 240 | } 241 | 242 | with patch( 243 | "biomcp.openfda.drug_approvals.make_openfda_request" 244 | ) as mock_request: 245 | mock_request.return_value = (mock_response, None) 246 | 247 | # Test with a specific application number pattern 248 | result = await search_drug_approvals( 249 | application_number="BLA125514", limit=10 250 | ) 251 | 252 | # Just check that results are returned 253 | assert "Total Records Found**: 5 records" in result 254 | assert "BLA125514" in result 255 | 256 | @pytest.mark.asyncio 257 | async def test_search_with_sponsor_filter(self): 258 | """Test drug approval search with sponsor filter.""" 259 | mock_response = { 260 | "meta": {"results": {"skip": 0, "limit": 10, "total": 3}}, 261 | "results": [ 262 | { 263 | "application_number": "NDA123456", 264 | "sponsor_name": "PFIZER INC", 265 | "openfda": {"brand_name": ["DRUG1"]}, 266 | }, 267 | { 268 | "application_number": "NDA789012", 269 | "sponsor_name": "PFIZER INC", 270 | "openfda": {"brand_name": ["DRUG2"]}, 271 | }, 272 | ], 273 | } 274 | 275 | with patch( 276 | "biomcp.openfda.drug_approvals.make_openfda_request" 277 | ) as mock_request: 278 | mock_request.return_value = (mock_response, None) 279 | 280 | # Test with a drug name instead of sponsor 281 | result = await search_drug_approvals( 282 | drug="pembrolizumab", limit=10 283 | ) 284 | 285 | # Just check that results are returned 286 | assert "PFIZER INC" in result 287 | assert "Total Records Found**: 3 records" in result 288 | 289 | def test_validate_approval_response(self): 290 | """Test validation of drug approval response structure.""" 291 | from biomcp.openfda.validation import 
validate_fda_response 292 | 293 | # Valid response 294 | valid_response = { 295 | "results": [ 296 | {"application_number": "BLA125514", "sponsor_name": "MERCK"} 297 | ] 298 | } 299 | 300 | assert validate_fda_response(valid_response) is True 301 | 302 | # Invalid response (not a dict) 303 | from biomcp.openfda.exceptions import OpenFDAValidationError 304 | 305 | with pytest.raises(OpenFDAValidationError): 306 | validate_fda_response("not a dict") 307 | 308 | # Response missing results 309 | empty_response = {} 310 | assert ( 311 | validate_fda_response(empty_response) is True 312 | ) # Should handle gracefully 313 | 314 | @pytest.mark.asyncio 315 | async def test_rate_limit_handling(self): 316 | """Test handling of FDA API rate limits.""" 317 | with patch( 318 | "biomcp.openfda.drug_approvals.make_openfda_request" 319 | ) as mock_request: 320 | # First call returns rate limit error 321 | mock_request.side_effect = [ 322 | (None, "429 Too Many Requests"), 323 | ( 324 | { # Second call succeeds after retry 325 | "meta": {"results": {"total": 1}}, 326 | "results": [{"application_number": "NDA123456"}], 327 | }, 328 | None, 329 | ), 330 | ] 331 | 332 | result = await search_drug_approvals(drug="test") 333 | 334 | # Should retry and eventually succeed 335 | assert mock_request.call_count >= 1 336 | # Result should be from successful retry 337 | if "NDA123456" in result: 338 | assert "NDA123456" in result 339 | else: 340 | # Or should show rate limit error if retries exhausted 341 | assert "429" in result.lower() or "too many" in result.lower() 342 | ``` -------------------------------------------------------------------------------- /src/biomcp/variants/cbioportal_mutations.py: -------------------------------------------------------------------------------- ```python 1 | """cBioPortal mutation-specific search functionality.""" 2 | 3 | import logging 4 | from collections import Counter, defaultdict 5 | from typing import Any, cast 6 | 7 | from pydantic import BaseModel, Field 8 | 9 | from ..utils.cancer_types_api import get_cancer_type_client 10 | from ..utils.cbio_http_adapter import CBioHTTPAdapter 11 | from ..utils.gene_validator import is_valid_gene_symbol, sanitize_gene_symbol 12 | from ..utils.metrics import track_api_call 13 | from ..utils.mutation_filter import MutationFilter 14 | from ..utils.request_cache import request_cache 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class MutationHit(BaseModel): 20 | """A specific mutation occurrence in a study.""" 21 | 22 | study_id: str 23 | molecular_profile_id: str 24 | protein_change: str 25 | mutation_type: str 26 | start_position: int | None = None 27 | end_position: int | None = None 28 | reference_allele: str | None = None 29 | variant_allele: str | None = None 30 | sample_id: str | None = None 31 | 32 | 33 | class StudyMutationSummary(BaseModel): 34 | """Summary of mutations in a specific study.""" 35 | 36 | study_id: str 37 | study_name: str 38 | cancer_type: str 39 | mutation_count: int 40 | sample_count: int = 0 41 | mutations: list[str] = Field(default_factory=list) 42 | 43 | 44 | class MutationSearchResult(BaseModel): 45 | """Result of a mutation-specific search.""" 46 | 47 | gene: str 48 | specific_mutation: str | None = None 49 | pattern: str | None = None 50 | total_studies: int = 0 51 | studies_with_mutation: int = 0 52 | total_mutations: int = 0 53 | top_studies: list[StudyMutationSummary] = Field(default_factory=list) 54 | mutation_types: dict[str, int] = Field(default_factory=dict) 55 | 56 | 57 | class 
CBioPortalMutationClient: 58 | """Client for mutation-specific searches in cBioPortal.""" 59 | 60 | def __init__(self): 61 | """Initialize the mutation search client.""" 62 | self.http_adapter = CBioHTTPAdapter() 63 | 64 | async def __aenter__(self): 65 | """Async context manager entry.""" 66 | return self 67 | 68 | async def __aexit__(self, exc_type, exc_val, exc_tb): 69 | """Async context manager exit.""" 70 | pass # No cleanup needed with centralized client 71 | 72 | @request_cache(ttl=1800) # Cache for 30 minutes 73 | @track_api_call("cbioportal_mutation_search") 74 | async def search_specific_mutation( 75 | self, 76 | gene: str, 77 | mutation: str | None = None, 78 | pattern: str | None = None, 79 | max_studies: int = 20, 80 | ) -> MutationSearchResult | None: 81 | """Search for specific mutations across all cBioPortal studies. 82 | 83 | Args: 84 | gene: Gene symbol (e.g., "SRSF2") 85 | mutation: Specific mutation (e.g., "F57Y") 86 | pattern: Pattern to match (e.g., "F57" for F57*) 87 | max_studies: Maximum number of top studies to return 88 | 89 | Returns: 90 | Detailed mutation search results or None if not found 91 | """ 92 | # Validate gene 93 | if not is_valid_gene_symbol(gene): 94 | logger.warning(f"Invalid gene symbol: {gene}") 95 | return None 96 | 97 | gene = sanitize_gene_symbol(gene) 98 | 99 | try: 100 | return await self._search_mutations_with_adapter( 101 | gene, mutation, pattern, max_studies 102 | ) 103 | except TimeoutError: 104 | logger.error(f"Timeout searching mutations for {gene}") 105 | return None 106 | except Exception as e: 107 | logger.error(f"Error searching mutations for {gene}: {e}") 108 | return None 109 | 110 | async def _search_mutations_with_adapter( 111 | self, 112 | gene: str, 113 | mutation: str | None, 114 | pattern: str | None, 115 | max_studies: int, 116 | ) -> MutationSearchResult | None: 117 | """Perform the actual mutation search with the adapter.""" 118 | # Get gene info 119 | gene_data, error = await self.http_adapter.get( 120 | f"/genes/{gene}", endpoint_key="cbioportal_genes" 121 | ) 122 | 123 | if error or not gene_data: 124 | logger.warning(f"Gene {gene} not found in cBioPortal") 125 | return None 126 | 127 | entrez_id = gene_data.get("entrezGeneId") 128 | 129 | if not entrez_id: 130 | logger.warning(f"No Entrez ID found for gene {gene}") 131 | return None 132 | 133 | # Get all mutation profiles 134 | logger.info(f"Fetching mutation profiles for {gene}") 135 | all_profiles, prof_error = await self.http_adapter.get( 136 | "/molecular-profiles", 137 | params={"molecularAlterationType": "MUTATION_EXTENDED"}, 138 | endpoint_key="cbioportal_molecular_profiles", 139 | ) 140 | 141 | if prof_error or not all_profiles: 142 | logger.error("Failed to fetch molecular profiles") 143 | return None 144 | profile_ids = [p["molecularProfileId"] for p in all_profiles] 145 | 146 | # Batch fetch mutations (this is the slow part) 147 | logger.info( 148 | f"Fetching mutations for {gene} across {len(profile_ids)} profiles" 149 | ) 150 | mutations = await self._fetch_all_mutations(profile_ids, entrez_id) 151 | 152 | if not mutations: 153 | logger.info(f"No mutations found for {gene}") 154 | return MutationSearchResult(gene=gene) 155 | 156 | # Filter mutations based on criteria 157 | mutation_filter = MutationFilter(mutation, pattern) 158 | filtered_mutations = mutation_filter.filter_mutations(mutations) 159 | 160 | # Get study information 161 | studies_info = await self._get_studies_info() 162 | 163 | # Aggregate results by study 164 | study_mutations = 
self._aggregate_by_study( 165 | cast(list[MutationHit], filtered_mutations), studies_info 166 | ) 167 | 168 | # Sort by mutation count and take top studies 169 | top_studies = sorted( 170 | study_mutations.values(), 171 | key=lambda x: x.mutation_count, 172 | reverse=True, 173 | )[:max_studies] 174 | 175 | # Count mutation types 176 | mutation_types = Counter(m.protein_change for m in filtered_mutations) 177 | 178 | return MutationSearchResult( 179 | gene=gene, 180 | specific_mutation=mutation, 181 | pattern=pattern, 182 | total_studies=len(all_profiles), 183 | studies_with_mutation=len(study_mutations), 184 | total_mutations=len(filtered_mutations), 185 | top_studies=top_studies, 186 | mutation_types=dict(mutation_types.most_common(10)), 187 | ) 188 | 189 | @track_api_call("cbioportal_fetch_mutations") 190 | async def _fetch_all_mutations( 191 | self, 192 | profile_ids: list[str], 193 | entrez_id: int, 194 | ) -> list[MutationHit]: 195 | """Fetch all mutations for a gene across all profiles.""" 196 | 197 | try: 198 | raw_mutations, error = await self.http_adapter.post( 199 | "/mutations/fetch", 200 | data={ 201 | "molecularProfileIds": profile_ids, 202 | "entrezGeneIds": [entrez_id], 203 | }, 204 | endpoint_key="cbioportal_mutations", 205 | cache_ttl=1800, # Cache for 30 minutes 206 | ) 207 | 208 | if error or not raw_mutations: 209 | logger.error(f"Failed to fetch mutations: {error}") 210 | return [] 211 | 212 | # Convert to MutationHit objects 213 | mutations = [] 214 | for mut in raw_mutations: 215 | try: 216 | # Extract study ID from molecular profile ID 217 | study_id = mut.get("molecularProfileId", "").replace( 218 | "_mutations", "" 219 | ) 220 | 221 | mutations.append( 222 | MutationHit( 223 | study_id=study_id, 224 | molecular_profile_id=mut.get( 225 | "molecularProfileId", "" 226 | ), 227 | protein_change=mut.get("proteinChange", ""), 228 | mutation_type=mut.get("mutationType", ""), 229 | start_position=mut.get("startPosition"), 230 | end_position=mut.get("endPosition"), 231 | reference_allele=mut.get("referenceAllele"), 232 | variant_allele=mut.get("variantAllele"), 233 | sample_id=mut.get("sampleId"), 234 | ) 235 | ) 236 | except Exception as e: 237 | logger.debug(f"Failed to parse mutation: {e}") 238 | continue 239 | 240 | return mutations 241 | 242 | except Exception as e: 243 | logger.error(f"Error fetching mutations: {e}") 244 | return [] 245 | 246 | async def _get_studies_info(self) -> dict[str, dict[str, Any]]: 247 | """Get information about all studies.""" 248 | 249 | try: 250 | studies, error = await self.http_adapter.get( 251 | "/studies", 252 | endpoint_key="cbioportal_studies", 253 | cache_ttl=3600, # Cache for 1 hour 254 | ) 255 | 256 | if error or not studies: 257 | return {} 258 | study_info = {} 259 | cancer_type_client = get_cancer_type_client() 260 | 261 | for s in studies: 262 | cancer_type_id = s.get("cancerTypeId", "") 263 | if cancer_type_id and cancer_type_id != "unknown": 264 | # Use the API to get the proper display name 265 | cancer_type = ( 266 | await cancer_type_client.get_cancer_type_name( 267 | cancer_type_id 268 | ) 269 | ) 270 | else: 271 | # Try to get from full study info 272 | cancer_type = ( 273 | await cancer_type_client.get_study_cancer_type( 274 | s["studyId"] 275 | ) 276 | ) 277 | 278 | study_info[s["studyId"]] = { 279 | "name": s.get("name", ""), 280 | "cancer_type": cancer_type, 281 | } 282 | return study_info 283 | except Exception as e: 284 | logger.error(f"Error fetching studies: {e}") 285 | return {} 286 | 287 | def 
_aggregate_by_study( 288 | self, 289 | mutations: list[MutationHit], 290 | studies_info: dict[str, dict[str, Any]], 291 | ) -> dict[str, StudyMutationSummary]: 292 | """Aggregate mutations by study.""" 293 | study_mutations = defaultdict(list) 294 | study_samples = defaultdict(set) 295 | 296 | for mut in mutations: 297 | study_id = mut.study_id 298 | study_mutations[study_id].append(mut.protein_change) 299 | if mut.sample_id: 300 | study_samples[study_id].add(mut.sample_id) 301 | 302 | # Create summaries 303 | summaries = {} 304 | for study_id, mutations_list in study_mutations.items(): 305 | info = studies_info.get(study_id, {}) 306 | summaries[study_id] = StudyMutationSummary( 307 | study_id=study_id, 308 | study_name=info.get("name", study_id), 309 | cancer_type=info.get("cancer_type", "unknown"), 310 | mutation_count=len(mutations_list), 311 | sample_count=len(study_samples[study_id]), 312 | mutations=list(set(mutations_list))[ 313 | :5 314 | ], # Top 5 unique mutations 315 | ) 316 | 317 | return summaries 318 | 319 | 320 | def format_mutation_search_result(result: MutationSearchResult) -> str: 321 | """Format mutation search results as markdown.""" 322 | lines = [f"### cBioPortal Mutation Search: {result.gene}"] 323 | 324 | if result.specific_mutation: 325 | lines.append(f"**Specific Mutation**: {result.specific_mutation}") 326 | elif result.pattern: 327 | lines.append(f"**Pattern**: {result.pattern}") 328 | 329 | lines.extend([ 330 | f"- **Total Studies**: {result.total_studies}", 331 | f"- **Studies with Mutation**: {result.studies_with_mutation}", 332 | f"- **Total Mutations Found**: {result.total_mutations}", 333 | ]) 334 | 335 | if result.top_studies: 336 | lines.append("\n**Top Studies by Mutation Count:**") 337 | lines.append("| Count | Study ID | Cancer Type | Study Name |") 338 | lines.append("|-------|----------|-------------|------------|") 339 | 340 | for study in result.top_studies[:10]: 341 | study_id = ( 342 | study.study_id[:20] + "..." 343 | if len(study.study_id) > 20 344 | else study.study_id 345 | ) 346 | study_name = ( 347 | study.study_name[:40] + "..." 
348 | if len(study.study_name) > 40 349 | else study.study_name 350 | ) 351 | lines.append( 352 | f"| {study.mutation_count:5d} | {study_id:<20} | " 353 | f"{study.cancer_type:<11} | {study_name} |" 354 | ) 355 | 356 | if result.mutation_types and len(result.mutation_types) > 1: 357 | lines.append("\n**Mutation Types Found:**") 358 | for mut_type, count in list(result.mutation_types.items())[:5]: 359 | lines.append(f"- {mut_type}: {count} occurrences") 360 | 361 | return "\n".join(lines) 362 | ``` -------------------------------------------------------------------------------- /src/biomcp/router_handlers.py: -------------------------------------------------------------------------------- ```python 1 | """Domain-specific search handlers for the router module.""" 2 | 3 | import json 4 | import logging 5 | from typing import Any 6 | 7 | from .exceptions import ( 8 | InvalidParameterError, 9 | ResultParsingError, 10 | SearchExecutionError, 11 | ) 12 | from .parameter_parser import ParameterParser 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | async def handle_article_search( 18 | genes: list[str] | None, 19 | diseases: list[str] | None, 20 | variants: list[str] | None, 21 | chemicals: list[str] | None, 22 | keywords: list[str] | None, 23 | page: int, 24 | page_size: int, 25 | ) -> tuple[list[dict], int]: 26 | """Handle article domain search.""" 27 | logger.info("Executing article search") 28 | try: 29 | from biomcp.articles.search import PubmedRequest 30 | from biomcp.articles.unified import search_articles_unified 31 | 32 | request = PubmedRequest( 33 | chemicals=chemicals or [], 34 | diseases=diseases or [], 35 | genes=genes or [], 36 | keywords=keywords or [], 37 | variants=variants or [], 38 | ) 39 | result_str = await search_articles_unified( 40 | request, 41 | include_pubmed=True, 42 | include_preprints=True, # Changed to match individual tool default 43 | output_json=True, 44 | ) 45 | except Exception as e: 46 | logger.error(f"Article search failed: {e}") 47 | raise SearchExecutionError("article", e) from e 48 | 49 | # Parse the JSON results 50 | try: 51 | parsed_result = json.loads(result_str) 52 | # Handle unified search format (may include cBioPortal data) 53 | if isinstance(parsed_result, dict) and "articles" in parsed_result: 54 | all_results = parsed_result["articles"] 55 | # Log if cBioPortal data was included 56 | if "cbioportal_summary" in parsed_result: 57 | logger.info("Article search included cBioPortal summary data") 58 | elif isinstance(parsed_result, list): 59 | all_results = parsed_result 60 | else: 61 | # Handle unexpected format 62 | logger.warning( 63 | f"Unexpected article result format: {type(parsed_result)}" 64 | ) 65 | all_results = [] 66 | except (json.JSONDecodeError, TypeError) as e: 67 | logger.error(f"Failed to parse article results: {e}") 68 | raise ResultParsingError("article", e) from e 69 | 70 | # Manual pagination 71 | start = (page - 1) * page_size 72 | end = start + page_size 73 | items = all_results[start:end] 74 | total = len(all_results) 75 | 76 | logger.info( 77 | f"Article search returned {total} total results, showing {len(items)}" 78 | ) 79 | 80 | return items, total 81 | 82 | 83 | def _parse_trial_results(result_str: str) -> tuple[list[dict], int]: 84 | """Parse trial search results from JSON.""" 85 | try: 86 | result_dict = json.loads(result_str) 87 | # Handle both API v2 structure and flat structure 88 | if isinstance(result_dict, dict) and "studies" in result_dict: 89 | all_results = result_dict["studies"] 90 | elif 
isinstance(result_dict, list): 91 | all_results = result_dict 92 | else: 93 | all_results = [result_dict] 94 | except (json.JSONDecodeError, TypeError) as e: 95 | logger.error(f"Failed to parse trial results: {e}") 96 | raise ResultParsingError("trial", e) from e 97 | 98 | return all_results, len(all_results) 99 | 100 | 101 | async def handle_trial_search( 102 | conditions: list[str] | None, 103 | interventions: list[str] | None, 104 | keywords: list[str] | None, 105 | recruiting_status: str | None, 106 | phase: str | None, 107 | genes: list[str] | None, 108 | page: int, 109 | page_size: int, 110 | ) -> tuple[list[dict], int]: 111 | """Handle trial domain search.""" 112 | logger.info("Executing trial search") 113 | 114 | # Build the trial search parameters 115 | search_params: dict[str, Any] = {} 116 | if conditions: 117 | search_params["conditions"] = conditions 118 | if interventions: 119 | search_params["interventions"] = interventions 120 | if recruiting_status: 121 | search_params["recruiting_status"] = recruiting_status 122 | if phase: 123 | try: 124 | search_params["phase"] = ParameterParser.normalize_phase(phase) 125 | except InvalidParameterError: 126 | raise 127 | if keywords: 128 | search_params["keywords"] = keywords 129 | 130 | # Add gene support for trials 131 | if genes: 132 | # Convert genes to keywords for trial search 133 | if "keywords" in search_params: 134 | search_params["keywords"].extend(genes) 135 | else: 136 | search_params["keywords"] = genes 137 | 138 | try: 139 | from biomcp.trials.search import TrialQuery, search_trials 140 | 141 | # Convert search_params to TrialQuery 142 | trial_query = TrialQuery(**search_params, page_size=page_size) 143 | result_str = await search_trials(trial_query, output_json=True) 144 | except Exception as e: 145 | logger.error(f"Trial search failed: {e}") 146 | raise SearchExecutionError("trial", e) from e 147 | 148 | # Parse the JSON results 149 | all_results, total = _parse_trial_results(result_str) 150 | 151 | # Manual pagination 152 | start = (page - 1) * page_size 153 | end = start + page_size 154 | items = all_results[start:end] 155 | 156 | logger.info( 157 | f"Trial search returned {total} total results, showing {len(items)}" 158 | ) 159 | 160 | return items, total 161 | 162 | 163 | async def handle_variant_search( 164 | genes: list[str] | None, 165 | significance: str | None, 166 | keywords: list[str] | None, 167 | page: int, 168 | page_size: int, 169 | ) -> tuple[list[dict], int]: 170 | """Handle variant domain search.""" 171 | logger.info("Executing variant search") 172 | 173 | try: 174 | from biomcp.variants.search import VariantQuery, search_variants 175 | 176 | # Build query 177 | queries = [] 178 | if genes: 179 | queries.extend(genes) 180 | if keywords: 181 | queries.extend(keywords) 182 | 183 | if not queries: 184 | raise InvalidParameterError( 185 | "genes or keywords", 186 | None, 187 | "at least one search term for variant search", 188 | ) 189 | 190 | request = VariantQuery( 191 | gene=genes[0] if genes else None, 192 | size=page_size, 193 | significance=significance, 194 | ) 195 | result_str = await search_variants(request, output_json=True) 196 | except Exception as e: 197 | logger.error(f"Variant search failed: {e}") 198 | raise SearchExecutionError("variant", e) from e 199 | 200 | # Parse the JSON results 201 | try: 202 | all_results = json.loads(result_str) 203 | except (json.JSONDecodeError, TypeError) as e: 204 | logger.error(f"Failed to parse variant results: {e}") 205 | raise ResultParsingError("variant", 
e) from e 206 | 207 | # Variants API returns paginated results 208 | total = len(all_results) 209 | 210 | logger.info(f"Variant search returned {total} results") 211 | 212 | return all_results, total 213 | 214 | 215 | async def handle_nci_organization_search( 216 | name: str | None, 217 | organization_type: str | None, 218 | city: str | None, 219 | state: str | None, 220 | api_key: str | None, 221 | page: int, 222 | page_size: int, 223 | ) -> tuple[list[dict], int]: 224 | """Handle NCI organization domain search.""" 225 | logger.info("Executing NCI organization search") 226 | 227 | try: 228 | from biomcp.organizations import ( 229 | search_organizations, 230 | search_organizations_with_or, 231 | ) 232 | 233 | # Check if name contains OR query 234 | if name and (" OR " in name or " or " in name): 235 | results = await search_organizations_with_or( 236 | name_query=name, 237 | org_type=organization_type, 238 | city=city, 239 | state=state, 240 | page_size=page_size, 241 | page=page, 242 | api_key=api_key, 243 | ) 244 | else: 245 | results = await search_organizations( 246 | name=name, 247 | org_type=organization_type, 248 | city=city, 249 | state=state, 250 | page_size=page_size, 251 | page=page, 252 | api_key=api_key, 253 | ) 254 | 255 | items = results.get("organizations", []) 256 | total = results.get("total", len(items)) 257 | 258 | except Exception as e: 259 | logger.error(f"NCI organization search failed: {e}") 260 | raise SearchExecutionError("nci_organization", e) from e 261 | 262 | logger.info(f"NCI organization search returned {total} results") 263 | return items, total 264 | 265 | 266 | async def handle_nci_intervention_search( 267 | name: str | None, 268 | intervention_type: str | None, 269 | synonyms: bool, 270 | api_key: str | None, 271 | page: int, 272 | page_size: int, 273 | ) -> tuple[list[dict], int]: 274 | """Handle NCI intervention domain search.""" 275 | logger.info("Executing NCI intervention search") 276 | 277 | try: 278 | from biomcp.interventions import ( 279 | search_interventions, 280 | search_interventions_with_or, 281 | ) 282 | 283 | # Check if name contains OR query 284 | if name and (" OR " in name or " or " in name): 285 | results = await search_interventions_with_or( 286 | name_query=name, 287 | intervention_type=intervention_type, 288 | synonyms=synonyms, 289 | page_size=page_size, 290 | page=page, 291 | api_key=api_key, 292 | ) 293 | else: 294 | results = await search_interventions( 295 | name=name, 296 | intervention_type=intervention_type, 297 | synonyms=synonyms, 298 | page_size=page_size, 299 | page=page, 300 | api_key=api_key, 301 | ) 302 | 303 | items = results.get("interventions", []) 304 | total = results.get("total", len(items)) 305 | 306 | except Exception as e: 307 | logger.error(f"NCI intervention search failed: {e}") 308 | raise SearchExecutionError("nci_intervention", e) from e 309 | 310 | logger.info(f"NCI intervention search returned {total} results") 311 | return items, total 312 | 313 | 314 | async def handle_nci_biomarker_search( 315 | name: str | None, 316 | gene: str | None, 317 | biomarker_type: str | None, 318 | assay_type: str | None, 319 | api_key: str | None, 320 | page: int, 321 | page_size: int, 322 | ) -> tuple[list[dict], int]: 323 | """Handle NCI biomarker domain search.""" 324 | logger.info("Executing NCI biomarker search") 325 | 326 | try: 327 | from biomcp.biomarkers import ( 328 | search_biomarkers, 329 | search_biomarkers_with_or, 330 | ) 331 | 332 | # Check if name contains OR query 333 | if name and (" OR " in name or " 
or " in name): 334 | results = await search_biomarkers_with_or( 335 | name_query=name, 336 | eligibility_criterion=gene, # Map gene to eligibility_criterion 337 | biomarker_type=biomarker_type, 338 | assay_purpose=assay_type, # Map assay_type to assay_purpose 339 | page_size=page_size, 340 | page=page, 341 | api_key=api_key, 342 | ) 343 | else: 344 | results = await search_biomarkers( 345 | name=name, 346 | eligibility_criterion=gene, # Map gene to eligibility_criterion 347 | biomarker_type=biomarker_type, 348 | assay_purpose=assay_type, # Map assay_type to assay_purpose 349 | page_size=page_size, 350 | page=page, 351 | api_key=api_key, 352 | ) 353 | 354 | items = results.get("biomarkers", []) 355 | total = results.get("total", len(items)) 356 | 357 | except Exception as e: 358 | logger.error(f"NCI biomarker search failed: {e}") 359 | raise SearchExecutionError("nci_biomarker", e) from e 360 | 361 | logger.info(f"NCI biomarker search returned {total} results") 362 | return items, total 363 | 364 | 365 | async def handle_nci_disease_search( 366 | name: str | None, 367 | include_synonyms: bool, 368 | category: str | None, 369 | api_key: str | None, 370 | page: int, 371 | page_size: int, 372 | ) -> tuple[list[dict], int]: 373 | """Handle NCI disease domain search.""" 374 | logger.info("Executing NCI disease search") 375 | 376 | try: 377 | from biomcp.diseases import search_diseases, search_diseases_with_or 378 | 379 | # Check if name contains OR query 380 | if name and (" OR " in name or " or " in name): 381 | results = await search_diseases_with_or( 382 | name_query=name, 383 | include_synonyms=include_synonyms, 384 | category=category, 385 | page_size=page_size, 386 | page=page, 387 | api_key=api_key, 388 | ) 389 | else: 390 | results = await search_diseases( 391 | name=name, 392 | include_synonyms=include_synonyms, 393 | category=category, 394 | page_size=page_size, 395 | page=page, 396 | api_key=api_key, 397 | ) 398 | 399 | items = results.get("diseases", []) 400 | total = results.get("total", len(items)) 401 | 402 | except Exception as e: 403 | logger.error(f"NCI disease search failed: {e}") 404 | raise SearchExecutionError("nci_disease", e) from e 405 | 406 | logger.info(f"NCI disease search returned {total} results") 407 | return items, total 408 | ``` -------------------------------------------------------------------------------- /docs/apis/python-sdk.md: -------------------------------------------------------------------------------- ```markdown 1 | # Python Package Reference 2 | 3 | The BioMCP Python package provides direct access to biomedical data search and retrieval functions through modular domain-specific APIs. 
4 | 5 | ## Installation 6 | 7 | ```bash 8 | pip install biomcp-python 9 | ``` 10 | 11 | ## Quick Start 12 | 13 | ```python 14 | import asyncio 15 | from biomcp.variants.search import search_variants, VariantQuery, ClinicalSignificance 16 | from biomcp.articles.search import search_articles, PubmedRequest 17 | from biomcp.trials.search import search_trials, TrialQuery 18 | 19 | async def main(): 20 | # Search for pathogenic variants 21 | variant_query = VariantQuery( 22 | gene="BRAF", 23 | significance=ClinicalSignificance.PATHOGENIC 24 | ) 25 | variants_result = await search_variants(variant_query) 26 | 27 | # Search articles 28 | article_request = PubmedRequest( 29 | genes=["BRAF"], 30 | diseases=["melanoma"] 31 | ) 32 | articles_result = await search_articles(article_request) 33 | 34 | # Search clinical trials 35 | trial_query = TrialQuery( 36 | conditions=["melanoma"], 37 | status="RECRUITING" 38 | ) 39 | trials_result = await search_trials(trial_query) 40 | 41 | asyncio.run(main()) 42 | ``` 43 | 44 | ## API Structure 45 | 46 | The BioMCP package is organized into domain-specific modules that you import directly: 47 | 48 | ### Available Modules 49 | 50 | - **Variants**: `biomcp.variants.search` - Search genetic variants 51 | - **Articles**: `biomcp.articles.search` - Search biomedical literature 52 | - **Trials**: `biomcp.trials.search` - Search clinical trials 53 | - **Genes**: `biomcp.genes` - Get gene information 54 | - **Diseases**: `biomcp.diseases` - Get disease information 55 | - **Drugs**: `biomcp.drugs` - Get drug information 56 | 57 | ### Import Patterns 58 | 59 | ```python 60 | # Variants 61 | from biomcp.variants.search import search_variants, VariantQuery, ClinicalSignificance 62 | from biomcp.variants.getter import get_variant 63 | from biomcp.variants.alphagenome import predict_variant_effects 64 | 65 | # Articles 66 | from biomcp.articles.search import search_articles, PubmedRequest 67 | 68 | # Trials 69 | from biomcp.trials.search import search_trials, TrialQuery, TrialPhase 70 | 71 | # Direct functions 72 | from biomcp.genes import get_gene 73 | from biomcp.diseases import get_disease 74 | from biomcp.drugs import get_drug 75 | ``` 76 | 77 | ## Articles API 78 | 79 | ### search_articles() 80 | 81 | Search PubMed/PubTator3 for biomedical literature. 82 | 83 | ```python 84 | from biomcp.articles.search import search_articles, PubmedRequest 85 | 86 | async def search_articles( 87 | request: PubmedRequest, 88 | output_json: bool = False 89 | ) -> str: 90 | ``` 91 | 92 | **PubmedRequest Parameters:** 93 | 94 | - `genes`: List of gene symbols (e.g., ["BRAF", "KRAS"]) 95 | - `diseases`: List of disease/condition terms 96 | - `chemicals`: List of drug/chemical names 97 | - `variants`: List of variant notations 98 | - `keywords`: Additional search keywords (supports OR with |) 99 | 100 | **Example:** 101 | 102 | ```python 103 | from biomcp.articles.search import search_articles, PubmedRequest 104 | 105 | # Basic search 106 | request = PubmedRequest( 107 | genes=["EGFR"], 108 | diseases=["lung cancer"] 109 | ) 110 | results = await search_articles(request) 111 | 112 | # Advanced search with keywords 113 | request = PubmedRequest( 114 | genes=["BRAF"], 115 | keywords=["V600E|p.V600E|resistance"], 116 | chemicals=["vemurafenib", "dabrafenib"] 117 | ) 118 | results = await search_articles(request) 119 | ``` 120 | 121 | ## Trials API 122 | 123 | ### search_trials() 124 | 125 | Search clinical trials from ClinicalTrials.gov. 
126 | 127 | ```python 128 | from biomcp.trials.search import search_trials, TrialQuery, TrialPhase, RecruitingStatus 129 | 130 | async def search_trials( 131 | query: TrialQuery, 132 | output_json: bool = False 133 | ) -> str: 134 | ``` 135 | 136 | **TrialQuery Parameters:** 137 | 138 | - `conditions`: Disease/condition terms 139 | - `interventions`: Treatment/intervention terms 140 | - `other_terms`: Additional search terms 141 | - `status`: Trial status (use RecruitingStatus enum) 142 | - `phase`: Trial phase (use TrialPhase enum) 143 | - `study_type`: INTERVENTIONAL or OBSERVATIONAL 144 | - `lat`, `long`, `distance`: Geographic search parameters 145 | 146 | **Available Enums:** 147 | 148 | - `TrialPhase`: EARLY_PHASE1, PHASE1, PHASE2, PHASE3, PHASE4, NOT_APPLICABLE 149 | - `RecruitingStatus`: OPEN, CLOSED, ANY 150 | - `StudyType`: INTERVENTIONAL, OBSERVATIONAL, EXPANDED_ACCESS 151 | 152 | **Example:** 153 | 154 | ```python 155 | from biomcp.trials.search import search_trials, TrialQuery, TrialPhase 156 | 157 | # Basic search 158 | query = TrialQuery( 159 | conditions=["melanoma"], 160 | phase=TrialPhase.PHASE3, 161 | recruiting_status="RECRUITING" 162 | ) 163 | results = await search_trials(query) 164 | 165 | # Location-based search 166 | query = TrialQuery( 167 | conditions=["breast cancer"], 168 | lat=40.7128, 169 | long=-74.0060, 170 | distance=50 171 | ) 172 | results = await search_trials(query) 173 | ``` 174 | 175 | ## Variants API 176 | 177 | ### search_variants() 178 | 179 | Search genetic variants in MyVariant.info. 180 | 181 | ```python 182 | from biomcp.variants.search import search_variants, VariantQuery, ClinicalSignificance 183 | 184 | async def search_variants( 185 | query: VariantQuery, 186 | output_json: bool = False, 187 | include_cbioportal: bool = True 188 | ) -> str: 189 | ``` 190 | 191 | **VariantQuery Parameters:** 192 | 193 | - `gene`: Gene symbol (e.g. BRAF, TP53) 194 | - `hgvsp`: Protein change notation (e.g., p.V600E, p.Arg557His) 195 | - `hgvsc`: cDNA notation (e.g., c.1799T>A) 196 | - `rsid`: dbSNP rsID (e.g., rs113488022) 197 | - `region`: Genomic region as chr:start-end (e.g. 
chr1:12345-67890) 198 | - `significance`: ClinVar clinical significance (use ClinicalSignificance enum) 199 | - `min_frequency`, `max_frequency`: Allele frequency filters 200 | - `cadd`: Minimum CADD phred score 201 | - `polyphen`: PolyPhen-2 prediction (use PolyPhenPrediction enum) 202 | - `sift`: SIFT prediction (use SiftPrediction enum) 203 | - `sources`: Include only specific data sources 204 | - `size`: Number of results to return 205 | - `offset`: Result offset for pagination 206 | 207 | **Available Enums:** 208 | 209 | - `ClinicalSignificance`: PATHOGENIC, LIKELY_PATHOGENIC, UNCERTAIN_SIGNIFICANCE, LIKELY_BENIGN, BENIGN 210 | - `PolyPhenPrediction`: PROBABLY_DAMAGING, POSSIBLY_DAMAGING, BENIGN 211 | - `SiftPrediction`: DELETERIOUS, TOLERATED 212 | 213 | **Example:** 214 | 215 | ```python 216 | from biomcp.variants.search import search_variants, VariantQuery, ClinicalSignificance 217 | 218 | # Search pathogenic variants 219 | query = VariantQuery( 220 | gene="BRCA1", 221 | significance=ClinicalSignificance.PATHOGENIC, 222 | max_frequency=0.01 223 | ) 224 | results = await search_variants(query) 225 | 226 | # Search by genomic region 227 | query = VariantQuery( 228 | region="chr7:140453136-140453137" 229 | ) 230 | results = await search_variants(query) 231 | 232 | # Search by protein change 233 | query = VariantQuery( 234 | gene="BRAF", 235 | hgvsp="p.V600E" 236 | ) 237 | results = await search_variants(query) 238 | ``` 239 | 240 | ### get_variant() 241 | 242 | Get detailed variant information. 243 | 244 | ```python 245 | from biomcp.variants.getter import get_variant 246 | 247 | async def get_variant( 248 | variant_id: str, 249 | output_json: bool = False, 250 | include_external: bool = False 251 | ) -> str: 252 | ``` 253 | 254 | **Parameters:** 255 | 256 | - `variant_id`: Variant identifier (HGVS, rsID, or genomic like "chr7:g.140453136A>T") 257 | - `output_json`: Return JSON format instead of markdown 258 | - `include_external`: Include external database annotations 259 | 260 | **Example:** 261 | 262 | ```python 263 | # Get by HGVS 264 | variant_info = await get_variant("chr7:g.140453136A>T") 265 | 266 | # Get by rsID 267 | variant_info = await get_variant("rs113488022") 268 | ``` 269 | 270 | ### predict_variant_effects() 271 | 272 | Predict variant effects using AlphaGenome AI. 
273 | 274 | ```python 275 | from biomcp.variants.alphagenome import predict_variant_effects 276 | 277 | async def predict_variant_effects( 278 | chromosome: str, 279 | position: int, 280 | reference: str, 281 | alternate: str, 282 | interval_size: int = 131_072, 283 | tissue_types: list[str] | None = None, 284 | significance_threshold: float = 0.5, 285 | api_key: str | None = None 286 | ) -> str: 287 | ``` 288 | 289 | **Parameters:** 290 | 291 | - `chromosome`: Chromosome (e.g., 'chr7') 292 | - `position`: 1-based genomic position 293 | - `reference`: Reference allele(s) 294 | - `alternate`: Alternate allele(s) 295 | - `interval_size`: Size of genomic context window (max 1,000,000) 296 | - `tissue_types`: UBERON tissue ontology terms for tissue-specific predictions 297 | - `significance_threshold`: Threshold for significant log2 fold changes 298 | - `api_key`: AlphaGenome API key (or set ALPHAGENOME_API_KEY env var) 299 | 300 | **Example:** 301 | 302 | ```python 303 | # Predict effects of BRAF V600E mutation 304 | prediction = await predict_variant_effects( 305 | chromosome="chr7", 306 | position=140753336, 307 | reference="A", 308 | alternate="T", 309 | api_key="your-alphagenome-api-key" 310 | ) 311 | ``` 312 | 313 | ## Direct Data APIs 314 | 315 | ### get_gene() 316 | 317 | Get gene information from MyGene.info. 318 | 319 | ```python 320 | from biomcp.genes import get_gene 321 | 322 | async def get_gene( 323 | gene_id_or_symbol: str, 324 | output_json: bool = False 325 | ) -> str: 326 | ``` 327 | 328 | **Example:** 329 | 330 | ```python 331 | gene_info = await get_gene("BRCA1") 332 | ``` 333 | 334 | ### get_disease() 335 | 336 | Get disease information from MyDisease.info. 337 | 338 | ```python 339 | from biomcp.diseases import get_disease 340 | 341 | async def get_disease( 342 | disease_id_or_name: str, 343 | output_json: bool = False 344 | ) -> str: 345 | ``` 346 | 347 | **Example:** 348 | 349 | ```python 350 | disease_info = await get_disease("melanoma") 351 | ``` 352 | 353 | ### get_drug() 354 | 355 | Get drug information from MyChem.info. 356 | 357 | ```python 358 | from biomcp.drugs import get_drug 359 | 360 | async def get_drug( 361 | drug_id_or_name: str, 362 | output_json: bool = False 363 | ) -> str: 364 | ``` 365 | 366 | **Example:** 367 | 368 | ```python 369 | drug_info = await get_drug("imatinib") 370 | ``` 371 | 372 | ## Complete Analysis Example 373 | 374 | ```python 375 | import asyncio 376 | from biomcp.variants.search import search_variants, VariantQuery, ClinicalSignificance 377 | from biomcp.articles.search import search_articles, PubmedRequest 378 | from biomcp.trials.search import search_trials, TrialQuery, TrialPhase 379 | from biomcp.genes import get_gene 380 | 381 | async def analyze_gene_variants(gene_symbol: str, disease: str): 382 | """Complete gene variant analysis workflow.""" 383 | 384 | # 1. Get gene information 385 | gene_info = await get_gene(gene_symbol) 386 | print(f"Gene: {gene_symbol}") 387 | 388 | # 2. Search for pathogenic variants 389 | variant_query = VariantQuery( 390 | gene=gene_symbol, 391 | significance=ClinicalSignificance.PATHOGENIC, 392 | max_frequency=0.01 # Rare variants 393 | ) 394 | variants_result = await search_variants(variant_query) 395 | print(f"Found pathogenic variants for {gene_symbol}") 396 | 397 | # 3. 
Search related literature 398 | article_request = PubmedRequest( 399 | genes=[gene_symbol], 400 | diseases=[disease], 401 | keywords=["therapy", "treatment", "prognosis"] 402 | ) 403 | articles_result = await search_articles(article_request) 404 | print(f"Found literature on {gene_symbol} and {disease}") 405 | 406 | # 4. Find clinical trials 407 | trial_query = TrialQuery( 408 | conditions=[disease], 409 | other_terms=[gene_symbol, f"{gene_symbol} mutation"], 410 | phase=TrialPhase.PHASE3, 411 | recruiting_status="RECRUITING" 412 | ) 413 | trials_result = await search_trials(trial_query) 414 | print(f"Found trials for {disease} with {gene_symbol}") 415 | 416 | return { 417 | "gene_info": gene_info, 418 | "variants": variants_result, 419 | "articles": articles_result, 420 | "trials": trials_result 421 | } 422 | 423 | # Run the analysis 424 | results = asyncio.run(analyze_gene_variants("BRAF", "melanoma")) 425 | ``` 426 | 427 | ## LangChain Integration 428 | 429 | ```python 430 | from langchain.tools import tool 431 | from biomcp.variants.search import search_variants, VariantQuery, ClinicalSignificance 432 | from biomcp.articles.search import search_articles, PubmedRequest 433 | 434 | @tool 435 | def search_pathogenic_variants(gene: str) -> str: 436 | """Search for pathogenic variants in a specific gene.""" 437 | import asyncio 438 | 439 | async def _search(): 440 | query = VariantQuery( 441 | gene=gene, 442 | significance=ClinicalSignificance.PATHOGENIC 443 | ) 444 | return await search_variants(query) 445 | 446 | return asyncio.run(_search()) 447 | 448 | @tool 449 | def search_gene_literature(gene: str, disease: str = None) -> str: 450 | """Search for scientific literature about a gene and optionally a disease.""" 451 | import asyncio 452 | 453 | async def _search(): 454 | request = PubmedRequest( 455 | genes=[gene], 456 | diseases=[disease] if disease else [] 457 | ) 458 | return await search_articles(request) 459 | 460 | return asyncio.run(_search()) 461 | 462 | # Use with your LLM/agent framework 463 | tools = [search_pathogenic_variants, search_gene_literature] 464 | ``` 465 | 466 | ## Key Differences from Other Documentation 467 | 468 | ❌ **Does NOT work:** 469 | 470 | ```python 471 | from biomcp import BioMCPClient # This class doesn't exist 472 | ``` 473 | 474 | ✅ **Actually works:** 475 | 476 | ```python 477 | from biomcp.variants.search import search_variants, VariantQuery 478 | from biomcp.articles.search import search_articles, PubmedRequest 479 | from biomcp.trials.search import search_trials, TrialQuery 480 | ``` 481 | 482 | ## Summary 483 | 484 | The BioMCP package provides powerful biomedical data access through: 485 | 486 | - **Direct async functions** for each domain (variants, articles, trials, genes, diseases, drugs) 487 | - **Pydantic models** for type-safe queries and responses 488 | - **Comprehensive enums** for standardized values 489 | - **No unified client** - use individual domain modules directly 490 | 491 | This modular approach works well for building tools and integrating with frameworks like LangChain, as it provides direct access to specific functionality without the overhead of a unified client interface. 
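
Since every search function is an independent async coroutine, unrelated queries can also be dispatched concurrently. The sketch below is illustrative only and assumes the imports and models described above; the helper name `parallel_lookup` is arbitrary, and `asyncio.gather` from the standard library simply awaits the three searches in parallel.

```python
import asyncio

from biomcp.variants.search import search_variants, VariantQuery, ClinicalSignificance
from biomcp.articles.search import search_articles, PubmedRequest
from biomcp.trials.search import search_trials, TrialQuery

async def parallel_lookup(gene: str, disease: str) -> dict:
    """Run independent domain searches concurrently and collect the results."""
    variants, articles, trials = await asyncio.gather(
        search_variants(
            VariantQuery(gene=gene, significance=ClinicalSignificance.PATHOGENIC)
        ),
        search_articles(PubmedRequest(genes=[gene], diseases=[disease])),
        search_trials(
            TrialQuery(conditions=[disease], recruiting_status="RECRUITING")
        ),
    )
    return {"variants": variants, "articles": articles, "trials": trials}

results = asyncio.run(parallel_lookup("BRAF", "melanoma"))
```

Results come back in the order the coroutines are listed, so this pattern fits any workflow where the individual lookups do not depend on each other.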
492 | 493 | ## Additional Resources 494 | 495 | - [MCP Tools Reference](../mcp-tools/) 496 | - [CLI Commands](../cli/) 497 | - [How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md) 498 | ``` -------------------------------------------------------------------------------- /tests/tdd/openfda/test_drug_shortages.py: -------------------------------------------------------------------------------- ```python 1 | """Tests for FDA drug shortage search and retrieval.""" 2 | 3 | import json 4 | import tempfile 5 | from datetime import datetime 6 | from pathlib import Path 7 | from unittest.mock import patch 8 | 9 | import pytest 10 | 11 | from biomcp.openfda.drug_shortages import ( 12 | _fetch_shortage_data, 13 | _get_cached_shortage_data, 14 | get_drug_shortage, 15 | search_drug_shortages, 16 | ) 17 | 18 | 19 | class TestDrugShortages: 20 | """Test FDA drug shortage functions.""" 21 | 22 | @pytest.fixture 23 | def mock_shortage_data(self): 24 | """Mock drug shortage data structure.""" 25 | return { 26 | "_fetched_at": datetime.now().isoformat(), 27 | "last_updated": "2024-02-15", 28 | "shortages": [ 29 | { 30 | "generic_name": "Ampicillin Sodium", 31 | "brand_names": ["Ampicillin"], 32 | "status": "Current", 33 | "therapeutic_category": "Anti-infective", 34 | "shortage_reason": "Manufacturing delays", 35 | "presentation": "Injection, 500mg vial", 36 | "availability": "Limited supply available", 37 | "estimated_recovery": "Q2 2024", 38 | "last_updated": "2024-02-10", 39 | "first_reported": "2024-01-15", 40 | "related_shortages": [], 41 | "alternatives": ["Ampicillin-Sulbactam", "Cefazolin"], 42 | }, 43 | { 44 | "generic_name": "Metoprolol Succinate", 45 | "brand_names": ["Toprol XL"], 46 | "status": "Resolved", 47 | "therapeutic_category": "Cardiovascular", 48 | "shortage_reason": "Increased demand", 49 | "presentation": "Extended release tablets, 25mg", 50 | "availability": "Available", 51 | "resolved_date": "2024-02-01", 52 | "last_updated": "2024-02-01", 53 | "first_reported": "2023-11-15", 54 | }, 55 | { 56 | "generic_name": "Cisplatin", 57 | "brand_names": ["Platinol"], 58 | "status": "Current", 59 | "therapeutic_category": "Oncology", 60 | "shortage_reason": "Manufacturing issues", 61 | "presentation": "Injection, 1mg/mL", 62 | "availability": "Not available", 63 | "estimated_recovery": "Unknown", 64 | "last_updated": "2024-02-14", 65 | "first_reported": "2023-12-01", 66 | "notes": "Critical shortage affecting cancer treatment", 67 | }, 68 | ], 69 | } 70 | 71 | @pytest.mark.asyncio 72 | async def test_search_drug_shortages_success(self, mock_shortage_data): 73 | """Test successful drug shortage search.""" 74 | with patch( 75 | "biomcp.openfda.drug_shortages._get_cached_shortage_data" 76 | ) as mock_cache: 77 | mock_cache.return_value = mock_shortage_data 78 | 79 | result = await search_drug_shortages(drug="ampicillin", limit=10) 80 | 81 | # Check that result contains expected shortage information 82 | assert "Ampicillin Sodium" in result 83 | assert "Current" in result 84 | assert "Anti-infective" in result 85 | # Note: shortage_reason and estimated_recovery fields from mock 86 | # are not displayed because formatter looks for different field names 87 | 88 | # Check for critical disclaimer 89 | assert "Critical Warning" in result 90 | assert "Drug shortage information is time-sensitive" in result 91 | assert ( 92 | "https://www.accessdata.fda.gov/scripts/drugshortages/" 93 | in result 94 | ) 95 | 96 | # Check summary statistics 97 | assert "Total Shortages Found**: 1 
shortage" in result 98 | 99 | @pytest.mark.asyncio 100 | async def test_search_by_status(self, mock_shortage_data): 101 | """Test drug shortage search filtered by status.""" 102 | with patch( 103 | "biomcp.openfda.drug_shortages._get_cached_shortage_data" 104 | ) as mock_cache: 105 | mock_cache.return_value = mock_shortage_data 106 | 107 | result = await search_drug_shortages(status="Current", limit=10) 108 | 109 | assert "Current" in result 110 | assert "Ampicillin Sodium" in result 111 | assert "Cisplatin" in result 112 | # Should not include resolved shortage 113 | assert "Metoprolol Succinate" not in result or "Resolved" in result 114 | 115 | @pytest.mark.asyncio 116 | async def test_search_by_therapeutic_category(self, mock_shortage_data): 117 | """Test drug shortage search filtered by therapeutic category.""" 118 | with patch( 119 | "biomcp.openfda.drug_shortages._get_cached_shortage_data" 120 | ) as mock_cache: 121 | mock_cache.return_value = mock_shortage_data 122 | 123 | result = await search_drug_shortages( 124 | therapeutic_category="Oncology", limit=10 125 | ) 126 | 127 | assert "Oncology" in result 128 | assert "Cisplatin" in result 129 | assert "Critical shortage affecting cancer treatment" in result 130 | 131 | @pytest.mark.asyncio 132 | async def test_search_no_results(self, mock_shortage_data): 133 | """Test drug shortage search with no results.""" 134 | with patch( 135 | "biomcp.openfda.drug_shortages._get_cached_shortage_data" 136 | ) as mock_cache: 137 | mock_cache.return_value = mock_shortage_data 138 | 139 | result = await search_drug_shortages( 140 | drug="nonexistentdrug999", limit=10 141 | ) 142 | 143 | assert "No drug shortages found" in result 144 | 145 | @pytest.mark.asyncio 146 | async def test_get_drug_shortage_success(self, mock_shortage_data): 147 | """Test successful retrieval of specific drug shortage.""" 148 | with patch( 149 | "biomcp.openfda.drug_shortages._get_cached_shortage_data" 150 | ) as mock_cache: 151 | mock_cache.return_value = mock_shortage_data 152 | 153 | result = await get_drug_shortage("Cisplatin") 154 | 155 | # Check detailed information 156 | assert "Cisplatin" in result 157 | assert "Platinol" in result 158 | assert "Current" in result 159 | assert "Oncology" in result 160 | # Note: shortage_reason and availability fields not displayed 161 | assert "Critical shortage affecting cancer treatment" in result 162 | 163 | # Timeline fields also not displayed in current format 164 | # Just verify basic structure 165 | 166 | # Check critical disclaimer 167 | assert "Critical Warning" in result 168 | 169 | @pytest.mark.asyncio 170 | async def test_get_drug_shortage_not_found(self, mock_shortage_data): 171 | """Test retrieval of non-existent drug shortage.""" 172 | with patch( 173 | "biomcp.openfda.drug_shortages._get_cached_shortage_data" 174 | ) as mock_cache: 175 | mock_cache.return_value = mock_shortage_data 176 | 177 | result = await get_drug_shortage("NonexistentDrug") 178 | 179 | assert "No shortage information found" in result 180 | assert "NonexistentDrug" in result 181 | 182 | @pytest.mark.asyncio 183 | async def test_cache_mechanism(self, mock_shortage_data): 184 | """Test that caching mechanism works correctly.""" 185 | # Setup cache directory 186 | cache_dir = Path(tempfile.gettempdir()) / "biomcp_cache" 187 | cache_dir.mkdir(exist_ok=True) 188 | cache_file = cache_dir / "drug_shortages.json" 189 | 190 | # Write cache file 191 | cache_data = mock_shortage_data.copy() 192 | cache_data["_cache_time"] = datetime.now().isoformat() 193 | 
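        # The cache file written above carries a current `_cache_time`, so a fresh
        # cache should be served by `_get_cached_shortage_data` without calling
        # `_fetch_shortage_data`; CACHE_FILE is patched so the real cache location
        # is never touched.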
194 | with patch("biomcp.openfda.drug_shortages.CACHE_FILE", cache_file): 195 | # Write cache 196 | with open(cache_file, "w") as f: 197 | json.dump(cache_data, f) 198 | 199 | # Test cache is used when fresh 200 | with patch( 201 | "biomcp.openfda.drug_shortages._fetch_shortage_data" 202 | ) as mock_fetch: 203 | result = await _get_cached_shortage_data() 204 | 205 | # Should not call fetch if cache is fresh 206 | if result and "_cache_time" in str(result): 207 | mock_fetch.assert_not_called() 208 | 209 | # Clean up 210 | if cache_file.exists(): 211 | cache_file.unlink() 212 | 213 | @pytest.mark.asyncio 214 | async def test_data_unavailable(self): 215 | """Test handling when shortage data is unavailable.""" 216 | with patch( 217 | "biomcp.openfda.drug_shortages._get_cached_shortage_data" 218 | ) as mock_cache: 219 | mock_cache.return_value = None 220 | 221 | result = await search_drug_shortages(drug="aspirin") 222 | 223 | assert "Drug Shortage Data Temporarily Unavailable" in result 224 | assert "Alternative Options:" in result 225 | assert "FDA Drug Shortages Database" in result 226 | 227 | @pytest.mark.asyncio 228 | async def test_fetch_shortage_data_error_handling(self): 229 | """Test error handling in fetch_shortage_data.""" 230 | with patch( 231 | "biomcp.openfda.drug_shortages.request_api" 232 | ) as mock_request: 233 | # Simulate API error 234 | mock_request.return_value = (None, "Connection timeout") 235 | 236 | result = await _fetch_shortage_data() 237 | 238 | # Should return None, not mock data 239 | assert result is None 240 | 241 | @pytest.mark.asyncio 242 | async def test_shortage_with_alternatives(self, mock_shortage_data): 243 | """Test that alternatives are displayed for shortages.""" 244 | with patch( 245 | "biomcp.openfda.drug_shortages._get_cached_shortage_data" 246 | ) as mock_cache: 247 | mock_cache.return_value = mock_shortage_data 248 | 249 | result = await get_drug_shortage("Ampicillin Sodium") 250 | 251 | assert "Alternative Products" in result 252 | assert "Ampicillin-Sulbactam" in result 253 | assert "Cefazolin" in result 254 | 255 | @pytest.mark.asyncio 256 | async def test_critical_shortage_highlighting(self, mock_shortage_data): 257 | """Test that critical shortages are properly highlighted.""" 258 | with patch( 259 | "biomcp.openfda.drug_shortages._get_cached_shortage_data" 260 | ) as mock_cache: 261 | mock_cache.return_value = mock_shortage_data 262 | 263 | result = await search_drug_shortages( 264 | therapeutic_category="Oncology", limit=10 265 | ) 266 | 267 | # Critical oncology shortages should be highlighted 268 | assert "⚠️" in result or "Critical" in result 269 | assert "cancer treatment" in result 270 | 271 | @pytest.mark.asyncio 272 | async def test_resolved_shortage_display(self, mock_shortage_data): 273 | """Test display of resolved shortages.""" 274 | with patch( 275 | "biomcp.openfda.drug_shortages._get_cached_shortage_data" 276 | ) as mock_cache: 277 | mock_cache.return_value = mock_shortage_data 278 | 279 | result = await search_drug_shortages(status="Resolved", limit=10) 280 | 281 | assert "Metoprolol Succinate" in result 282 | assert "Resolved" in result 283 | # Resolved date not displayed in current format 284 | 285 | @pytest.mark.asyncio 286 | async def test_pagination(self, mock_shortage_data): 287 | """Test pagination of shortage results.""" 288 | # Add more shortages for pagination test 289 | large_data = mock_shortage_data.copy() 290 | large_data["shortages"] = ( 291 | mock_shortage_data["shortages"] * 10 292 | ) # 30 items 293 | 294 | 
with patch( 295 | "biomcp.openfda.drug_shortages._get_cached_shortage_data" 296 | ) as mock_cache: 297 | mock_cache.return_value = large_data 298 | 299 | # First page 300 | result1 = await search_drug_shortages(limit=5, skip=0) 301 | assert "showing 5 of" in result1 302 | 303 | # Second page 304 | result2 = await search_drug_shortages(limit=5, skip=5) 305 | assert "showing 5 of" in result2 306 | 307 | def test_no_mock_data_in_production(self): 308 | """Verify that mock data is never returned in production code.""" 309 | import inspect 310 | 311 | import biomcp.openfda.drug_shortages as module 312 | 313 | # Get source code 314 | source = inspect.getsource(module) 315 | 316 | # Check for patterns that would indicate mock data 317 | dangerous_patterns = [ 318 | "return fake", 319 | "return sample", 320 | "return test_data", 321 | "get_mock", 322 | "get_fake", 323 | ] 324 | 325 | for pattern in dangerous_patterns: 326 | # Should not find these patterns (except in comments) 327 | if pattern in source: 328 | # Check if it's in a comment 329 | lines = source.split("\n") 330 | for line in lines: 331 | if pattern in line and not line.strip().startswith("#"): 332 | # Found non-comment usage - this would be bad 333 | raise AssertionError( 334 | f"Found potential mock data pattern: {pattern}" 335 | ) 336 | 337 | # Specifically check that errors return None (not mock data) 338 | assert "return None # Don't return mock data" in source 339 | ``` -------------------------------------------------------------------------------- /docs/developer-guides/03-third-party-endpoints.md: -------------------------------------------------------------------------------- ```markdown 1 | # Third-Party Endpoints Used by BioMCP 2 | 3 | _This file is auto-generated from the endpoint registry._ 4 | 5 | ## Overview 6 | 7 | BioMCP connects to 14 external domains across 35 endpoints. 
8 | 9 | ## Endpoints by Category 10 | 11 | ### Biomedical Literature 12 | 13 | #### biorxiv_api 14 | 15 | - **URL**: `https://api.biorxiv.org/details/biorxiv` 16 | - **Description**: bioRxiv API for searching biology preprints 17 | - **Data Types**: research_articles 18 | - **Rate Limit**: Not specified 19 | - **Compliance Notes**: Public preprint server, no PII transmitted 20 | 21 | #### europe_pmc 22 | 23 | - **URL**: `https://www.ebi.ac.uk/europepmc/webservices/rest/search` 24 | - **Description**: Europe PMC REST API for searching biomedical literature 25 | - **Data Types**: research_articles 26 | - **Rate Limit**: Not specified 27 | - **Compliance Notes**: Public EMBL-EBI service, no PII transmitted 28 | 29 | #### medrxiv_api 30 | 31 | - **URL**: `https://api.biorxiv.org/details/medrxiv` 32 | - **Description**: medRxiv API for searching medical preprints 33 | - **Data Types**: research_articles 34 | - **Rate Limit**: Not specified 35 | - **Compliance Notes**: Public preprint server, no PII transmitted 36 | 37 | #### pubtator3_autocomplete 38 | 39 | - **URL**: `https://www.ncbi.nlm.nih.gov/research/pubtator3-api/entity/autocomplete/` 40 | - **Description**: PubTator3 API for entity name autocomplete suggestions 41 | - **Data Types**: gene_annotations 42 | - **Rate Limit**: 20 requests/second 43 | - **Compliance Notes**: Public NIH/NCBI service, no PII transmitted 44 | 45 | #### pubtator3_export 46 | 47 | - **URL**: `https://www.ncbi.nlm.nih.gov/research/pubtator3-api/publications/export/biocjson` 48 | - **Description**: PubTator3 API for fetching full article annotations in BioC-JSON format 49 | - **Data Types**: research_articles 50 | - **Rate Limit**: 20 requests/second 51 | - **Compliance Notes**: Public NIH/NCBI service, no PII transmitted 52 | 53 | #### pubtator3_search 54 | 55 | - **URL**: `https://www.ncbi.nlm.nih.gov/research/pubtator3-api/search/` 56 | - **Description**: PubTator3 API for searching biomedical literature with entity annotations 57 | - **Data Types**: research_articles 58 | - **Rate Limit**: 20 requests/second 59 | - **Compliance Notes**: Public NIH/NCBI service, no PII transmitted 60 | 61 | ### Clinical Trials 62 | 63 | #### clinicaltrials_search 64 | 65 | - **URL**: `https://clinicaltrials.gov/api/v2/studies` 66 | - **Description**: ClinicalTrials.gov API v2 for searching clinical trials 67 | - **Data Types**: clinical_trial_data 68 | - **Rate Limit**: 10 requests/second 69 | - **Compliance Notes**: Public NIH service, may contain trial participant criteria 70 | 71 | #### nci_biomarkers 72 | 73 | - **URL**: `https://clinicaltrialsapi.cancer.gov/api/v2/biomarkers` 74 | - **Description**: NCI API for biomarkers used in clinical trials 75 | - **Data Types**: clinical_trial_data 76 | - **Rate Limit**: Not specified 77 | - **Authentication**: Optional NCI_API_KEY for increased access 78 | - **Compliance Notes**: Public NCI service, biomarker metadata 79 | 80 | #### nci_diseases 81 | 82 | - **URL**: `https://clinicaltrialsapi.cancer.gov/api/v2/diseases` 83 | - **Description**: NCI API for cancer disease vocabulary 84 | - **Data Types**: clinical_trial_data 85 | - **Rate Limit**: Not specified 86 | - **Authentication**: Optional NCI_API_KEY for increased access 87 | - **Compliance Notes**: Public NCI service, disease ontology 88 | 89 | #### nci_interventions 90 | 91 | - **URL**: `https://clinicaltrialsapi.cancer.gov/api/v2/interventions` 92 | - **Description**: NCI API for cancer treatment interventions 93 | - **Data Types**: clinical_trial_data 94 | - **Rate Limit**: 
Not specified 95 | - **Authentication**: Optional NCI_API_KEY for increased access 96 | - **Compliance Notes**: Public NCI service, intervention metadata 97 | 98 | #### nci_organizations 99 | 100 | - **URL**: `https://clinicaltrialsapi.cancer.gov/api/v2/organizations` 101 | - **Description**: NCI API for cancer research organizations 102 | - **Data Types**: clinical_trial_data 103 | - **Rate Limit**: Not specified 104 | - **Authentication**: Optional NCI_API_KEY for increased access 105 | - **Compliance Notes**: Public NCI service, organization metadata 106 | 107 | #### nci_trials 108 | 109 | - **URL**: `https://clinicaltrialsapi.cancer.gov/api/v2/trials` 110 | - **Description**: NCI Clinical Trials Search API for cancer trials 111 | - **Data Types**: clinical_trial_data 112 | - **Rate Limit**: Not specified 113 | - **Authentication**: Optional NCI_API_KEY for increased access 114 | - **Compliance Notes**: Public NCI service, cancer trial data 115 | 116 | ### Variant Databases 117 | 118 | #### ensembl_variation 119 | 120 | - **URL**: `https://rest.ensembl.org/variation/human` 121 | - **Description**: Ensembl REST API for human genetic variation data 122 | - **Data Types**: genetic_variants 123 | - **Rate Limit**: 15 requests/second 124 | - **Compliance Notes**: Public EMBL-EBI service, population genetics data 125 | 126 | #### gdc_ssm_occurrences 127 | 128 | - **URL**: `https://api.gdc.cancer.gov/ssm_occurrences` 129 | - **Description**: NCI GDC API for mutation occurrences in cancer samples 130 | - **Data Types**: cancer_mutations 131 | - **Rate Limit**: Not specified 132 | - **Compliance Notes**: Public NCI service, aggregate cancer genomics data 133 | 134 | #### gdc_ssms 135 | 136 | - **URL**: `https://api.gdc.cancer.gov/ssms` 137 | - **Description**: NCI GDC API for somatic mutations 138 | - **Data Types**: cancer_mutations 139 | - **Rate Limit**: Not specified 140 | - **Compliance Notes**: Public NCI service, aggregate cancer genomics data 141 | 142 | #### mychem_chem 143 | 144 | - **URL**: `https://mychem.info/v1/chem` 145 | - **Description**: MyChem.info API for fetching specific drug/chemical details 146 | - **Data Types**: gene_annotations 147 | - **Rate Limit**: 10 requests/second 148 | - **Compliance Notes**: Public BioThings service, drug/chemical annotation data 149 | 150 | #### mychem_query 151 | 152 | - **URL**: `https://mychem.info/v1/query` 153 | - **Description**: MyChem.info API for querying drug/chemical information 154 | - **Data Types**: gene_annotations 155 | - **Rate Limit**: 10 requests/second 156 | - **Compliance Notes**: Public BioThings service, drug/chemical annotation data 157 | 158 | #### mydisease_disease 159 | 160 | - **URL**: `https://mydisease.info/v1/disease` 161 | - **Description**: MyDisease.info API for fetching specific disease details 162 | - **Data Types**: gene_annotations 163 | - **Rate Limit**: 10 requests/second 164 | - **Compliance Notes**: Public BioThings service, disease ontology data 165 | 166 | #### mydisease_query 167 | 168 | - **URL**: `https://mydisease.info/v1/query` 169 | - **Description**: MyDisease.info API for querying disease information 170 | - **Data Types**: gene_annotations 171 | - **Rate Limit**: 10 requests/second 172 | - **Compliance Notes**: Public BioThings service, disease ontology data 173 | 174 | #### mygene_gene 175 | 176 | - **URL**: `https://mygene.info/v3/gene` 177 | - **Description**: MyGene.info API for fetching specific gene details 178 | - **Data Types**: gene_annotations 179 | - **Rate Limit**: 10 
requests/second 180 | - **Compliance Notes**: Public BioThings service, gene annotation data 181 | 182 | #### mygene_query 183 | 184 | - **URL**: `https://mygene.info/v3/query` 185 | - **Description**: MyGene.info API for querying gene information 186 | - **Data Types**: gene_annotations 187 | - **Rate Limit**: 10 requests/second 188 | - **Compliance Notes**: Public BioThings service, gene annotation data 189 | 190 | #### myvariant_query 191 | 192 | - **URL**: `https://myvariant.info/v1/query` 193 | - **Description**: MyVariant.info API for querying genetic variants 194 | - **Data Types**: genetic_variants 195 | - **Rate Limit**: 1000 requests/hour (anonymous) 196 | - **Compliance Notes**: Public service aggregating variant databases, no patient data 197 | 198 | #### myvariant_variant 199 | 200 | - **URL**: `https://myvariant.info/v1/variant` 201 | - **Description**: MyVariant.info API for fetching specific variant details 202 | - **Data Types**: genetic_variants 203 | - **Rate Limit**: 1000 requests/hour (anonymous) 204 | - **Compliance Notes**: Public service aggregating variant databases, no patient data 205 | 206 | ### Cancer Genomics 207 | 208 | #### cbioportal_api 209 | 210 | - **URL**: `https://www.cbioportal.org/api` 211 | - **Description**: cBioPortal API for cancer genomics data 212 | - **Data Types**: cancer_mutations, clinical_trial_data 213 | - **Rate Limit**: 5 requests/second 214 | - **Authentication**: Optional API token for increased rate limits 215 | - **Compliance Notes**: Public MSKCC/Dana-Farber service, aggregate cancer genomics 216 | 217 | #### cbioportal_cancer_types 218 | 219 | - **URL**: `https://www.cbioportal.org/api/cancer-types` 220 | - **Description**: cBioPortal API for cancer type hierarchy 221 | - **Data Types**: cancer_mutations 222 | - **Rate Limit**: 5 requests/second 223 | - **Compliance Notes**: Public MSKCC/Dana-Farber service, cancer type metadata 224 | 225 | #### cbioportal_genes 226 | 227 | - **URL**: `https://www.cbioportal.org/api/genes` 228 | - **Description**: cBioPortal API for gene information 229 | - **Data Types**: gene_annotations 230 | - **Rate Limit**: 5 requests/second 231 | - **Compliance Notes**: Public MSKCC/Dana-Farber service, gene metadata 232 | 233 | #### cbioportal_molecular_profiles 234 | 235 | - **URL**: `https://www.cbioportal.org/api/molecular-profiles` 236 | - **Description**: cBioPortal API for molecular profiles 237 | - **Data Types**: cancer_mutations 238 | - **Rate Limit**: 5 requests/second 239 | - **Compliance Notes**: Public MSKCC/Dana-Farber service, study metadata 240 | 241 | #### cbioportal_mutations 242 | 243 | - **URL**: `https://www.cbioportal.org/api/mutations` 244 | - **Description**: cBioPortal API for mutation data 245 | - **Data Types**: cancer_mutations 246 | - **Rate Limit**: 5 requests/second 247 | - **Compliance Notes**: Public MSKCC/Dana-Farber service, aggregate mutation data 248 | 249 | #### cbioportal_studies 250 | 251 | - **URL**: `https://www.cbioportal.org/api/studies` 252 | - **Description**: cBioPortal API for cancer studies 253 | - **Data Types**: clinical_trial_data, cancer_mutations 254 | - **Rate Limit**: 5 requests/second 255 | - **Compliance Notes**: Public MSKCC/Dana-Farber service, study metadata 256 | 257 | ### Regulatory Data 258 | 259 | #### fda_drug_shortages 260 | 261 | - **URL**: `https://www.fda.gov/media/169066/download` 262 | - **Description**: FDA Drug Shortages database (cached locally) 263 | - **Data Types**: drug_labels 264 | - **Rate Limit**: Cached with 24-hour TTL 265 
| - **Authentication**: None required 266 | - **Compliance Notes**: Public FDA service, drug shortage status information 267 | 268 | #### openfda_device_events 269 | 270 | - **URL**: `https://api.fda.gov/device/event.json` 271 | - **Description**: FDA MAUDE database for medical device adverse events 272 | - **Data Types**: device_events 273 | - **Rate Limit**: 40 requests/minute (240 with API key) 274 | - **Authentication**: Optional OPENFDA_API_KEY for increased rate limits 275 | - **Compliance Notes**: Public FDA service, device malfunction and adverse event reports 276 | 277 | #### openfda_drug_enforcement 278 | 279 | - **URL**: `https://api.fda.gov/drug/enforcement.json` 280 | - **Description**: FDA Enforcement database for drug recall information 281 | - **Data Types**: adverse_events 282 | - **Rate Limit**: 40 requests/minute (240 with API key) 283 | - **Authentication**: Optional OPENFDA_API_KEY for increased rate limits 284 | - **Compliance Notes**: Public FDA service, drug recall and enforcement actions 285 | 286 | #### openfda_drug_events 287 | 288 | - **URL**: `https://api.fda.gov/drug/event.json` 289 | - **Description**: FDA Adverse Event Reporting System (FAERS) for drug safety data 290 | - **Data Types**: adverse_events 291 | - **Rate Limit**: 40 requests/minute (240 with API key) 292 | - **Authentication**: Optional OPENFDA_API_KEY for increased rate limits 293 | - **Compliance Notes**: Public FDA service, voluntary adverse event reports, no PII 294 | 295 | #### openfda_drug_labels 296 | 297 | - **URL**: `https://api.fda.gov/drug/label.json` 298 | - **Description**: FDA Structured Product Labeling (SPL) for drug prescribing information 299 | - **Data Types**: drug_labels 300 | - **Rate Limit**: 40 requests/minute (240 with API key) 301 | - **Authentication**: Optional OPENFDA_API_KEY for increased rate limits 302 | - **Compliance Notes**: Public FDA service, official drug labeling data 303 | 304 | #### openfda_drugsfda 305 | 306 | - **URL**: `https://api.fda.gov/drug/drugsfda.json` 307 | - **Description**: FDA Drugs@FDA database for drug approval information 308 | - **Data Types**: drug_labels 309 | - **Rate Limit**: 40 requests/minute (240 with API key) 310 | - **Authentication**: Optional OPENFDA_API_KEY for increased rate limits 311 | - **Compliance Notes**: Public FDA service, official drug approval records 312 | 313 | ## Domain Summary 314 | 315 | | Domain | Category | Endpoints | 316 | | ---------------------------- | --------------------- | --------- | 317 | | api.biorxiv.org | biomedical_literature | 2 | 318 | | api.fda.gov | regulatory_data | 5 | 319 | | api.gdc.cancer.gov | variant_databases | 2 | 320 | | clinicaltrials.gov | clinical_trials | 1 | 321 | | clinicaltrialsapi.cancer.gov | clinical_trials | 5 | 322 | | mychem.info | variant_databases | 2 | 323 | | mydisease.info | variant_databases | 2 | 324 | | mygene.info | variant_databases | 2 | 325 | | myvariant.info | variant_databases | 2 | 326 | | rest.ensembl.org | variant_databases | 1 | 327 | | www.cbioportal.org | cancer_genomics | 6 | 328 | | www.ebi.ac.uk | biomedical_literature | 1 | 329 | | www.fda.gov | regulatory_data | 1 | 330 | | www.ncbi.nlm.nih.gov | biomedical_literature | 3 | 331 | 332 | ## Compliance and Privacy 333 | 334 | All endpoints accessed by BioMCP: 335 | 336 | - Use publicly available APIs 337 | - Do not transmit personally identifiable information (PII) 338 | - Access only aggregate or de-identified data 339 | - Comply with respective terms of service 340 | 341 | ## Network Control 
342 | 343 | For air-gapped or restricted environments, BioMCP supports: 344 | 345 | - Offline mode via `BIOMCP_OFFLINE=true` environment variable 346 | - Custom proxy configuration via standard HTTP(S)\_PROXY variables 347 | - SSL certificate pinning for enhanced security 348 | ``` -------------------------------------------------------------------------------- /THIRD_PARTY_ENDPOINTS.md: -------------------------------------------------------------------------------- ```markdown 1 | # Third-Party Endpoints Used by BioMCP 2 | 3 | _This file is auto-generated from the endpoint registry._ 4 | 5 | ## Overview 6 | 7 | BioMCP connects to 14 external domains across 35 endpoints. 8 | 9 | ## Endpoints by Category 10 | 11 | ### Biomedical Literature 12 | 13 | #### biorxiv_api 14 | 15 | - **URL**: `https://api.biorxiv.org/details/biorxiv` 16 | - **Description**: bioRxiv API for searching biology preprints 17 | - **Data Types**: research_articles 18 | - **Rate Limit**: Not specified 19 | - **Compliance Notes**: Public preprint server, no PII transmitted 20 | 21 | #### europe_pmc 22 | 23 | - **URL**: `https://www.ebi.ac.uk/europepmc/webservices/rest/search` 24 | - **Description**: Europe PMC REST API for searching biomedical literature 25 | - **Data Types**: research_articles 26 | - **Rate Limit**: Not specified 27 | - **Compliance Notes**: Public EMBL-EBI service, no PII transmitted 28 | 29 | #### medrxiv_api 30 | 31 | - **URL**: `https://api.biorxiv.org/details/medrxiv` 32 | - **Description**: medRxiv API for searching medical preprints 33 | - **Data Types**: research_articles 34 | - **Rate Limit**: Not specified 35 | - **Compliance Notes**: Public preprint server, no PII transmitted 36 | 37 | #### pubtator3_autocomplete 38 | 39 | - **URL**: `https://www.ncbi.nlm.nih.gov/research/pubtator3-api/entity/autocomplete/` 40 | - **Description**: PubTator3 API for entity name autocomplete suggestions 41 | - **Data Types**: gene_annotations 42 | - **Rate Limit**: 20 requests/second 43 | - **Compliance Notes**: Public NIH/NCBI service, no PII transmitted 44 | 45 | #### pubtator3_export 46 | 47 | - **URL**: `https://www.ncbi.nlm.nih.gov/research/pubtator3-api/publications/export/biocjson` 48 | - **Description**: PubTator3 API for fetching full article annotations in BioC-JSON format 49 | - **Data Types**: research_articles 50 | - **Rate Limit**: 20 requests/second 51 | - **Compliance Notes**: Public NIH/NCBI service, no PII transmitted 52 | 53 | #### pubtator3_search 54 | 55 | - **URL**: `https://www.ncbi.nlm.nih.gov/research/pubtator3-api/search/` 56 | - **Description**: PubTator3 API for searching biomedical literature with entity annotations 57 | - **Data Types**: research_articles 58 | - **Rate Limit**: 20 requests/second 59 | - **Compliance Notes**: Public NIH/NCBI service, no PII transmitted 60 | 61 | ### Clinical Trials 62 | 63 | #### clinicaltrials_search 64 | 65 | - **URL**: `https://clinicaltrials.gov/api/v2/studies` 66 | - **Description**: ClinicalTrials.gov API v2 for searching clinical trials 67 | - **Data Types**: clinical_trial_data 68 | - **Rate Limit**: 10 requests/second 69 | - **Compliance Notes**: Public NIH service, may contain trial participant criteria 70 | 71 | #### nci_biomarkers 72 | 73 | - **URL**: `https://clinicaltrialsapi.cancer.gov/api/v2/biomarkers` 74 | - **Description**: NCI API for biomarkers used in clinical trials 75 | - **Data Types**: clinical_trial_data 76 | - **Rate Limit**: Not specified 77 | - **Authentication**: Optional NCI_API_KEY for increased access 78 | - 
**Compliance Notes**: Public NCI service, biomarker metadata 79 | 80 | #### nci_diseases 81 | 82 | - **URL**: `https://clinicaltrialsapi.cancer.gov/api/v2/diseases` 83 | - **Description**: NCI API for cancer disease vocabulary 84 | - **Data Types**: clinical_trial_data 85 | - **Rate Limit**: Not specified 86 | - **Authentication**: Optional NCI_API_KEY for increased access 87 | - **Compliance Notes**: Public NCI service, disease ontology 88 | 89 | #### nci_interventions 90 | 91 | - **URL**: `https://clinicaltrialsapi.cancer.gov/api/v2/interventions` 92 | - **Description**: NCI API for cancer treatment interventions 93 | - **Data Types**: clinical_trial_data 94 | - **Rate Limit**: Not specified 95 | - **Authentication**: Optional NCI_API_KEY for increased access 96 | - **Compliance Notes**: Public NCI service, intervention metadata 97 | 98 | #### nci_organizations 99 | 100 | - **URL**: `https://clinicaltrialsapi.cancer.gov/api/v2/organizations` 101 | - **Description**: NCI API for cancer research organizations 102 | - **Data Types**: clinical_trial_data 103 | - **Rate Limit**: Not specified 104 | - **Authentication**: Optional NCI_API_KEY for increased access 105 | - **Compliance Notes**: Public NCI service, organization metadata 106 | 107 | #### nci_trials 108 | 109 | - **URL**: `https://clinicaltrialsapi.cancer.gov/api/v2/trials` 110 | - **Description**: NCI Clinical Trials Search API for cancer trials 111 | - **Data Types**: clinical_trial_data 112 | - **Rate Limit**: Not specified 113 | - **Authentication**: Optional NCI_API_KEY for increased access 114 | - **Compliance Notes**: Public NCI service, cancer trial data 115 | 116 | ### Variant Databases 117 | 118 | #### ensembl_variation 119 | 120 | - **URL**: `https://rest.ensembl.org/variation/human` 121 | - **Description**: Ensembl REST API for human genetic variation data 122 | - **Data Types**: genetic_variants 123 | - **Rate Limit**: 15 requests/second 124 | - **Compliance Notes**: Public EMBL-EBI service, population genetics data 125 | 126 | #### gdc_ssm_occurrences 127 | 128 | - **URL**: `https://api.gdc.cancer.gov/ssm_occurrences` 129 | - **Description**: NCI GDC API for mutation occurrences in cancer samples 130 | - **Data Types**: cancer_mutations 131 | - **Rate Limit**: Not specified 132 | - **Compliance Notes**: Public NCI service, aggregate cancer genomics data 133 | 134 | #### gdc_ssms 135 | 136 | - **URL**: `https://api.gdc.cancer.gov/ssms` 137 | - **Description**: NCI GDC API for somatic mutations 138 | - **Data Types**: cancer_mutations 139 | - **Rate Limit**: Not specified 140 | - **Compliance Notes**: Public NCI service, aggregate cancer genomics data 141 | 142 | #### mychem_chem 143 | 144 | - **URL**: `https://mychem.info/v1/chem` 145 | - **Description**: MyChem.info API for fetching specific drug/chemical details 146 | - **Data Types**: gene_annotations 147 | - **Rate Limit**: 10 requests/second 148 | - **Compliance Notes**: Public BioThings service, drug/chemical annotation data 149 | 150 | #### mychem_query 151 | 152 | - **URL**: `https://mychem.info/v1/query` 153 | - **Description**: MyChem.info API for querying drug/chemical information 154 | - **Data Types**: gene_annotations 155 | - **Rate Limit**: 10 requests/second 156 | - **Compliance Notes**: Public BioThings service, drug/chemical annotation data 157 | 158 | #### mydisease_disease 159 | 160 | - **URL**: `https://mydisease.info/v1/disease` 161 | - **Description**: MyDisease.info API for fetching specific disease details 162 | - **Data Types**: 
gene_annotations 163 | - **Rate Limit**: 10 requests/second 164 | - **Compliance Notes**: Public BioThings service, disease ontology data 165 | 166 | #### mydisease_query 167 | 168 | - **URL**: `https://mydisease.info/v1/query` 169 | - **Description**: MyDisease.info API for querying disease information 170 | - **Data Types**: gene_annotations 171 | - **Rate Limit**: 10 requests/second 172 | - **Compliance Notes**: Public BioThings service, disease ontology data 173 | 174 | #### mygene_gene 175 | 176 | - **URL**: `https://mygene.info/v3/gene` 177 | - **Description**: MyGene.info API for fetching specific gene details 178 | - **Data Types**: gene_annotations 179 | - **Rate Limit**: 10 requests/second 180 | - **Compliance Notes**: Public BioThings service, gene annotation data 181 | 182 | #### mygene_query 183 | 184 | - **URL**: `https://mygene.info/v3/query` 185 | - **Description**: MyGene.info API for querying gene information 186 | - **Data Types**: gene_annotations 187 | - **Rate Limit**: 10 requests/second 188 | - **Compliance Notes**: Public BioThings service, gene annotation data 189 | 190 | #### myvariant_query 191 | 192 | - **URL**: `https://myvariant.info/v1/query` 193 | - **Description**: MyVariant.info API for querying genetic variants 194 | - **Data Types**: genetic_variants 195 | - **Rate Limit**: 1000 requests/hour (anonymous) 196 | - **Compliance Notes**: Public service aggregating variant databases, no patient data 197 | 198 | #### myvariant_variant 199 | 200 | - **URL**: `https://myvariant.info/v1/variant` 201 | - **Description**: MyVariant.info API for fetching specific variant details 202 | - **Data Types**: genetic_variants 203 | - **Rate Limit**: 1000 requests/hour (anonymous) 204 | - **Compliance Notes**: Public service aggregating variant databases, no patient data 205 | 206 | ### Cancer Genomics 207 | 208 | #### cbioportal_api 209 | 210 | - **URL**: `https://www.cbioportal.org/api` 211 | - **Description**: cBioPortal API for cancer genomics data 212 | - **Data Types**: cancer_mutations, clinical_trial_data 213 | - **Rate Limit**: 5 requests/second 214 | - **Authentication**: Optional API token for increased rate limits 215 | - **Compliance Notes**: Public MSKCC/Dana-Farber service, aggregate cancer genomics 216 | 217 | #### cbioportal_cancer_types 218 | 219 | - **URL**: `https://www.cbioportal.org/api/cancer-types` 220 | - **Description**: cBioPortal API for cancer type hierarchy 221 | - **Data Types**: cancer_mutations 222 | - **Rate Limit**: 5 requests/second 223 | - **Compliance Notes**: Public MSKCC/Dana-Farber service, cancer type metadata 224 | 225 | #### cbioportal_genes 226 | 227 | - **URL**: `https://www.cbioportal.org/api/genes` 228 | - **Description**: cBioPortal API for gene information 229 | - **Data Types**: gene_annotations 230 | - **Rate Limit**: 5 requests/second 231 | - **Compliance Notes**: Public MSKCC/Dana-Farber service, gene metadata 232 | 233 | #### cbioportal_molecular_profiles 234 | 235 | - **URL**: `https://www.cbioportal.org/api/molecular-profiles` 236 | - **Description**: cBioPortal API for molecular profiles 237 | - **Data Types**: cancer_mutations 238 | - **Rate Limit**: 5 requests/second 239 | - **Compliance Notes**: Public MSKCC/Dana-Farber service, study metadata 240 | 241 | #### cbioportal_mutations 242 | 243 | - **URL**: `https://www.cbioportal.org/api/mutations` 244 | - **Description**: cBioPortal API for mutation data 245 | - **Data Types**: cancer_mutations 246 | - **Rate Limit**: 5 requests/second 247 | - **Compliance Notes**: 
Public MSKCC/Dana-Farber service, aggregate mutation data 248 | 249 | #### cbioportal_studies 250 | 251 | - **URL**: `https://www.cbioportal.org/api/studies` 252 | - **Description**: cBioPortal API for cancer studies 253 | - **Data Types**: clinical_trial_data, cancer_mutations 254 | - **Rate Limit**: 5 requests/second 255 | - **Compliance Notes**: Public MSKCC/Dana-Farber service, study metadata 256 | 257 | ### Regulatory Data 258 | 259 | #### fda_drug_shortages 260 | 261 | - **URL**: `https://www.fda.gov/media/169066/download` 262 | - **Description**: FDA Drug Shortages database (cached locally) 263 | - **Data Types**: drug_labels 264 | - **Rate Limit**: Cached with 24-hour TTL 265 | - **Authentication**: None required 266 | - **Compliance Notes**: Public FDA service, drug shortage status information 267 | 268 | #### openfda_device_events 269 | 270 | - **URL**: `https://api.fda.gov/device/event.json` 271 | - **Description**: FDA MAUDE database for medical device adverse events 272 | - **Data Types**: device_events 273 | - **Rate Limit**: 40 requests/minute (240 with API key) 274 | - **Authentication**: Optional OPENFDA_API_KEY for increased rate limits 275 | - **Compliance Notes**: Public FDA service, device malfunction and adverse event reports 276 | 277 | #### openfda_drug_enforcement 278 | 279 | - **URL**: `https://api.fda.gov/drug/enforcement.json` 280 | - **Description**: FDA Enforcement database for drug recall information 281 | - **Data Types**: adverse_events 282 | - **Rate Limit**: 40 requests/minute (240 with API key) 283 | - **Authentication**: Optional OPENFDA_API_KEY for increased rate limits 284 | - **Compliance Notes**: Public FDA service, drug recall and enforcement actions 285 | 286 | #### openfda_drug_events 287 | 288 | - **URL**: `https://api.fda.gov/drug/event.json` 289 | - **Description**: FDA Adverse Event Reporting System (FAERS) for drug safety data 290 | - **Data Types**: adverse_events 291 | - **Rate Limit**: 40 requests/minute (240 with API key) 292 | - **Authentication**: Optional OPENFDA_API_KEY for increased rate limits 293 | - **Compliance Notes**: Public FDA service, voluntary adverse event reports, no PII 294 | 295 | #### openfda_drug_labels 296 | 297 | - **URL**: `https://api.fda.gov/drug/label.json` 298 | - **Description**: FDA Structured Product Labeling (SPL) for drug prescribing information 299 | - **Data Types**: drug_labels 300 | - **Rate Limit**: 40 requests/minute (240 with API key) 301 | - **Authentication**: Optional OPENFDA_API_KEY for increased rate limits 302 | - **Compliance Notes**: Public FDA service, official drug labeling data 303 | 304 | #### openfda_drugsfda 305 | 306 | - **URL**: `https://api.fda.gov/drug/drugsfda.json` 307 | - **Description**: FDA Drugs@FDA database for drug approval information 308 | - **Data Types**: drug_labels 309 | - **Rate Limit**: 40 requests/minute (240 with API key) 310 | - **Authentication**: Optional OPENFDA_API_KEY for increased rate limits 311 | - **Compliance Notes**: Public FDA service, official drug approval records 312 | 313 | ## Domain Summary 314 | 315 | | Domain | Category | Endpoints | 316 | | ---------------------------- | --------------------- | --------- | 317 | | api.biorxiv.org | biomedical_literature | 2 | 318 | | api.fda.gov | regulatory_data | 5 | 319 | | api.gdc.cancer.gov | variant_databases | 2 | 320 | | clinicaltrials.gov | clinical_trials | 1 | 321 | | clinicaltrialsapi.cancer.gov | clinical_trials | 5 | 322 | | mychem.info | variant_databases | 2 | 323 | | mydisease.info | 
variant_databases | 2 | 324 | | mygene.info | variant_databases | 2 | 325 | | myvariant.info | variant_databases | 2 | 326 | | rest.ensembl.org | variant_databases | 1 | 327 | | www.cbioportal.org | cancer_genomics | 6 | 328 | | www.ebi.ac.uk | biomedical_literature | 1 | 329 | | www.fda.gov | regulatory_data | 1 | 330 | | www.ncbi.nlm.nih.gov | biomedical_literature | 3 | 331 | 332 | ## Compliance and Privacy 333 | 334 | All endpoints accessed by BioMCP: 335 | 336 | - Use publicly available APIs 337 | - Do not transmit personally identifiable information (PII) 338 | - Access only aggregate or de-identified data 339 | - Comply with respective terms of service 340 | 341 | ## Network Control 342 | 343 | For air-gapped or restricted environments, BioMCP supports: 344 | 345 | - Offline mode via `BIOMCP_OFFLINE=true` environment variable 346 | - Custom proxy configuration via standard HTTP(S)\_PROXY variables 347 | - SSL certificate pinning for enhanced security 348 | ``` -------------------------------------------------------------------------------- /src/biomcp/openfda/drug_shortages.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | FDA drug shortages integration with caching. 3 | 4 | Note: FDA does not yet provide an OpenFDA endpoint for drug shortages. 5 | This module fetches from the FDA Drug Shortages JSON feed and caches it locally. 6 | """ 7 | 8 | import json 9 | import logging 10 | import os 11 | import tempfile 12 | from datetime import datetime, timedelta 13 | from pathlib import Path 14 | from typing import Any 15 | 16 | # Platform-specific file locking 17 | try: 18 | import fcntl 19 | 20 | HAS_FCNTL = True 21 | except ImportError: 22 | # Windows doesn't have fcntl 23 | HAS_FCNTL = False 24 | 25 | from ..http_client import request_api 26 | from .constants import OPENFDA_DEFAULT_LIMIT, OPENFDA_SHORTAGE_DISCLAIMER 27 | from .drug_shortages_detail_helpers import ( 28 | format_shortage_details_section, 29 | format_shortage_names, 30 | format_shortage_status, 31 | format_shortage_timeline, 32 | ) 33 | from .drug_shortages_helpers import ( 34 | filter_shortages, 35 | format_shortage_search_header, 36 | ) 37 | from .utils import clean_text, format_count, truncate_text 38 | 39 | logger = logging.getLogger(__name__) 40 | 41 | # FDA Drug Shortages feed URL 42 | FDA_SHORTAGES_URL = ( 43 | "https://www.accessdata.fda.gov/scripts/drugshortages/default.cfm" 44 | ) 45 | # Alternative: Direct JSON feed if available 46 | FDA_SHORTAGES_JSON_URL = "https://www.fda.gov/media/169066/download" # Example URL, update as needed 47 | 48 | # Cache configuration 49 | CACHE_DIR = Path(tempfile.gettempdir()) / "biomcp_cache" 50 | CACHE_FILE = CACHE_DIR / "drug_shortages.json" 51 | CACHE_TTL_HOURS = int(os.environ.get("BIOMCP_SHORTAGE_CACHE_TTL", "24")) 52 | 53 | 54 | async def _fetch_shortage_data() -> dict[str, Any] | None: 55 | """ 56 | Fetch drug shortage data from FDA. 
57 | 58 | Returns: 59 | Dictionary with shortage data or None if fetch fails 60 | """ 61 | try: 62 | # Try to fetch the JSON feed 63 | # Note: The actual URL may need to be updated based on FDA's current API 64 | response, error = await request_api( 65 | url=FDA_SHORTAGES_JSON_URL, 66 | request={}, 67 | method="GET", 68 | domain="fda_drug_shortages", 69 | ) 70 | 71 | if error: 72 | logger.error(f"API error: {error}") 73 | return None # Don't return mock data in production 74 | 75 | if response and hasattr(response, "model_dump"): 76 | data = response.model_dump() 77 | elif isinstance(response, dict): 78 | data = response 79 | else: 80 | data = {} 81 | 82 | # Add fetch timestamp 83 | data["_fetched_at"] = datetime.now().isoformat() 84 | 85 | return data 86 | 87 | except Exception as e: 88 | logger.error(f"Failed to fetch shortage data: {e}") 89 | return None # Don't return mock data in production 90 | 91 | 92 | def _read_cache_file() -> dict[str, Any] | None: 93 | """Read and validate cache file if it exists and is recent.""" 94 | if not CACHE_FILE.exists(): 95 | return None 96 | 97 | try: 98 | with open(CACHE_FILE) as f: 99 | # Acquire shared lock for reading (Unix only) 100 | if HAS_FCNTL: 101 | fcntl.flock(f.fileno(), fcntl.LOCK_SH) 102 | try: 103 | data = json.load(f) 104 | finally: 105 | # Release lock (Unix only) 106 | if HAS_FCNTL: 107 | fcntl.flock(f.fileno(), fcntl.LOCK_UN) 108 | 109 | # Check cache age 110 | fetched_at = datetime.fromisoformat(data.get("_fetched_at", "")) 111 | cache_age = datetime.now() - fetched_at 112 | 113 | if cache_age < timedelta(hours=CACHE_TTL_HOURS): 114 | logger.debug(f"Using cached shortage data (age: {cache_age})") 115 | return data 116 | 117 | logger.debug(f"Cache expired (age: {cache_age}), fetching new data") 118 | return None 119 | except (OSError, json.JSONDecodeError, ValueError) as e: 120 | logger.warning(f"Failed to read cache: {e}") 121 | return None 122 | 123 | 124 | def _write_cache_file(data: dict[str, Any]) -> None: 125 | """Write data to cache file with atomic operation.""" 126 | temp_file = CACHE_FILE.with_suffix(".tmp") 127 | try: 128 | with open(temp_file, "w") as f: 129 | # Acquire exclusive lock for writing (Unix only) 130 | if HAS_FCNTL: 131 | fcntl.flock(f.fileno(), fcntl.LOCK_EX) 132 | try: 133 | json.dump(data, f, indent=2) 134 | finally: 135 | # Release lock (Unix only) 136 | if HAS_FCNTL: 137 | fcntl.flock(f.fileno(), fcntl.LOCK_UN) 138 | 139 | # Atomic rename 140 | temp_file.replace(CACHE_FILE) 141 | logger.debug(f"Saved shortage data to cache: {CACHE_FILE}") 142 | except (OSError, json.JSONDecodeError) as e: 143 | logger.warning(f"Failed to save cache: {e}") 144 | # Clean up temp file if it exists 145 | if temp_file.exists(): 146 | temp_file.unlink() 147 | 148 | 149 | async def _get_cached_shortage_data() -> dict[str, Any] | None: 150 | """ 151 | Get shortage data from cache if valid, otherwise fetch new data. 
152 | 153 | Returns: 154 | Dictionary with shortage data or None if unavailable 155 | """ 156 | # Ensure cache directory exists 157 | CACHE_DIR.mkdir(parents=True, exist_ok=True) 158 | 159 | # Try to read from cache 160 | cached_data = _read_cache_file() 161 | if cached_data: 162 | return cached_data 163 | 164 | # Fetch new data 165 | data = await _fetch_shortage_data() 166 | 167 | # Save to cache if we got data 168 | if data: 169 | _write_cache_file(data) 170 | 171 | return data 172 | 173 | 174 | async def search_drug_shortages( 175 | drug: str | None = None, 176 | status: str | None = None, 177 | therapeutic_category: str | None = None, 178 | limit: int = OPENFDA_DEFAULT_LIMIT, 179 | skip: int = 0, 180 | api_key: str | None = None, 181 | ) -> str: 182 | """ 183 | Search FDA drug shortage records. 184 | 185 | Args: 186 | drug: Drug name (generic or brand) to search for 187 | status: Shortage status (current, resolved, discontinued) 188 | therapeutic_category: Therapeutic category to filter by 189 | limit: Maximum number of results to return 190 | skip: Number of results to skip (for pagination) 191 | api_key: Optional OpenFDA API key (overrides OPENFDA_API_KEY env var) 192 | 193 | Returns: 194 | Formatted string with drug shortage information 195 | """ 196 | # Get shortage data (from cache or fresh) 197 | data = await _get_cached_shortage_data() 198 | 199 | if not data: 200 | return ( 201 | "⚠️ **Drug Shortage Data Temporarily Unavailable**\n\n" 202 | "The FDA drug shortage database cannot be accessed at this time. " 203 | "This feature requires FDA to provide a machine-readable API endpoint.\n\n" 204 | "**Alternative Options:**\n" 205 | "• Visit FDA Drug Shortages Database: https://www.accessdata.fda.gov/scripts/drugshortages/\n" 206 | "• Check ASHP Drug Shortages: https://www.ashp.org/drug-shortages/current-shortages\n\n" 207 | "Note: FDA currently provides shortage data only as PDF/HTML, not as a queryable API." 208 | ) 209 | 210 | shortages = data.get("shortages", []) 211 | 212 | # Filter results based on criteria 213 | filtered = filter_shortages(shortages, drug, status, therapeutic_category) 214 | 215 | # Apply pagination 216 | total = len(filtered) 217 | filtered = filtered[skip : skip + limit] 218 | 219 | if not filtered: 220 | return "No drug shortages found matching your criteria." 221 | 222 | # Format the results 223 | output = ["## FDA Drug Shortage Information\n"] 224 | 225 | # Add header information 226 | last_updated = data.get("last_updated") or data.get("_fetched_at") 227 | output.extend( 228 | format_shortage_search_header( 229 | drug, status, therapeutic_category, last_updated 230 | ) 231 | ) 232 | 233 | output.append( 234 | f"**Total Shortages Found**: {format_count(total, 'shortage')}\n" 235 | ) 236 | 237 | # Summary by status 238 | if len(filtered) > 1: 239 | output.extend(_format_shortage_summary(filtered)) 240 | 241 | # Show results 242 | output.append(f"### Shortages (showing {len(filtered)} of {total}):\n") 243 | 244 | for i, shortage in enumerate(filtered, 1): 245 | output.extend(_format_shortage_entry(shortage, i)) 246 | 247 | output.append(f"\n---\n{OPENFDA_SHORTAGE_DISCLAIMER}") 248 | 249 | return "\n".join(output) 250 | 251 | 252 | async def get_drug_shortage( 253 | drug: str, 254 | api_key: str | None = None, 255 | ) -> str: 256 | """ 257 | Get detailed shortage information for a specific drug. 
258 | 259 | Args: 260 | drug: Generic or brand name of the drug 261 | api_key: Optional OpenFDA API key (overrides OPENFDA_API_KEY env var) 262 | 263 | Returns: 264 | Formatted string with detailed shortage information 265 | """ 266 | # Get shortage data 267 | data = await _get_cached_shortage_data() 268 | 269 | if not data: 270 | return ( 271 | "⚠️ **Drug Shortage Data Temporarily Unavailable**\n\n" 272 | "The FDA drug shortage database cannot be accessed at this time. " 273 | "This feature requires FDA to provide a machine-readable API endpoint.\n\n" 274 | "**Alternative Options:**\n" 275 | "• Visit FDA Drug Shortages Database: https://www.accessdata.fda.gov/scripts/drugshortages/\n" 276 | "• Check ASHP Drug Shortages: https://www.ashp.org/drug-shortages/current-shortages\n\n" 277 | "Note: FDA currently provides shortage data only as PDF/HTML, not as a queryable API." 278 | ) 279 | 280 | shortages = data.get("shortages", []) 281 | 282 | # Find the specific drug 283 | drug_lower = drug.lower() 284 | matched = None 285 | 286 | for shortage in shortages: 287 | generic = shortage.get("generic_name", "").lower() 288 | brands = [b.lower() for b in shortage.get("brand_names", [])] 289 | 290 | if drug_lower in generic or any(drug_lower in b for b in brands): 291 | matched = shortage 292 | break 293 | 294 | if not matched: 295 | return f"No shortage information found for {drug}" 296 | 297 | # Format detailed information 298 | output = [ 299 | f"## Drug Shortage Details: {matched.get('generic_name', drug)}\n" 300 | ] 301 | 302 | # Last updated 303 | last_updated = data.get("last_updated") or data.get("_fetched_at") 304 | if last_updated: 305 | try: 306 | updated_dt = datetime.fromisoformat(last_updated) 307 | output.append( 308 | f"*Data Updated: {updated_dt.strftime('%Y-%m-%d %H:%M')}*\n" 309 | ) 310 | except (ValueError, TypeError): 311 | pass 312 | 313 | output.extend(_format_shortage_detail(matched)) 314 | 315 | output.append(f"\n---\n{OPENFDA_SHORTAGE_DISCLAIMER}") 316 | 317 | return "\n".join(output) 318 | 319 | 320 | def _format_shortage_summary(shortages: list[dict[str, Any]]) -> list[str]: 321 | """Format summary of shortage statuses.""" 322 | output = [] 323 | 324 | # Count by status 325 | current_count = sum( 326 | 1 for s in shortages if "current" in s.get("status", "").lower() 327 | ) 328 | resolved_count = sum( 329 | 1 for s in shortages if "resolved" in s.get("status", "").lower() 330 | ) 331 | 332 | if current_count or resolved_count: 333 | output.append("### Status Summary:") 334 | if current_count: 335 | output.append(f"- **Current Shortages**: {current_count}") 336 | if resolved_count: 337 | output.append(f"- **Resolved**: {resolved_count}") 338 | output.append("") 339 | 340 | return output 341 | 342 | 343 | def _format_shortage_entry(shortage: dict[str, Any], num: int) -> list[str]: 344 | """Format a single shortage entry.""" 345 | output = [] 346 | 347 | generic = shortage.get("generic_name", "Unknown Drug") 348 | status = shortage.get("status", "Unknown") 349 | 350 | # Status indicator 351 | status_emoji = "🔴" if "current" in status.lower() else "🟢" 352 | 353 | output.append(f"#### {num}. 
{generic}") 354 | output.append(f"{status_emoji} **Status**: {status}") 355 | 356 | # Brand names 357 | brands = shortage.get("brand_names") 358 | if brands and brands[0]: # Check for non-empty brands 359 | output.append(f"**Brand Names**: {', '.join(brands)}") 360 | 361 | # Dates 362 | if start_date := shortage.get("shortage_start_date"): 363 | output.append(f"**Shortage Started**: {start_date}") 364 | 365 | if resolution_date := shortage.get("resolution_date"): 366 | output.append(f"**Resolved**: {resolution_date}") 367 | elif estimated := shortage.get("estimated_resolution"): 368 | output.append(f"**Estimated Resolution**: {estimated}") 369 | 370 | # Reason 371 | if reason := shortage.get("reason"): 372 | output.append(f"**Reason**: {reason}") 373 | 374 | # Therapeutic category 375 | if category := shortage.get("therapeutic_category"): 376 | output.append(f"**Therapeutic Category**: {category}") 377 | 378 | # Notes 379 | if notes := shortage.get("notes"): 380 | cleaned_notes = truncate_text(clean_text(notes), 200) 381 | output.append(f"\n**Notes**: {cleaned_notes}") 382 | 383 | output.append("") 384 | return output 385 | 386 | 387 | def _format_shortage_detail(shortage: dict[str, Any]) -> list[str]: 388 | """Format detailed shortage information.""" 389 | output = ["### Shortage Information"] 390 | 391 | # Status 392 | output.extend(format_shortage_status(shortage)) 393 | 394 | # Names 395 | output.extend(format_shortage_names(shortage)) 396 | 397 | # Manufacturers 398 | if manufacturers := shortage.get("manufacturers"): 399 | output.append(f"**Manufacturers**: {', '.join(manufacturers)}") 400 | 401 | # Therapeutic category 402 | if category := shortage.get("therapeutic_category"): 403 | output.append(f"**Therapeutic Category**: {category}") 404 | 405 | # Timeline 406 | output.append("") 407 | output.extend(format_shortage_timeline(shortage)) 408 | 409 | # Details 410 | output.append("") 411 | output.extend(format_shortage_details_section(shortage)) 412 | 413 | # Alternatives if available 414 | if alternatives := shortage.get("alternatives"): 415 | output.append("\n### Alternative Products") 416 | if isinstance(alternatives, list): 417 | output.append(", ".join(alternatives)) 418 | else: 419 | output.append(str(alternatives)) 420 | 421 | return output 422 | ```