# beehiveinnovations/gemini-mcp-server
tokens: 49383/50000 101/353 files (page 1/19)
This is page 1 of 19. Use http://codebase.md/beehiveinnovations/gemini-mcp-server?page={x} to view the full context.

# Directory Structure

```
├── .claude
│   ├── commands
│   │   └── fix-github-issue.md
│   └── settings.json
├── .coveragerc
├── .dockerignore
├── .env.example
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.yml
│   │   ├── config.yml
│   │   ├── documentation.yml
│   │   ├── feature_request.yml
│   │   └── tool_addition.yml
│   ├── pull_request_template.md
│   └── workflows
│       ├── docker-pr.yml
│       ├── docker-release.yml
│       ├── semantic-pr.yml
│       ├── semantic-release.yml
│       └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── AGENTS.md
├── CHANGELOG.md
├── claude_config_example.json
├── CLAUDE.md
├── clink
│   ├── __init__.py
│   ├── agents
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── claude.py
│   │   ├── codex.py
│   │   └── gemini.py
│   ├── constants.py
│   ├── models.py
│   ├── parsers
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── claude.py
│   │   ├── codex.py
│   │   └── gemini.py
│   └── registry.py
├── code_quality_checks.ps1
├── code_quality_checks.sh
├── communication_simulator_test.py
├── conf
│   ├── __init__.py
│   ├── azure_models.json
│   ├── cli_clients
│   │   ├── claude.json
│   │   ├── codex.json
│   │   └── gemini.json
│   ├── custom_models.json
│   ├── dial_models.json
│   ├── gemini_models.json
│   ├── openai_models.json
│   ├── openrouter_models.json
│   └── xai_models.json
├── config.py
├── docker
│   ├── README.md
│   └── scripts
│       ├── build.ps1
│       ├── build.sh
│       ├── deploy.ps1
│       ├── deploy.sh
│       └── healthcheck.py
├── docker-compose.yml
├── Dockerfile
├── docs
│   ├── adding_providers.md
│   ├── adding_tools.md
│   ├── advanced-usage.md
│   ├── ai_banter.md
│   ├── ai-collaboration.md
│   ├── azure_openai.md
│   ├── configuration.md
│   ├── context-revival.md
│   ├── contributions.md
│   ├── custom_models.md
│   ├── docker-deployment.md
│   ├── gemini-setup.md
│   ├── getting-started.md
│   ├── index.md
│   ├── locale-configuration.md
│   ├── logging.md
│   ├── model_ranking.md
│   ├── testing.md
│   ├── tools
│   │   ├── analyze.md
│   │   ├── apilookup.md
│   │   ├── challenge.md
│   │   ├── chat.md
│   │   ├── clink.md
│   │   ├── codereview.md
│   │   ├── consensus.md
│   │   ├── debug.md
│   │   ├── docgen.md
│   │   ├── listmodels.md
│   │   ├── planner.md
│   │   ├── precommit.md
│   │   ├── refactor.md
│   │   ├── secaudit.md
│   │   ├── testgen.md
│   │   ├── thinkdeep.md
│   │   ├── tracer.md
│   │   └── version.md
│   ├── troubleshooting.md
│   ├── vcr-testing.md
│   └── wsl-setup.md
├── examples
│   ├── claude_config_macos.json
│   └── claude_config_wsl.json
├── LICENSE
├── providers
│   ├── __init__.py
│   ├── azure_openai.py
│   ├── base.py
│   ├── custom.py
│   ├── dial.py
│   ├── gemini.py
│   ├── openai_compatible.py
│   ├── openai.py
│   ├── openrouter.py
│   ├── registries
│   │   ├── __init__.py
│   │   ├── azure.py
│   │   ├── base.py
│   │   ├── custom.py
│   │   ├── dial.py
│   │   ├── gemini.py
│   │   ├── openai.py
│   │   ├── openrouter.py
│   │   └── xai.py
│   ├── registry_provider_mixin.py
│   ├── registry.py
│   ├── shared
│   │   ├── __init__.py
│   │   ├── model_capabilities.py
│   │   ├── model_response.py
│   │   ├── provider_type.py
│   │   └── temperature.py
│   └── xai.py
├── pyproject.toml
├── pytest.ini
├── README.md
├── requirements-dev.txt
├── requirements.txt
├── run_integration_tests.ps1
├── run_integration_tests.sh
├── run-server.ps1
├── run-server.sh
├── scripts
│   └── sync_version.py
├── server.py
├── simulator_tests
│   ├── __init__.py
│   ├── base_test.py
│   ├── conversation_base_test.py
│   ├── log_utils.py
│   ├── test_analyze_validation.py
│   ├── test_basic_conversation.py
│   ├── test_chat_simple_validation.py
│   ├── test_codereview_validation.py
│   ├── test_consensus_conversation.py
│   ├── test_consensus_three_models.py
│   ├── test_consensus_workflow_accurate.py
│   ├── test_content_validation.py
│   ├── test_conversation_chain_validation.py
│   ├── test_cross_tool_comprehensive.py
│   ├── test_cross_tool_continuation.py
│   ├── test_debug_certain_confidence.py
│   ├── test_debug_validation.py
│   ├── test_line_number_validation.py
│   ├── test_logs_validation.py
│   ├── test_model_thinking_config.py
│   ├── test_o3_model_selection.py
│   ├── test_o3_pro_expensive.py
│   ├── test_ollama_custom_url.py
│   ├── test_openrouter_fallback.py
│   ├── test_openrouter_models.py
│   ├── test_per_tool_deduplication.py
│   ├── test_planner_continuation_history.py
│   ├── test_planner_validation_old.py
│   ├── test_planner_validation.py
│   ├── test_precommitworkflow_validation.py
│   ├── test_prompt_size_limit_bug.py
│   ├── test_refactor_validation.py
│   ├── test_secaudit_validation.py
│   ├── test_testgen_validation.py
│   ├── test_thinkdeep_validation.py
│   ├── test_token_allocation_validation.py
│   ├── test_vision_capability.py
│   └── test_xai_models.py
├── systemprompts
│   ├── __init__.py
│   ├── analyze_prompt.py
│   ├── chat_prompt.py
│   ├── clink
│   │   ├── codex_codereviewer.txt
│   │   ├── default_codereviewer.txt
│   │   ├── default_planner.txt
│   │   └── default.txt
│   ├── codereview_prompt.py
│   ├── consensus_prompt.py
│   ├── debug_prompt.py
│   ├── docgen_prompt.py
│   ├── generate_code_prompt.py
│   ├── planner_prompt.py
│   ├── precommit_prompt.py
│   ├── refactor_prompt.py
│   ├── secaudit_prompt.py
│   ├── testgen_prompt.py
│   ├── thinkdeep_prompt.py
│   └── tracer_prompt.py
├── tests
│   ├── __init__.py
│   ├── CASSETTE_MAINTENANCE.md
│   ├── conftest.py
│   ├── gemini_cassettes
│   │   ├── chat_codegen
│   │   │   └── gemini25_pro_calculator
│   │   │       └── mldev.json
│   │   ├── chat_cross
│   │   │   └── step1_gemini25_flash_number
│   │   │       └── mldev.json
│   │   └── consensus
│   │       └── step2_gemini25_flash_against
│   │           └── mldev.json
│   ├── http_transport_recorder.py
│   ├── mock_helpers.py
│   ├── openai_cassettes
│   │   ├── chat_cross_step2_gpt5_reminder.json
│   │   ├── chat_gpt5_continuation.json
│   │   ├── chat_gpt5_moon_distance.json
│   │   ├── consensus_step1_gpt5_for.json
│   │   └── o3_pro_basic_math.json
│   ├── pii_sanitizer.py
│   ├── sanitize_cassettes.py
│   ├── test_alias_target_restrictions.py
│   ├── test_auto_mode_comprehensive.py
│   ├── test_auto_mode_custom_provider_only.py
│   ├── test_auto_mode_model_listing.py
│   ├── test_auto_mode_provider_selection.py
│   ├── test_auto_mode.py
│   ├── test_auto_model_planner_fix.py
│   ├── test_azure_openai_provider.py
│   ├── test_buggy_behavior_prevention.py
│   ├── test_cassette_semantic_matching.py
│   ├── test_challenge.py
│   ├── test_chat_codegen_integration.py
│   ├── test_chat_cross_model_continuation.py
│   ├── test_chat_openai_integration.py
│   ├── test_chat_simple.py
│   ├── test_clink_claude_agent.py
│   ├── test_clink_claude_parser.py
│   ├── test_clink_codex_agent.py
│   ├── test_clink_gemini_agent.py
│   ├── test_clink_gemini_parser.py
│   ├── test_clink_integration.py
│   ├── test_clink_parsers.py
│   ├── test_clink_tool.py
│   ├── test_collaboration.py
│   ├── test_config.py
│   ├── test_consensus_integration.py
│   ├── test_consensus_schema.py
│   ├── test_consensus.py
│   ├── test_conversation_continuation_integration.py
│   ├── test_conversation_field_mapping.py
│   ├── test_conversation_file_features.py
│   ├── test_conversation_memory.py
│   ├── test_conversation_missing_files.py
│   ├── test_custom_openai_temperature_fix.py
│   ├── test_custom_provider.py
│   ├── test_debug.py
│   ├── test_deploy_scripts.py
│   ├── test_dial_provider.py
│   ├── test_directory_expansion_tracking.py
│   ├── test_disabled_tools.py
│   ├── test_docker_claude_desktop_integration.py
│   ├── test_docker_config_complete.py
│   ├── test_docker_healthcheck.py
│   ├── test_docker_implementation.py
│   ├── test_docker_mcp_validation.py
│   ├── test_docker_security.py
│   ├── test_docker_volume_persistence.py
│   ├── test_file_protection.py
│   ├── test_gemini_token_usage.py
│   ├── test_image_support_integration.py
│   ├── test_image_validation.py
│   ├── test_integration_utf8.py
│   ├── test_intelligent_fallback.py
│   ├── test_issue_245_simple.py
│   ├── test_large_prompt_handling.py
│   ├── test_line_numbers_integration.py
│   ├── test_listmodels_restrictions.py
│   ├── test_listmodels.py
│   ├── test_mcp_error_handling.py
│   ├── test_model_enumeration.py
│   ├── test_model_metadata_continuation.py
│   ├── test_model_resolution_bug.py
│   ├── test_model_restrictions.py
│   ├── test_o3_pro_output_text_fix.py
│   ├── test_o3_temperature_fix_simple.py
│   ├── test_openai_compatible_token_usage.py
│   ├── test_openai_provider.py
│   ├── test_openrouter_provider.py
│   ├── test_openrouter_registry.py
│   ├── test_parse_model_option.py
│   ├── test_per_tool_model_defaults.py
│   ├── test_pii_sanitizer.py
│   ├── test_pip_detection_fix.py
│   ├── test_planner.py
│   ├── test_precommit_workflow.py
│   ├── test_prompt_regression.py
│   ├── test_prompt_size_limit_bug_fix.py
│   ├── test_provider_retry_logic.py
│   ├── test_provider_routing_bugs.py
│   ├── test_provider_utf8.py
│   ├── test_providers.py
│   ├── test_rate_limit_patterns.py
│   ├── test_refactor.py
│   ├── test_secaudit.py
│   ├── test_server.py
│   ├── test_supported_models_aliases.py
│   ├── test_thinking_modes.py
│   ├── test_tools.py
│   ├── test_tracer.py
│   ├── test_utf8_localization.py
│   ├── test_utils.py
│   ├── test_uvx_resource_packaging.py
│   ├── test_uvx_support.py
│   ├── test_workflow_file_embedding.py
│   ├── test_workflow_metadata.py
│   ├── test_workflow_prompt_size_validation_simple.py
│   ├── test_workflow_utf8.py
│   ├── test_xai_provider.py
│   ├── transport_helpers.py
│   └── triangle.png
├── tools
│   ├── __init__.py
│   ├── analyze.py
│   ├── apilookup.py
│   ├── challenge.py
│   ├── chat.py
│   ├── clink.py
│   ├── codereview.py
│   ├── consensus.py
│   ├── debug.py
│   ├── docgen.py
│   ├── listmodels.py
│   ├── models.py
│   ├── planner.py
│   ├── precommit.py
│   ├── refactor.py
│   ├── secaudit.py
│   ├── shared
│   │   ├── __init__.py
│   │   ├── base_models.py
│   │   ├── base_tool.py
│   │   ├── exceptions.py
│   │   └── schema_builders.py
│   ├── simple
│   │   ├── __init__.py
│   │   └── base.py
│   ├── testgen.py
│   ├── thinkdeep.py
│   ├── tracer.py
│   ├── version.py
│   └── workflow
│       ├── __init__.py
│       ├── base.py
│       ├── schema_builders.py
│       └── workflow_mixin.py
├── utils
│   ├── __init__.py
│   ├── client_info.py
│   ├── conversation_memory.py
│   ├── env.py
│   ├── file_types.py
│   ├── file_utils.py
│   ├── image_utils.py
│   ├── model_context.py
│   ├── model_restrictions.py
│   ├── security_config.py
│   ├── storage_backend.py
│   └── token_utils.py
└── zen-mcp-server
```

# Files

--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------

```
[run]
source = gemini_server
omit = 
    */tests/*
    */venv/*
    */__pycache__/*
    */site-packages/*

[report]
exclude_lines =
    pragma: no cover
    def __repr__
    if self.debug:
    if settings.DEBUG
    raise AssertionError
    raise NotImplementedError
    if 0:
    if __name__ == .__main__.:
    if TYPE_CHECKING:
    class .*\bProtocol\):
    @(abc\.)?abstractmethod

[html]
directory = htmlcov
```

--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------

```
# Ensure shell scripts always have LF line endings on checkout
*.sh text eol=lf
*.bash text eol=lf

# Python files
*.py text eol=lf

# Shell script without extension
run-server text eol=lf
code_quality_checks text eol=lf
run_integration_tests text eol=lf

# General text files
*.md text
*.txt text
*.yml text
*.yaml text
*.json text
*.xml text

# Binary files
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.ico binary
*.pdf binary
```

--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------

```
# Git
.git
.gitignore

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
venv/
.venv/
.zen_venv/
ENV/
env.bak/
venv.bak/

# IDE
.vscode/
.idea/
*.swp
*.swo

# OS
.DS_Store
Thumbs.db

# Logs
logs/*.log*
*.log

# Docker
Dockerfile*
docker-compose*
.dockerignore

# Documentation
docs/
README.md
*.md

# Tests
tests/
simulator_tests/
test_simulation_files/
pytest.ini

# Development
.env
.env.local
examples/
code_quality_checks.sh
run_integration_tests.sh

# Security - Sensitive files
*.key
*.pem
*.p12
*.pfx
*.crt
*.csr
secrets/
private/

```

--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------

```yaml
---
default_stages: [pre-commit, pre-push]
repos:
  - repo: https://github.com/psf/black
    rev: 25.1.0
    hooks:
      - id: black

  - repo: https://github.com/pycqa/isort
    rev: 6.0.1
    hooks:
      - id: isort
        args: ["--profile", "black"]

  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.12.8
    hooks:
      - id: ruff
        args: [--fix]

# Configuration for specific tools
default_language_version:
  python: python3

# Exclude patterns
exclude: |
  (?x)^(
    \.git/|
    \.venv/|
    venv/|
    \.zen_venv/|
    __pycache__/|
    \.pytest_cache/|
    logs/|
    dist/|
    build/|
    test_simulation_files/
  )

```

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
Pipfile.lock

# poetry
poetry.lock

# pdm
.pdm.toml
.pdm-python
pdm.lock

# PEP 582
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.env~
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
.idea/

# VS Code
.vscode/

# macOS
.DS_Store

# API Keys and secrets
*.key
*.pem
.env.local
.env.*.local

# Test outputs
test_output/
*.test.log
.coverage
htmlcov/
coverage.xml
.pytest_cache/

# Test simulation artifacts (dynamically created during testing)
test_simulation_files/.claude/

# Temporary test directories
test-setup/

# Scratch feature documentation files
FEATURE_*.md
# Temporary files
/tmp/

# Local user instructions
CLAUDE.local.md

# Claude Code personal settings
.claude/settings.local.json

# Standalone mode files
.zen_venv/
.docker_cleaned
logs/
*.backup
/.desktop_configured

/worktrees/
test_simulation_files/
.mcp.json

```

--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------

```
# Zen MCP Server Environment Configuration
# Copy this file to .env and fill in your values

# API Keys - At least one is required
#
# IMPORTANT: Choose ONE approach:
# - Native APIs (Gemini/OpenAI/XAI) for direct access
# - DIAL for unified enterprise access
# - OpenRouter for unified cloud access
# Having multiple unified providers creates ambiguity about which serves each model.
#
# Option 1: Use native APIs (recommended for direct access)
# Get your Gemini API key from: https://makersuite.google.com/app/apikey
GEMINI_API_KEY=your_gemini_api_key_here
# GEMINI_BASE_URL=                            # Optional: Custom Gemini endpoint (defaults to Google's API)

# Get your OpenAI API key from: https://platform.openai.com/api-keys
OPENAI_API_KEY=your_openai_api_key_here

# Azure OpenAI mirrors OpenAI models through Azure-hosted deployments
# Set the endpoint from Azure Portal. Models are defined in conf/azure_models.json
# (or the file referenced by AZURE_MODELS_CONFIG_PATH).
AZURE_OPENAI_API_KEY=your_azure_openai_key_here
AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
# AZURE_OPENAI_API_VERSION=2024-02-15-preview
# AZURE_OPENAI_ALLOWED_MODELS=gpt-4o,gpt-4o-mini
# AZURE_MODELS_CONFIG_PATH=/absolute/path/to/custom_azure_models.json

# Get your X.AI API key from: https://console.x.ai/
XAI_API_KEY=your_xai_api_key_here

# Get your DIAL API key and configure host URL
# DIAL provides unified access to multiple AI models through a single API
DIAL_API_KEY=your_dial_api_key_here
# DIAL_API_HOST=https://core.dialx.ai        # Optional: Base URL without /openai suffix (auto-appended)
# DIAL_API_VERSION=2025-01-01-preview        # Optional: API version header for DIAL requests

# Option 2: Use OpenRouter for access to multiple models through one API
# Get your OpenRouter API key from: https://openrouter.ai/
# If using OpenRouter, comment out the native API keys above
OPENROUTER_API_KEY=your_openrouter_api_key_here

# Option 3: Use custom API endpoints for local models (Ollama, vLLM, LM Studio, etc.)
# CUSTOM_API_URL=http://localhost:11434/v1  # Ollama example
# CUSTOM_API_KEY=                                      # Empty for Ollama (no auth needed)
# CUSTOM_MODEL_NAME=llama3.2                          # Default model name

# Optional: HTTP timeout tuning for OpenAI-compatible endpoints (OpenRouter/custom/local)
# Values are seconds; defaults are 45s connect / 900s read/write/pool for remote URLs
# and 60s/1800s when pointing at localhost. Raise these if long-running models time out.
# CUSTOM_CONNECT_TIMEOUT=45.0
# CUSTOM_READ_TIMEOUT=900.0
# CUSTOM_WRITE_TIMEOUT=900.0
# CUSTOM_POOL_TIMEOUT=900.0

# Optional: Default model to use
# Options: 'auto' (Claude picks best model), 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', 'o4-mini-high',
#          'gpt-5', 'gpt-5-mini', 'grok', 'opus-4.1', 'sonnet-4.1', or any DIAL model if DIAL is configured
# When set to 'auto', Claude will select the best model for each task
# Defaults to 'auto' if not specified
DEFAULT_MODEL=auto

# Optional: Default thinking mode for ThinkDeep tool
# NOTE: Only applies to models that support extended thinking (e.g., Gemini 2.5 Pro, GPT-5 models)
#       Flash models (2.0) will use system prompt engineering instead
# Token consumption per mode:
#   minimal: 128 tokens   - Quick analysis, fastest response
#   low:     2,048 tokens - Light reasoning tasks  
#   medium:  8,192 tokens - Balanced reasoning (good for most cases)
#   high:    16,384 tokens - Complex analysis (recommended for thinkdeep)
#   max:     32,768 tokens - Maximum reasoning depth, slowest but most thorough
# Defaults to 'high' if not specified
DEFAULT_THINKING_MODE_THINKDEEP=high

# Optional: Model usage restrictions
# Limit which models can be used from each provider for cost control, compliance, or standardization
# Format: Comma-separated list of allowed model names (case-insensitive, whitespace tolerant)
# Empty or unset = all models allowed (default behavior)
# If you want to disable a provider entirely, don't set its API key
#
# Supported OpenAI models:
#   - o3               (200K context, high reasoning)
#   - o3-mini          (200K context, balanced)
#   - o4-mini          (200K context, latest balanced, temperature=1.0 only)
#   - o4-mini-high     (200K context, enhanced reasoning, temperature=1.0 only)
#   - gpt-5            (400K context, 128K output, reasoning tokens)
#   - gpt-5-mini       (400K context, 128K output, reasoning tokens)
#   - mini             (shorthand for o4-mini)
#
# Supported Google/Gemini models:
#   - gemini-2.5-flash   (1M context, fast, supports thinking)
#   - gemini-2.5-pro     (1M context, powerful, supports thinking)
#   - flash                             (shorthand for gemini-2.5-flash)
#   - pro                               (shorthand for gemini-2.5-pro)
#
# Supported X.AI GROK models:
#   - grok-3          (131K context, advanced reasoning)
#   - grok-3-fast     (131K context, higher performance but more expensive)
#   - grok            (shorthand for grok-3)
#   - grok3           (shorthand for grok-3)
#   - grokfast        (shorthand for grok-3-fast)
#
# Supported DIAL models (when available in your DIAL deployment):
#   - o3-2025-04-16   (200K context, latest O3 release)
#   - o4-mini-2025-04-16 (200K context, latest O4 mini)
#   - o3              (shorthand for o3-2025-04-16)
#   - o4-mini         (shorthand for o4-mini-2025-04-16)
#   - anthropic.claude-sonnet-4.1-20250805-v1:0 (200K context, Claude 4.1 Sonnet)
#   - anthropic.claude-sonnet-4.1-20250805-v1:0-with-thinking (200K context, Claude 4.1 Sonnet with thinking mode)
#   - anthropic.claude-opus-4.1-20250805-v1:0 (200K context, Claude 4.1 Opus)
#   - anthropic.claude-opus-4.1-20250805-v1:0-with-thinking (200K context, Claude 4.1 Opus with thinking mode)
#   - sonnet-4.1        (shorthand for Claude 4.1 Sonnet)
#   - sonnet-4.1-thinking (shorthand for Claude 4.1 Sonnet with thinking)
#   - opus-4.1          (shorthand for Claude 4.1 Opus)
#   - opus-4.1-thinking (shorthand for Claude 4.1 Opus with thinking)
#   - gemini-2.5-pro-preview-03-25-google-search (1M context, with Google Search)
#   - gemini-2.5-pro-preview-05-06 (1M context, latest preview)
#   - gemini-2.5-flash-preview-05-20 (1M context, latest flash preview)
#   - gemini-2.5-pro  (shorthand for gemini-2.5-pro-preview-05-06)
#   - gemini-2.5-pro-search (shorthand for gemini-2.5-pro-preview-03-25-google-search)
#   - gemini-2.5-flash (shorthand for gemini-2.5-flash-preview-05-20)
#
# Examples:
#   OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini  # Only allow mini models (cost control)
#   GOOGLE_ALLOWED_MODELS=flash                  # Only allow Flash (fast responses)
#   XAI_ALLOWED_MODELS=grok-3                    # Only allow standard GROK (not fast variant)
#   OPENAI_ALLOWED_MODELS=o4-mini                # Single model standardization
#   GOOGLE_ALLOWED_MODELS=flash,pro              # Allow both Gemini models
#   XAI_ALLOWED_MODELS=grok,grok-3-fast          # Allow both GROK variants
#   DIAL_ALLOWED_MODELS=o3,o4-mini                       # Only allow O3/O4 models via DIAL
#   DIAL_ALLOWED_MODELS=opus-4.1,sonnet-4.1                  # Only Claude 4.1 models (without thinking)
#   DIAL_ALLOWED_MODELS=opus-4.1-thinking,sonnet-4.1-thinking # Only Claude 4.1 with thinking mode
#   DIAL_ALLOWED_MODELS=gemini-2.5-pro,gemini-2.5-flash  # Only Gemini 2.5 models via DIAL
#
# Note: These restrictions apply even in 'auto' mode - Claude will only pick from allowed models
# OPENAI_ALLOWED_MODELS=
# GOOGLE_ALLOWED_MODELS=
# XAI_ALLOWED_MODELS=
# DIAL_ALLOWED_MODELS=

# Optional: Custom model configuration file path
# Override the default location of custom_models.json
# CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json

# Note: Conversations are stored in memory during the session

# Optional: Conversation timeout (hours)
# How long AI-to-AI conversation threads persist before expiring
# Longer timeouts use more memory but allow resuming conversations later
# Defaults to 24 hours if not specified
CONVERSATION_TIMEOUT_HOURS=24

# Optional: Max conversation turns
# Maximum number of turns allowed in an AI-to-AI conversation thread
# Each exchange (Claude asks, Gemini responds) counts as 2 turns
# So 20 turns = 10 exchanges. Defaults to 40 if not specified
MAX_CONVERSATION_TURNS=40

# Optional: Logging level (DEBUG, INFO, WARNING, ERROR)
# DEBUG: Shows detailed operational messages for troubleshooting (default)
# INFO: Shows general operational messages
# WARNING: Shows only warnings and errors
# ERROR: Shows only errors
LOG_LEVEL=DEBUG

# Optional: Tool Selection
# Comma-separated list of tools to disable. If not set, all tools are enabled.
# Essential tools (version, listmodels) cannot be disabled.
# Available tools: chat, thinkdeep, planner, consensus, codereview, precommit,
#                  debug, docgen, analyze, refactor, tracer, testgen, challenge, secaudit
# 
# DEFAULT CONFIGURATION: To optimize context window usage, non-essential tools
# are disabled by default. Only the essential tools remain enabled:
# - chat, thinkdeep, planner, consensus (collaboration tools)
# - codereview, precommit, debug (code quality tools)  
# - challenge (critical thinking utility)
#
# To enable additional tools, remove them from the DISABLED_TOOLS list below.
DISABLED_TOOLS=analyze,refactor,testgen,secaudit,docgen,tracer

# Optional: Language/Locale for AI responses
# When set, all AI tools will respond in the specified language
# while maintaining their analytical capabilities
# Examples: "fr-FR", "en-US", "zh-CN", "zh-TW", "ja-JP", "ko-KR", "es-ES"
# Leave empty for default language (English)
# LOCALE=fr-FR

# ===========================================
# Zen MCP Server Configuration  
# ===========================================

# Force .env file values to override system environment variables
# This prevents issues where different AI tools (Claude Code, etc.) pass 
# conflicting or cached environment variables that override each other
# 
# When enabled (true):
#   - .env file values take absolute precedence
#   - Prevents MCP clients from passing outdated/cached API keys
#   - Ensures consistent configuration across different AI tool integrations
#   - Solves environment variable conflicts between multiple AI applications
#
# When disabled (false):
#   - System environment variables take precedence (standard behavior)  
#   - Suitable for production deployments with secure environment injection
#   - Respects container orchestrator and CI/CD pipeline configurations
#
# Recommended settings:
#   Development with multiple AI tools: true (prevents tool conflicts)
#   Production/Container deployments: false (preserves security practices)
#   CI/CD environments: false (respects pipeline secrets)
ZEN_MCP_FORCE_ENV_OVERRIDE=false

# ===========================================
# Docker Configuration
# ===========================================

# Container name for Docker Compose
# Used when running with docker-compose.yml
COMPOSE_PROJECT_NAME=zen-mcp

# Timezone for Docker containers
# Ensures consistent time handling in containerized environments
TZ=UTC

# Maximum log file size (default: 10MB)
# Applicable when using file-based logging
LOG_MAX_SIZE=10MB

```

--------------------------------------------------------------------------------
/docker/README.md:
--------------------------------------------------------------------------------

```markdown
# Zen MCP Server - Docker Setup

## Quick Start

### 1. Prerequisites

- Docker installed (Docker Compose optional)
- At least one API key (Gemini, OpenAI, xAI, etc.)

### 2. Configuration

```bash
# Copy environment template
cp .env.example .env

# Edit with your API keys (at least one required)
# Required: GEMINI_API_KEY or OPENAI_API_KEY or XAI_API_KEY
nano .env
```

### 3. Build Image

```bash
# Build the Docker image
docker build -t zen-mcp-server:latest .

# Or use the build script (Bash)
chmod +x docker/scripts/build.sh
./docker/scripts/build.sh

# Build with PowerShell
docker/scripts/build.ps1

```

### 4. Usage Options

#### A. Direct Docker Run (Recommended for MCP)

```bash
# Run with environment file
docker run --rm -i --env-file .env \
  -v $(pwd)/logs:/app/logs \
  zen-mcp-server:latest

# Run with inline environment variables
docker run --rm -i \
  -e GEMINI_API_KEY="your_key_here" \
  -e LOG_LEVEL=INFO \
  -v $(pwd)/logs:/app/logs \
  zen-mcp-server:latest
```

#### B. Docker Compose (For Development/Monitoring)

```bash
# Deploy with Docker Compose
chmod +x docker/scripts/deploy.sh
./docker/scripts/deploy.sh

# Or use PowerShell script
docker/scripts/deploy.ps1

# Interactive stdio mode
docker-compose exec zen-mcp python server.py
```

## Service Management

### Docker Commands

```bash
# View running containers
docker ps

# View logs from container
docker logs <container_id>

# Stop all zen-mcp containers
docker stop $(docker ps -q --filter "ancestor=zen-mcp-server:latest")

# Remove old containers and images
docker container prune
docker image prune
```

### Docker Compose Management (Optional)

```bash
# View logs
docker-compose logs -f zen-mcp

# Check status
docker-compose ps

# Restart service
docker-compose restart zen-mcp

# Stop services
docker-compose down

# Rebuild and update
docker-compose build --no-cache zen-mcp
docker-compose up -d zen-mcp
```

## Health Monitoring

The container includes health checks that verify:
- Server process is running
- Python modules can be imported
- Log directory is writable  
- API keys are configured
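
The checks above live in `docker/scripts/healthcheck.py`. As a rough, hypothetical sketch of the kind of probes involved (the real script may differ), a minimal check could look like this:

```python
# Illustrative sketch only -- docker/scripts/healthcheck.py is the authoritative implementation.
import os
import sys


def main() -> int:
    # Server modules can be imported.
    try:
        import server  # noqa: F401
    except ImportError as exc:
        print(f"import check failed: {exc}")
        return 1

    # Log directory is writable.
    if not os.access("/app/logs", os.W_OK):
        print("log directory is not writable")
        return 1

    # At least one API key is configured.
    keys = ("GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY")
    if not any(os.getenv(key) for key in keys):
        print("no API key configured")
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())
```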

## Volumes and Persistent Data

The Docker setup includes persistent volumes to preserve data between container runs:

- **`./logs:/app/logs`** - Persistent log storage (local folder mount)
- **`zen-mcp-config:/app/conf`** - Configuration persistence (named Docker volume)
- **`/etc/localtime:/etc/localtime:ro`** - Host timezone synchronization (read-only)

### How Persistent Volumes Work

The `zen-mcp` service (used by `zen-docker-compose` and Docker Compose commands) mounts the named volume `zen-mcp-config` persistently. All data placed in `/app/conf` inside the container is preserved between runs thanks to this Docker volume.

In the `docker-compose.yml` file, you will find:

```yaml
volumes:
  - ./logs:/app/logs
  - zen-mcp-config:/app/conf
  - /etc/localtime:/etc/localtime:ro
```

and the named volume definition:

```yaml
volumes:
  zen-mcp-config:
    driver: local
```

## Security

- Runs as non-root user `zenuser`
- Read-only filesystem with tmpfs for temporary files
- No network ports exposed (stdio communication only)
- Secrets managed via environment variables
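
These properties are configured in the shipped `docker-compose.yml`. As an illustrative sketch of how such hardening is typically expressed in Compose (the real file is the authoritative reference and may differ in detail):

```yaml
# Sketch of the hardening described above; see docker-compose.yml for the actual settings.
services:
  zen-mcp:
    image: zen-mcp-server:latest
    user: zenuser              # non-root user created in the image
    read_only: true            # read-only root filesystem
    tmpfs:
      - /tmp                   # writable scratch space for temporary files
    volumes:
      - ./logs:/app/logs       # logs still need a writable mount
    env_file:
      - .env                   # secrets supplied as environment variables
    stdin_open: true           # stdio transport; no network ports published
```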

## Troubleshooting

### Container won't start

```bash
# Check if image exists
docker images zen-mcp-server

# Test container interactively
docker run --rm -it --env-file .env zen-mcp-server:latest bash

# Check environment variables
docker run --rm --env-file .env zen-mcp-server:latest env | grep API

# Test with minimal configuration
docker run --rm -i -e GEMINI_API_KEY="test" zen-mcp-server:latest python server.py
```

### MCP Connection Issues

```bash
# Test Docker connectivity
docker run --rm hello-world

# Verify container stdio
echo '{"jsonrpc": "2.0", "method": "ping"}' | docker run --rm -i --env-file .env zen-mcp-server:latest python server.py

# Check Claude Desktop logs for connection errors
```

### API Key Problems

```bash
# Verify API keys are loaded
docker run --rm --env-file .env zen-mcp-server:latest python -c "import os; print('GEMINI_API_KEY:', bool(os.getenv('GEMINI_API_KEY')))"

# Test API connectivity
docker run --rm --env-file .env zen-mcp-server:latest python /usr/local/bin/healthcheck.py
```

### Permission Issues

```bash
# Fix log directory permissions (Linux/macOS)
sudo chown -R $USER:$USER logs/
chmod 755 logs/

# Windows: Run Docker Desktop as Administrator if needed
```

### Memory/Performance Issues

```bash
# Check container resource usage
docker stats

# Run with memory limits
docker run --rm -i --memory="512m" --env-file .env zen-mcp-server:latest

# Monitor Docker logs
docker run --rm -i --env-file .env zen-mcp-server:latest 2>&1 | tee docker.log
```

## MCP Integration (Claude Desktop)

### Recommended Configuration (docker run)

```json
{
  "servers": {
    "zen-docker": {
      "command": "docker",
      "args": [
        "run",
        "--rm",
        "-i",
        "--env-file",
        "/absolute/path/to/zen-mcp-server/.env",
        "-v",
        "/absolute/path/to/zen-mcp-server/logs:/app/logs",
        "zen-mcp-server:latest"
      ]
    }
  }
}
```

### Windows Example

```json
{
  "servers": {
    "zen-docker": {
      "command": "docker",
      "args": [
        "run",
        "--rm",
        "-i",
        "--env-file",
        "C:/Users/YourName/path/to/zen-mcp-server/.env",
        "-v",
        "C:/Users/YourName/path/to/zen-mcp-server/logs:/app/logs",
        "zen-mcp-server:latest"
      ]
    }
  }
}
```

### Advanced Option: docker-compose run (uses compose configuration)

```json
{
  "servers": {
    "zen-docker": {
      "command": "docker-compose",
      "args": [
        "-f",
        "/absolute/path/to/zen-mcp-server/docker-compose.yml",
        "run",
        "--rm",
        "zen-mcp"
      ]
    }
  }
}
```

### Environment File Template

Create a `.env` file with at least one API key:

```bash
# Required: At least one API key
GEMINI_API_KEY=your_gemini_key_here
OPENAI_API_KEY=your_openai_key_here

# Optional configuration
LOG_LEVEL=INFO
DEFAULT_MODEL=auto
DEFAULT_THINKING_MODE_THINKDEEP=high

# Optional API keys (leave empty if not used)
ANTHROPIC_API_KEY=
XAI_API_KEY=
DIAL_API_KEY=
OPENROUTER_API_KEY=
CUSTOM_API_URL=
```

## Quick Test & Validation

### 1. Test Docker Image

```bash
# Test container starts correctly
docker run --rm zen-mcp-server:latest python --version

# Test health check
docker run --rm -e GEMINI_API_KEY="test" zen-mcp-server:latest python /usr/local/bin/healthcheck.py
```

### 2. Test MCP Protocol

```bash
# Test basic MCP communication
echo '{"jsonrpc": "2.0", "method": "initialize", "params": {}}' | \
  docker run --rm -i --env-file .env zen-mcp-server:latest python server.py
```

### 3. Validate Configuration

```bash
# Run validation script
python test_mcp_config.py

# Or validate JSON manually
python -m json.tool .vscode/mcp.json
```

## Available Tools

The Zen MCP Server provides these tools when properly configured:

- **chat** - General AI conversation and collaboration
- **thinkdeep** - Multi-stage investigation and reasoning  
- **planner** - Interactive sequential planning
- **consensus** - Multi-model consensus workflow
- **codereview** - Comprehensive code review
- **debug** - Root cause analysis and debugging
- **analyze** - Code analysis and assessment
- **refactor** - Refactoring analysis and suggestions
- **secaudit** - Security audit workflow
- **testgen** - Test generation with edge cases
- **docgen** - Documentation generation
- **tracer** - Code tracing and dependency mapping
- **precommit** - Pre-commit validation workflow
- **listmodels** - Available AI models information
- **version** - Server version and configuration

## Performance Notes

- **Image size**: ~293MB optimized multi-stage build
- **Memory usage**: ~256MB base + model overhead
- **Startup time**: ~2-3 seconds for container initialization
- **API response**: Varies by model and complexity (1-30 seconds)

For production use, consider:
- Using specific API keys for rate limiting
- Monitoring container resource usage
- Setting up log rotation for persistent logs
- Using Docker health checks for reliability

```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
# Zen MCP: Many Workflows. One Context.

<div align="center">

  [Zen in action](https://github.com/user-attachments/assets/0d26061e-5f21-4ab1-b7d0-f883ddc2c3da)

👉 **[Watch more examples](#-watch-tools-in-action)**

### Your CLI + Multiple Models = Your AI Dev Team

**Use the 🤖 CLI you love:**  
[Claude Code](https://www.anthropic.com/claude-code) · [Gemini CLI](https://github.com/google-gemini/gemini-cli) · [Codex CLI](https://github.com/openai/codex) · [Qwen Code CLI](https://qwenlm.github.io/qwen-code-docs/) · [Cursor](https://cursor.com) · _and more_

**With multiple models within a single prompt:**  
Gemini · OpenAI · Anthropic · Grok · Azure · Ollama · OpenRouter · DIAL · On-Device Model

</div>

---

## 🆕 Now with CLI-to-CLI Bridge

The new **[`clink`](docs/tools/clink.md)** (CLI + Link) tool connects external AI CLIs directly into your workflow:

- **Connect external CLIs** like [Gemini CLI](https://github.com/google-gemini/gemini-cli), [Codex CLI](https://github.com/openai/codex), and [Claude Code](https://www.anthropic.com/claude-code) directly into your workflow
- **CLI Subagents** - Launch isolated CLI instances from _within_ your current CLI! Claude Code can spawn Codex subagents, Codex can spawn Gemini CLI subagents, etc. Offload heavy tasks (code reviews, bug hunting) to fresh contexts while your main session's context window remains unpolluted. Each subagent returns only final results.
- **Context Isolation** - Run separate investigations without polluting your primary workspace
- **Role Specialization** - Spawn `planner`, `codereviewer`, or custom role agents with specialized system prompts
- **Full CLI Capabilities** - Web search, file inspection, MCP tool access, latest documentation lookups
- **Seamless Continuity** - Sub-CLIs participate as first-class members with full conversation context between tools

```bash
# Codex spawns Codex subagent for isolated code review in fresh context
clink with codex codereviewer to audit auth module for security issues
# Subagent reviews in isolation, returns final report without cluttering your context as codex reads each file and walks the directory structure

# Consensus from different AI models → Implementation handoff with full context preservation between tools
Use consensus with gpt-5 and gemini-pro to decide: dark mode or offline support next
Continue with clink gemini - implement the recommended feature
# Gemini receives full debate context and starts coding immediately
```

👉 **[Learn more about clink](docs/tools/clink.md)**

---

## Why Zen MCP?

**Why rely on one AI model when you can orchestrate them all?**

A Model Context Protocol server that supercharges tools like [Claude Code](https://www.anthropic.com/claude-code), [Codex CLI](https://developers.openai.com/codex/cli), and IDE clients such
as [Cursor](https://cursor.com) or the [Claude Dev VS Code extension](https://marketplace.visualstudio.com/items?itemName=Anthropic.claude-vscode). **Zen MCP connects your favorite AI tool
to multiple AI models** for enhanced code analysis, problem-solving, and collaborative development.

### True AI Collaboration with Conversation Continuity

Zen supports **conversation threading** so your CLI can **discuss ideas with multiple AI models, exchange reasoning, get second opinions, and even run collaborative debates between models** to help you reach deeper insights and better solutions.

Your CLI always stays in control but gets perspectives from the best AI for each subtask. Context carries forward seamlessly across tools and models, enabling complex workflows like: code reviews with multiple models → automated planning → implementation → pre-commit validation.

> **You're in control.** Your CLI of choice orchestrates the AI team, but you decide the workflow. Craft powerful prompts that bring in Gemini Pro, GPT 5, Flash, or local offline models exactly when needed.

<details>
<summary><b>Reasons to Use Zen MCP</b></summary>

A typical workflow with Claude Code as an example:

1. **Multi-Model Orchestration** - Claude coordinates with Gemini Pro, O3, GPT-5, and 50+ other models to get the best analysis for each task

2. **Context Revival Magic** - Even after Claude's context resets, continue conversations seamlessly by having other models "remind" Claude of the discussion

3. **Guided Workflows** - Enforces systematic investigation phases that prevent rushed analysis and ensure thorough code examination

4. **Extended Context Windows** - Break Claude's limits by delegating to Gemini (1M tokens) or O3 (200K tokens) for massive codebases

5. **True Conversation Continuity** - Full context flows across tools and models - Gemini remembers what O3 said 10 steps ago

6. **Model-Specific Strengths** - Extended thinking with Gemini Pro, blazing speed with Flash, strong reasoning with O3, privacy with local Ollama

7. **Professional Code Reviews** - Multi-pass analysis with severity levels, actionable feedback, and consensus from multiple AI experts

8. **Smart Debugging Assistant** - Systematic root cause analysis with hypothesis tracking and confidence levels

9. **Automatic Model Selection** - Claude intelligently picks the right model for each subtask (or you can specify)

10. **Vision Capabilities** - Analyze screenshots, diagrams, and visual content with vision-enabled models

11. **Local Model Support** - Run Llama, Mistral, or other models locally for complete privacy and zero API costs

12. **Bypass MCP Token Limits** - Automatically works around MCP's 25K limit for large prompts and responses

**The Killer Feature:** When Claude's context resets, just ask to "continue with O3" - the other model's response magically revives Claude's understanding without re-ingesting documents!

#### Example: Multi-Model Code Review Workflow

1. `Perform a codereview using gemini pro and o3 and use planner to generate a detailed plan, implement the fixes and do a final precommit check by continuing from the previous codereview`
2. This triggers a [`codereview`](docs/tools/codereview.md) workflow where Claude walks the code, looking for all kinds of issues
3. After multiple passes, collects relevant code and makes note of issues along the way
4. Maintains a `confidence` level between `exploring`, `low`, `medium`, `high` and `certain` to track how confidently it's been able to find and identify issues
5. Generates a detailed list of critical -> low issues
6. Shares the relevant files, findings, etc with **Gemini Pro** to perform a deep dive for a second [`codereview`](docs/tools/codereview.md)
7. Comes back with the response and then does the same with O3, adding to the prompt if a new discovery comes to light
8. When done, Claude takes in all the feedback and compiles a single list of all critical -> low issues, including good patterns in your code. The final list includes new findings or revisions in case Claude misunderstood or missed something crucial and one of the other models pointed this out
9. It then uses the [`planner`](docs/tools/planner.md) workflow to break the work down into simpler steps if a major refactor is required
10. Claude then performs the actual work of fixing highlighted issues
11. When done, Claude returns to Gemini Pro for a [`precommit`](docs/tools/precommit.md) review

All within a single conversation thread! Gemini Pro in step 11 _knows_ what was recommended by O3 in step 7, and takes that context
and review into consideration to aid its final pre-commit review.

**Think of it as Claude Code _for_ Claude Code.** This MCP isn't magic. It's just **super-glue**.

> **Remember:** Claude stays in full control — but **YOU** call the shots.
> Zen is designed to have Claude engage other models only when needed — and to follow through with meaningful back-and-forth.
> **You're** the one who crafts the powerful prompt that makes Claude bring in Gemini, Flash, O3 — or fly solo.
> You're the guide. The prompter. The puppeteer.
> #### You are the AI - **Actually Intelligent**.
</details>

#### Recommended AI Stack

<details>
<summary>For Claude Code Users</summary>

For best results when using [Claude Code](https://claude.ai/code):  

- **Sonnet 4.5** - All agentic work and orchestration
- **Gemini 2.5 Pro** OR **GPT-5-Pro** - Deep thinking, additional code reviews, debugging and validations, pre-commit analysis
</details>

<details>
<summary>For Codex Users</summary>

For best results when using [Codex CLI](https://developers.openai.com/codex/cli):  

- **GPT-5 Codex Medium** - All agentic work and orchestration
- **Gemini 2.5 Pro** OR **GPT-5-Pro** - Deep thinking, additional code reviews, debugging and validations, pre-commit analysis
</details>

## Quick Start (5 minutes)

**Prerequisites:** Python 3.10+, Git, [uv installed](https://docs.astral.sh/uv/getting-started/installation/)

**1. Get API Keys** (choose one or more):
- **[OpenRouter](https://openrouter.ai/)** - Access multiple models with one API
- **[Gemini](https://makersuite.google.com/app/apikey)** - Google's latest models
- **[OpenAI](https://platform.openai.com/api-keys)** - O3, GPT-5 series
- **[Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/)** - Enterprise deployments of GPT-4o, GPT-4.1, GPT-5 family
- **[X.AI](https://console.x.ai/)** - Grok models
- **[DIAL](https://dialx.ai/)** - Vendor-agnostic model access
- **[Ollama](https://ollama.ai/)** - Local models (free)

**2. Install** (choose one):

**Option A: Clone and Automatic Setup** (recommended)
```bash
git clone https://github.com/BeehiveInnovations/zen-mcp-server.git
cd zen-mcp-server

# Handles everything: setup, config, API keys from system environment. 
# Auto-configures Claude Desktop, Claude Code, Gemini CLI, Codex CLI, Qwen CLI
# Enable / disable additional settings in .env
./run-server.sh  
```

**Option B: Instant Setup with [uvx](https://docs.astral.sh/uv/getting-started/installation/)**
```json
// Add to ~/.claude/settings.json or .mcp.json
// Don't forget to add your API keys under env
{
  "mcpServers": {
    "zen": {
      "command": "bash",
      "args": ["-c", "for p in $(which uvx 2>/dev/null) $HOME/.local/bin/uvx /opt/homebrew/bin/uvx /usr/local/bin/uvx uvx; do [ -x \"$p\" ] && exec \"$p\" --from git+https://github.com/BeehiveInnovations/zen-mcp-server.git zen-mcp-server; done; echo 'uvx not found' >&2; exit 1"],
      "env": {
        "PATH": "/usr/local/bin:/usr/bin:/bin:/opt/homebrew/bin:~/.local/bin",
        "GEMINI_API_KEY": "your-key-here",
        "DISABLED_TOOLS": "analyze,refactor,testgen,secaudit,docgen,tracer",
        "DEFAULT_MODEL": "auto"
      }
    }
  }
}
```

**3. Start Using!**
```
"Use zen to analyze this code for security issues with gemini pro"
"Debug this error with o3 and then get flash to suggest optimizations"
"Plan the migration strategy with zen, get consensus from multiple models"
"clink with cli_name=\"gemini\" role=\"planner\" to draft a phased rollout plan"
```

👉 **[Complete Setup Guide](docs/getting-started.md)** with detailed installation, configuration for Gemini / Codex / Qwen, and troubleshooting
👉 **[Cursor & VS Code Setup](docs/getting-started.md#ide-clients)** for IDE integration instructions
📺 **[Watch tools in action](#-watch-tools-in-action)** to see real-world examples

## Provider Configuration

Zen activates any provider that has credentials in your `.env`. See `.env.example` for deeper customization.
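
As a minimal illustration (placeholder values), setting a provider's key in `.env` is all that is needed to activate it:

```bash
# Minimal .env: a provider becomes active once its key is set; unset providers stay inactive
GEMINI_API_KEY=your_gemini_api_key_here
OPENAI_API_KEY=your_openai_api_key_here
```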

## Core Tools

> **Note:** Each tool comes with its own multi-step workflow, parameters, and descriptions that consume valuable context window space even when not in use. To optimize performance, some tools are disabled by default. See [Tool Configuration](#tool-configuration) below to enable them.

**Collaboration & Planning** *(Enabled by default)*
- **[`clink`](docs/tools/clink.md)** - Bridge requests to external AI CLIs (Gemini planner, codereviewer, etc.)
- **[`chat`](docs/tools/chat.md)** - Brainstorm ideas, get second opinions, validate approaches. With capable models (GPT-5 Pro, Gemini 2.5 Pro), generates complete code / implementation
- **[`thinkdeep`](docs/tools/thinkdeep.md)** - Extended reasoning, edge case analysis, alternative perspectives
- **[`planner`](docs/tools/planner.md)** - Break down complex projects into structured, actionable plans
- **[`consensus`](docs/tools/consensus.md)** - Get expert opinions from multiple AI models with stance steering

**Code Analysis & Quality**
- **[`debug`](docs/tools/debug.md)** - Systematic investigation and root cause analysis
- **[`precommit`](docs/tools/precommit.md)** - Validate changes before committing, prevent regressions
- **[`codereview`](docs/tools/codereview.md)** - Professional reviews with severity levels and actionable feedback
- **[`analyze`](docs/tools/analyze.md)** *(disabled by default - [enable](#tool-configuration))* - Understand architecture, patterns, dependencies across entire codebases

**Development Tools** *(Disabled by default - [enable](#tool-configuration))*
- **[`refactor`](docs/tools/refactor.md)** - Intelligent code refactoring with decomposition focus
- **[`testgen`](docs/tools/testgen.md)** - Comprehensive test generation with edge cases
- **[`secaudit`](docs/tools/secaudit.md)** - Security audits with OWASP Top 10 analysis
- **[`docgen`](docs/tools/docgen.md)** - Generate documentation with complexity analysis

**Utilities**
- **[`apilookup`](docs/tools/apilookup.md)** - Forces current-year API/SDK documentation lookups in a sub-process (saves tokens within the current context window) and prevents responses based on outdated training data
- **[`challenge`](docs/tools/challenge.md)** - Prevent "You're absolutely right!" responses with critical analysis
- **[`tracer`](docs/tools/tracer.md)** *(disabled by default - [enable](#tool-configuration))* - Static analysis prompts for call-flow mapping

<details>
<summary><b id="tool-configuration">👉 Tool Configuration</b></summary>

### Default Configuration

To optimize context window usage, only essential tools are enabled by default:

**Enabled by default:**
- `chat`, `thinkdeep`, `planner`, `consensus` - Core collaboration tools
- `codereview`, `precommit`, `debug` - Essential code quality tools
- `apilookup` - Rapid API/SDK information lookup
- `challenge` - Critical thinking utility

**Disabled by default:**
- `analyze`, `refactor`, `testgen`, `secaudit`, `docgen`, `tracer`

### Enabling Additional Tools

To enable additional tools, remove them from the `DISABLED_TOOLS` list:

**Option 1: Edit your .env file**
```bash
# Default configuration (from .env.example)
DISABLED_TOOLS=analyze,refactor,testgen,secaudit,docgen,tracer

# To enable specific tools, remove them from the list
# Example: Enable analyze tool
DISABLED_TOOLS=refactor,testgen,secaudit,docgen,tracer

# To enable ALL tools
DISABLED_TOOLS=
```

**Option 2: Configure in MCP settings**
```json
// In ~/.claude/settings.json or .mcp.json
{
  "mcpServers": {
    "zen": {
      "env": {
        // Tool configuration
        "DISABLED_TOOLS": "refactor,testgen,secaudit,docgen,tracer",
        "DEFAULT_MODEL": "pro",
        "DEFAULT_THINKING_MODE_THINKDEEP": "high",
        
        // API configuration
        "GEMINI_API_KEY": "your-gemini-key",
        "OPENAI_API_KEY": "your-openai-key",
        "OPENROUTER_API_KEY": "your-openrouter-key",
        
        // Logging and performance
        "LOG_LEVEL": "INFO",
        "CONVERSATION_TIMEOUT_HOURS": "6",
        "MAX_CONVERSATION_TURNS": "50"
      }
    }
  }
}
```

**Option 3: Enable all tools**
```json
// Remove or empty the DISABLED_TOOLS to enable everything
{
  "mcpServers": {
    "zen": {
      "env": {
        "DISABLED_TOOLS": ""
      }
    }
  }
}
```

**Note:**
- Essential tools (`version`, `listmodels`) cannot be disabled
- After changing tool configuration, restart your Claude session for changes to take effect
- Each tool adds to context window usage, so only enable what you need

</details>

## 📺 Watch Tools In Action

<details>
<summary><b>Chat Tool</b> - Collaborative decision making and multi-turn conversations</summary>

**Picking Redis vs Memcached:**

[Chat Redis or Memcached_web.webm](https://github.com/user-attachments/assets/41076cfe-dd49-4dfc-82f5-d7461b34705d)

**Multi-turn conversation with continuation:**

[Chat With Gemini_web.webm](https://github.com/user-attachments/assets/37bd57ca-e8a6-42f7-b5fb-11de271e95db)

</details>

<details>
<summary><b>Consensus Tool</b> - Multi-model debate and decision making</summary>

**Multi-model consensus debate:**

[Zen Consensus Debate](https://github.com/user-attachments/assets/76a23dd5-887a-4382-9cf0-642f5cf6219e)

</details>

<details>
<summary><b>PreCommit Tool</b> - Comprehensive change validation</summary>

**Pre-commit validation workflow:**

<div align="center">
  <img src="https://github.com/user-attachments/assets/584adfa6-d252-49b4-b5b0-0cd6e97fb2c6" width="950">
</div>

</details>

<details>
<summary><b>API Lookup Tool</b> - Current vs outdated API documentation</summary>

**Without Zen - outdated APIs:**

[API without Zen](https://github.com/user-attachments/assets/01a79dc9-ad16-4264-9ce1-76a56c3580ee)

**With Zen - current APIs:**

[API with Zen](https://github.com/user-attachments/assets/5c847326-4b66-41f7-8f30-f380453dce22)

</details>

<details>
<summary><b>Challenge Tool</b> - Critical thinking vs reflexive agreement</summary>

**Without Zen:**

![without_zen@2x](https://github.com/user-attachments/assets/64f3c9fb-7ca9-4876-b687-25e847edfd87)

**With Zen:**

![with_zen@2x](https://github.com/user-attachments/assets/9d72f444-ba53-4ab1-83e5-250062c6ee70)

</details>

## Key Features

**AI Orchestration**
- **Auto model selection** - Claude picks the right AI for each task
- **Multi-model workflows** - Chain different models in single conversations
- **Conversation continuity** - Context preserved across tools and models
- **[Context revival](docs/context-revival.md)** - Continue conversations even after context resets

**Model Support**
- **Multiple providers** - Gemini, OpenAI, Azure, X.AI, OpenRouter, DIAL, Ollama
- **Latest models** - GPT-5, Gemini 2.5 Pro, O3, Grok-4, local Llama
- **[Thinking modes](docs/advanced-usage.md#thinking-modes)** - Control reasoning depth vs cost
- **Vision support** - Analyze images, diagrams, screenshots

**Developer Experience**
- **Guided workflows** - Systematic investigation prevents rushed analysis
- **Smart file handling** - Auto-expand directories, manage token limits
- **Web search integration** - Access current documentation and best practices
- **[Large prompt support](docs/advanced-usage.md#working-with-large-prompts)** - Bypass MCP's 25K token limit

## Example Workflows

**Multi-model Code Review:**
```
"Perform a codereview using gemini pro and o3, then use planner to create a fix strategy"
```
→ Claude reviews code systematically → Consults Gemini Pro → Gets O3's perspective → Creates unified action plan

**Collaborative Debugging:**
```
"Debug this race condition with max thinking mode, then validate the fix with precommit"
```
→ Deep investigation → Expert analysis → Solution implementation → Pre-commit validation

**Architecture Planning:**
```
"Plan our microservices migration, get consensus from pro and o3 on the approach"
```
→ Structured planning → Multiple expert opinions → Consensus building → Implementation roadmap

👉 **[Advanced Usage Guide](docs/advanced-usage.md)** for complex workflows, model configuration, and power-user features

## Quick Links

**📖 Documentation**
- [Docs Overview](docs/index.md) - High-level map of major guides
- [Getting Started](docs/getting-started.md) - Complete setup guide
- [Tools Reference](docs/tools/) - All tools with examples
- [Advanced Usage](docs/advanced-usage.md) - Power user features
- [Configuration](docs/configuration.md) - Environment variables, restrictions
- [Adding Providers](docs/adding_providers.md) - Provider-specific setup (OpenAI, Azure, custom gateways)
- [Model Ranking Guide](docs/model_ranking.md) - How intelligence scores drive auto-mode suggestions

**🔧 Setup & Support**
- [WSL Setup](docs/wsl-setup.md) - Windows users
- [Troubleshooting](docs/troubleshooting.md) - Common issues
- [Contributing](docs/contributions.md) - Code standards, PR process

## License

Apache 2.0 License - see [LICENSE](LICENSE) file for details.

## Acknowledgments

Built with the power of **Multi-Model AI** collaboration 🤝
- **A**ctual **I**ntelligence by real Humans
- [MCP (Model Context Protocol)](https://modelcontextprotocol.com)
- [Codex CLI](https://developers.openai.com/codex/cli)
- [Claude Code](https://claude.ai/code)
- [Gemini](https://ai.google.dev/)
- [OpenAI](https://openai.com/)
- [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/)

### Star History

[![Star History Chart](https://api.star-history.com/svg?repos=BeehiveInnovations/zen-mcp-server&type=Date)](https://www.star-history.com/#BeehiveInnovations/zen-mcp-server&Date)

```

--------------------------------------------------------------------------------
/AGENTS.md:
--------------------------------------------------------------------------------

```markdown
# Repository Guidelines

See `requirements.txt` and `requirements-dev.txt` for runtime and development dependencies.

Also read CLAUDE.md and CLAUDE.local.md if available.

## Project Structure & Module Organization
Zen MCP Server centers on `server.py`, which exposes MCP entrypoints and coordinates multi-model workflows. 
Feature-specific tools live in `tools/`, provider integrations in `providers/`, and shared helpers in `utils/`. 
Prompt and system context assets stay in `systemprompts/`, while configuration templates and automation scripts live under `conf/`, `scripts/`, and `docker/`. 
Unit tests sit in `tests/`; simulator-driven scenarios and log utilities are in `simulator_tests/` with the `communication_simulator_test.py` harness. 
Authoritative documentation and samples live in `docs/`, and runtime diagnostics are rotated in `logs/`.

## Build, Test, and Development Commands
- `source .zen_venv/bin/activate` – activate the managed Python environment.
- `./run-server.sh` – install dependencies, refresh `.env`, and launch the MCP server locally.
- `./code_quality_checks.sh` – run Ruff autofix, Black, isort, and the default pytest suite.
- `python communication_simulator_test.py --quick` – smoke-test orchestration across tools and providers.
- `./run_integration_tests.sh [--with-simulator]` – exercise provider-dependent flows against remote or Ollama models.

Run code quality checks:
```bash
source .zen_venv/bin/activate && ./code_quality_checks.sh
```

For example, this is how we run an individual test or the full suite:

```bash
source .zen_venv/bin/activate && pytest tests/test_auto_mode_model_listing.py -q
source .zen_venv/bin/activate && pytest -q
```

## Coding Style & Naming Conventions
Target Python 3.9+ with Black and isort using a 120-character line limit; Ruff enforces pycodestyle, pyflakes, bugbear, comprehension, and pyupgrade rules. Prefer explicit type hints, snake_case modules, and imperative commit-time docstrings. Extend workflows by defining hook or abstract methods instead of checking `hasattr()`/`getattr()`—inheritance-backed contracts keep behavior discoverable and testable.
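
A minimal sketch of that convention (class and method names here are illustrative, not taken from the codebase):

```python
from abc import ABC, abstractmethod


class BaseWorkflow(ABC):
    """Illustrative base contract: subclasses implement hooks instead of being probed."""

    @abstractmethod
    def prepare_step(self, step_number: int) -> str:
        """Return the instructions for a single workflow step."""

    def run_step(self, step_number: int) -> str:
        # The base class calls the hook directly; no hasattr()/getattr() probing needed.
        return self.prepare_step(step_number)


class DebugWorkflow(BaseWorkflow):
    def prepare_step(self, step_number: int) -> str:
        return f"Investigate hypothesis #{step_number} and record findings"
```

Subclasses that omit the hook fail at instantiation instead of silently skipping behavior.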

## Testing Guidelines
Mirror production modules inside `tests/`, naming test functions `test_<behavior>` and test classes `Test<Feature>`. Run `python -m pytest tests/ -v -m "not integration"` before every commit, adding `--cov=. --cov-report=html` for coverage-sensitive changes. Use `python communication_simulator_test.py --verbose` or `--individual <case>` to validate cross-agent flows, and reserve `./run_integration_tests.sh` for provider or transport modifications. Capture relevant excerpts from `logs/mcp_server.log` or `logs/mcp_activity.log` when documenting failures.
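
For instance, a test module for a hypothetical `utils/slugify.py` helper would follow that layout (the helper is inlined here so the snippet stands alone):

```python
# tests/test_slugify.py mirrors a hypothetical utils/slugify.py module.


def slugify(text: str) -> str:
    """Stand-in for the production helper under test."""
    return text.strip().lower().replace(" ", "-")


class TestSlugify:
    def test_lowercases_and_hyphenates(self):
        assert slugify("Zen MCP Server") == "zen-mcp-server"

    def test_strips_surrounding_whitespace(self):
        assert slugify("  chat  ") == "chat"
```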

## Commit & Pull Request Guidelines
Follow Conventional Commits: `type(scope): summary`, where `type` is one of `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, or `chore`. Keep commits focused, referencing issues or simulator cases when helpful. Pull requests should outline intent, list validation commands executed, flag configuration or tool toggles, and attach screenshots or log snippets when user-visible behavior changes.
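
Example subjects (scopes are illustrative): `feat(tools): add stance presets to consensus`, `fix(providers): handle empty OpenRouter model list`.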

## GitHub CLI Commands
The GitHub CLI (`gh`) streamlines issue and PR management directly from the terminal.

### Viewing Issues
```bash
# View issue details in current repository
gh issue view <issue-number>

# View issue from specific repository
gh issue view <issue-number> --repo owner/repo-name

# View issue with all comments
gh issue view <issue-number> --comments

# Get issue data as JSON for scripting
gh issue view <issue-number> --json title,body,author,state,labels,comments

# Open issue in web browser
gh issue view <issue-number> --web
```

### Managing Issues
```bash
# List all open issues
gh issue list

# List issues with filters
gh issue list --label bug --state open

# Create a new issue
gh issue create --title "Issue title" --body "Description"

# Close an issue
gh issue close <issue-number>

# Reopen an issue
gh issue reopen <issue-number>
```

### Pull Request Operations
```bash
# View PR details
gh pr view <pr-number>

# List pull requests
gh pr list

# Create a PR from current branch
gh pr create --title "PR title" --body "Description"

# Check out a PR locally
gh pr checkout <pr-number>

# Merge a PR
gh pr merge <pr-number>
```

Install GitHub CLI: `brew install gh` (macOS) or visit https://cli.github.com for other platforms.

## Security & Configuration Tips
Store API keys and provider URLs in `.env` or your MCP client config; never commit secrets or generated log artifacts. Use `run-server.sh` to regenerate environments and verify connectivity after dependency changes. When adding providers or tools, sanitize prompts and responses, document required environment variables in `docs/`, and update `claude_config_example.json` if new capabilities ship by default.
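
For example, a local Ollama setup typically needs only `CUSTOM_API_URL=http://localhost:11434` in `.env`, while each cloud provider reads its own key variable; see `.env.example` for the authoritative names.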

```

--------------------------------------------------------------------------------
/CLAUDE.md:
--------------------------------------------------------------------------------

```markdown
# Claude Development Guide for Zen MCP Server

This file contains essential commands and workflows for developing and maintaining the Zen MCP Server when working with Claude. Use these instructions to efficiently run quality checks, manage the server, check logs, and run tests.

## Quick Reference Commands

### Code Quality Checks

Before making any changes or submitting PRs, always run the comprehensive quality checks:

```bash
# Activate virtual environment first
source .zen_venv/bin/activate

# Run all quality checks (linting, formatting, tests)
./code_quality_checks.sh
```

This script automatically runs:
- Ruff linting with auto-fix
- Black code formatting 
- Import sorting with isort
- Complete unit test suite (excluding integration tests)
- Verification that all checks pass 100%

**Run Integration Tests (requires API keys):**
```bash
# Run integration tests that make real API calls
./run_integration_tests.sh

# Run integration tests + simulator tests
./run_integration_tests.sh --with-simulator
```

### Server Management

#### Setup/Update the Server
```bash
# Run setup script (handles everything)
./run-server.sh
```

This script will:
- Set up Python virtual environment
- Install all dependencies
- Create/update .env file
- Configure MCP with Claude
- Verify API keys

#### View Logs
```bash
# Follow logs in real-time
./run-server.sh -f

# Or manually view logs
tail -f logs/mcp_server.log
```

### Log Management

#### View Server Logs
```bash
# View last 500 lines of server logs
tail -n 500 logs/mcp_server.log

# Follow logs in real-time
tail -f logs/mcp_server.log

# View specific number of lines
tail -n 100 logs/mcp_server.log

# Search logs for specific patterns
grep "ERROR" logs/mcp_server.log
grep "tool_name" logs/mcp_activity.log
```

#### Monitor Tool Executions Only
```bash
# View tool activity log (focused on tool calls and completions)
tail -n 100 logs/mcp_activity.log

# Follow tool activity in real-time
tail -f logs/mcp_activity.log

# Use simple tail commands to monitor logs
tail -f logs/mcp_activity.log | grep -E "(TOOL_CALL|TOOL_COMPLETED|ERROR|WARNING)"
```

#### Available Log Files

**Current log files (with proper rotation):**
```bash
# Main server log (all activity including debug info) - 20MB max, 10 backups
tail -f logs/mcp_server.log

# Tool activity only (TOOL_CALL, TOOL_COMPLETED, etc.) - 20MB max, 5 backups  
tail -f logs/mcp_activity.log
```

**For programmatic log analysis (used by tests):**
```python
# Import the LogUtils class from simulator tests
from simulator_tests.log_utils import LogUtils

# Get recent logs
recent_logs = LogUtils.get_recent_server_logs(lines=500)

# Check for errors
errors = LogUtils.check_server_logs_for_errors()

# Search for specific patterns
matches = LogUtils.search_logs_for_pattern("TOOL_CALL.*debug")
```

### Testing

Simulator tests exercise the MCP server in a 'live' scenario, using your configured API keys to confirm that models respond and that the server can communicate back and forth.

**IMPORTANT**: After any code changes, restart your Claude session for the changes to take effect.

#### Run All Simulator Tests
```bash
# Run the complete test suite
python communication_simulator_test.py

# Run tests with verbose output
python communication_simulator_test.py --verbose
```

#### Quick Test Mode (Recommended for Time-Limited Testing)
```bash
# Run quick test mode - 6 essential tests that provide maximum functionality coverage
python communication_simulator_test.py --quick

# Run quick test mode with verbose output
python communication_simulator_test.py --quick --verbose
```

**Quick mode runs these 6 essential tests:**
- `cross_tool_continuation` - Cross-tool conversation memory testing (chat, thinkdeep, codereview, analyze, debug)
- `conversation_chain_validation` - Core conversation threading and memory validation
- `consensus_workflow_accurate` - Consensus tool with flash model and stance testing
- `codereview_validation` - CodeReview tool with flash model and multi-step workflows
- `planner_validation` - Planner tool with flash model and complex planning workflows
- `token_allocation_validation` - Token allocation and conversation history buildup testing

**Why these 6 tests:** They cover the core functionality including conversation memory (`utils/conversation_memory.py`), chat tool functionality, file processing and deduplication, model selection (flash/flashlite/o3), and cross-tool conversation workflows. These tests validate the most critical parts of the system in minimal time.

**Note:** Some workflow tools (analyze, codereview, planner, consensus, etc.) require specific workflow parameters and may need individual testing rather than quick mode testing.

#### Run Individual Simulator Tests (For Detailed Testing)
```bash
# List all available tests
python communication_simulator_test.py --list-tests

# RECOMMENDED: Run tests individually for better isolation and debugging
python communication_simulator_test.py --individual basic_conversation
python communication_simulator_test.py --individual content_validation
python communication_simulator_test.py --individual cross_tool_continuation
python communication_simulator_test.py --individual memory_validation

# Run multiple specific tests
python communication_simulator_test.py --tests basic_conversation content_validation

# Run individual test with verbose output for debugging
python communication_simulator_test.py --individual memory_validation --verbose
```

Available simulator tests include:
- `basic_conversation` - Basic conversation flow with chat tool
- `content_validation` - Content validation and duplicate detection
- `per_tool_deduplication` - File deduplication for individual tools
- `cross_tool_continuation` - Cross-tool conversation continuation scenarios
- `cross_tool_comprehensive` - Comprehensive cross-tool file deduplication and continuation
- `line_number_validation` - Line number handling validation across tools
- `memory_validation` - Conversation memory validation
- `model_thinking_config` - Model-specific thinking configuration behavior
- `o3_model_selection` - O3 model selection and usage validation
- `ollama_custom_url` - Ollama custom URL endpoint functionality
- `openrouter_fallback` - OpenRouter fallback behavior when only provider
- `openrouter_models` - OpenRouter model functionality and alias mapping
- `token_allocation_validation` - Token allocation and conversation history validation
- `testgen_validation` - TestGen tool validation with specific test function
- `refactor_validation` - Refactor tool validation with codesmells
- `conversation_chain_validation` - Conversation chain and threading validation
- `consensus_stance` - Consensus tool validation with stance steering (for/against/neutral)

**Note**: All simulator tests should be run individually for optimal testing and better error isolation.

#### Run Unit Tests Only
```bash
# Run all unit tests (excluding integration tests that require API keys)
python -m pytest tests/ -v -m "not integration"

# Run specific test file
python -m pytest tests/test_refactor.py -v

# Run specific test function
python -m pytest tests/test_refactor.py::TestRefactorTool::test_format_response -v

# Run tests with coverage
python -m pytest tests/ --cov=. --cov-report=html -m "not integration"
```

#### Run Integration Tests (Uses Free Local Models)

**Setup Requirements:**
```bash
# 1. Install Ollama (if not already installed)
# Visit https://ollama.ai or use brew install ollama

# 2. Start Ollama service
ollama serve

# 3. Pull a model (e.g., llama3.2)
ollama pull llama3.2

# 4. Set environment variable for custom provider
export CUSTOM_API_URL="http://localhost:11434"
```

**Run Integration Tests:**
```bash
# Run integration tests that make real API calls to local models
python -m pytest tests/ -v -m "integration"

# Run specific integration test
python -m pytest tests/test_prompt_regression.py::TestPromptIntegration::test_chat_normal_prompt -v

# Run all tests (unit + integration)
python -m pytest tests/ -v
```

**Note**: Integration tests use the local-llama model via Ollama, which is completely FREE to run unlimited times. They require the `CUSTOM_API_URL` environment variable to point at your local Ollama endpoint. They can run safely in CI/CD but are excluded from the code quality checks to keep those checks fast.

### Development Workflow

#### Before Making Changes
1. Ensure virtual environment is activated: `source .zen_venv/bin/activate`
2. Run quality checks: `./code_quality_checks.sh`
3. Check logs to ensure server is healthy: `tail -n 50 logs/mcp_server.log`

#### After Making Changes
1. Run quality checks again: `./code_quality_checks.sh`
2. Run integration tests locally: `./run_integration_tests.sh`
3. Run quick test mode for fast validation: `python communication_simulator_test.py --quick`
4. Run relevant specific simulator tests if needed: `python communication_simulator_test.py --individual <test_name>`
5. Check logs for any issues: `tail -n 100 logs/mcp_server.log`
6. Restart Claude session to use updated code

#### Before Committing/PR
1. Final quality check: `./code_quality_checks.sh`
2. Run integration tests: `./run_integration_tests.sh`
3. Run quick test mode: `python communication_simulator_test.py --quick`
4. Run full simulator test suite (optional): `./run_integration_tests.sh --with-simulator`
5. Verify all tests pass 100%

### Common Troubleshooting

#### Server Issues
```bash
# Check if Python environment is set up correctly
./run-server.sh

# View recent errors
grep "ERROR" logs/mcp_server.log | tail -20

# Check virtual environment
which python
# Should show: .../zen-mcp-server/.zen_venv/bin/python
```

#### Test Failures
```bash
# First try quick test mode to see if it's a general issue
python communication_simulator_test.py --quick --verbose

# Run individual failing test with verbose output
python communication_simulator_test.py --individual <test_name> --verbose

# Check server logs during test execution
tail -f logs/mcp_server.log

# Run tests with debug output
LOG_LEVEL=DEBUG python communication_simulator_test.py --individual <test_name>
```

#### Linting Issues
```bash
# Auto-fix most linting issues
ruff check . --fix
black .
isort .

# Check what would be changed without applying
ruff check .
black --check .
isort --check-only .
```

### File Structure Context

- `./code_quality_checks.sh` - Comprehensive quality check script
- `./run-server.sh` - Server setup and management
- `communication_simulator_test.py` - End-to-end testing framework
- `simulator_tests/` - Individual test modules
- `tests/` - Unit test suite
- `tools/` - MCP tool implementations
- `providers/` - AI provider implementations
- `systemprompts/` - System prompt definitions
- `logs/` - Server log files

### Environment Requirements

- Python 3.9+ with virtual environment
- All dependencies from `requirements.txt` installed
- Proper API keys configured in `.env` file

This guide provides everything needed to efficiently work with the Zen MCP Server codebase using Claude. Always run quality checks before and after making changes to ensure code integrity.
```

--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------

```python
# Tests for Zen MCP Server

```

--------------------------------------------------------------------------------
/conf/__init__.py:
--------------------------------------------------------------------------------

```python
"""Configuration data for Zen MCP Server."""

```

--------------------------------------------------------------------------------
/.claude/settings.json:
--------------------------------------------------------------------------------

```json
{
  "permissions": {
    "allow": [
    ],
    "deny": []
  }
}
```

--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------

```yaml
# These are supported funding model platforms

github: [guidedways]

```

--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------

```
pytest>=7.4.0
pytest-asyncio>=0.21.0
pytest-mock>=3.11.0
black>=23.0.0
ruff>=0.1.0
isort>=5.12.0
python-semantic-release>=10.3.0
build>=1.0.0

```

--------------------------------------------------------------------------------
/clink/__init__.py:
--------------------------------------------------------------------------------

```python
"""Public helpers for clink components."""

from __future__ import annotations

from .registry import ClinkRegistry, get_registry

__all__ = ["ClinkRegistry", "get_registry"]

```

--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------

```
[pytest]
testpaths = tests
python_files = test_*.py
python_classes = Test*
python_functions = test_*
asyncio_mode = auto
addopts = 
    -v
    --strict-markers
    --tb=short
markers =
    integration: marks tests as integration tests that make real API calls with local-llama (free to run)
```

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------

```
mcp>=1.0.0
google-genai>=1.19.0
openai>=1.55.2  # Minimum version for httpx 0.28.0 compatibility
pydantic>=2.0.0
python-dotenv>=1.0.0
importlib-resources>=5.0.0; python_version<"3.9"

# Development dependencies (install with pip install -r requirements-dev.txt)
# pytest>=7.4.0
# pytest-asyncio>=0.21.0
# pytest-mock>=3.11.0
```

--------------------------------------------------------------------------------
/providers/shared/provider_type.py:
--------------------------------------------------------------------------------

```python
"""Enumeration describing which backend owns a given model."""

from enum import Enum

__all__ = ["ProviderType"]


class ProviderType(Enum):
    """Canonical identifiers for every supported provider backend."""

    GOOGLE = "google"
    OPENAI = "openai"
    AZURE = "azure"
    XAI = "xai"
    OPENROUTER = "openrouter"
    CUSTOM = "custom"
    DIAL = "dial"

```

--------------------------------------------------------------------------------
/examples/claude_config_macos.json:
--------------------------------------------------------------------------------

```json
{
  "comment": "macOS configuration using standalone server",
  "comment2": "Run './run-server.sh' to set up the environment and get exact paths",
  "comment3": "Use './run-server.sh -c' to display the correct configuration",
  "mcpServers": {
    "zen": {
      "command": "/path/to/zen-mcp-server/.zen_venv/bin/python",
      "args": ["/path/to/zen-mcp-server/server.py"]
    }
  }
}
```

--------------------------------------------------------------------------------
/claude_config_example.json:
--------------------------------------------------------------------------------

```json
{
  "comment": "Example Claude Desktop configuration for Zen MCP Server",
  "comment2": "Run './run-server.sh -c' to get the exact configuration for your system",
  "comment3": "For platform-specific examples, see the examples/ directory",
  "mcpServers": {
    "zen": {
      "command": "/path/to/zen-mcp-server/.zen_venv/bin/python",
      "args": ["/path/to/zen-mcp-server/server.py"]
    }
  }
}
```

--------------------------------------------------------------------------------
/examples/claude_config_wsl.json:
--------------------------------------------------------------------------------

```json
{
  "comment": "Windows configuration using WSL with standalone server",
  "comment2": "Run './run-server.sh' in WSL to set up the environment and get exact paths",
  "comment3": "Use './run-server.sh -c' to display the correct configuration",
  "mcpServers": {
    "zen": {
      "command": "wsl.exe",
      "args": [
        "/path/to/zen-mcp-server/.zen_venv/bin/python",
        "/path/to/zen-mcp-server/server.py"
      ]
    }
  }
}
```

--------------------------------------------------------------------------------
/conf/cli_clients/gemini.json:
--------------------------------------------------------------------------------

```json
{
  "name": "gemini",
  "command": "gemini",
  "additional_args": [
    "--yolo"
  ],
  "env": {},
  "roles": {
    "default": {
      "prompt_path": "systemprompts/clink/default.txt",
      "role_args": []
    },
    "planner": {
      "prompt_path": "systemprompts/clink/default_planner.txt",
      "role_args": []
    },
    "codereviewer": {
      "prompt_path": "systemprompts/clink/default_codereviewer.txt",
      "role_args": []
    }
  }
}

```

--------------------------------------------------------------------------------
/.claude/commands/fix-github-issue.md:
--------------------------------------------------------------------------------

```markdown
Please analyze and fix the GitHub issue: $ARGUMENTS.

Follow these steps:

1. Use `gh issue view` to get the issue details
2. Understand the problem described in the issue
3. Search the codebase for relevant files
4. Implement the necessary changes to fix the issue
5. Write and run tests to verify the fix
6. Ensure code passes linting and type checking
7. Create a descriptive commit message
8. Push and create a PR

Remember to use the GitHub CLI (`gh`) for all GitHub-related tasks.

```

--------------------------------------------------------------------------------
/tools/simple/__init__.py:
--------------------------------------------------------------------------------

```python
"""
Simple tools for Zen MCP.

Simple tools follow a basic request → AI model → response pattern.
They inherit from SimpleTool which provides streamlined functionality
for tools that don't need multi-step workflows.

Available simple tools:
- chat: General chat and collaborative thinking
- consensus: Multi-perspective analysis
- listmodels: Model listing and information
- testgen: Test generation
- tracer: Execution tracing
"""

from .base import SimpleTool

__all__ = ["SimpleTool"]

```

--------------------------------------------------------------------------------
/conf/cli_clients/codex.json:
--------------------------------------------------------------------------------

```json
{
  "name": "codex",
  "command": "codex",
  "additional_args": [
    "--json",
    "--dangerously-bypass-approvals-and-sandbox"
  ],
  "env": {},
  "roles": {
    "default": {
      "prompt_path": "systemprompts/clink/default.txt",
      "role_args": []
    },
    "planner": {
      "prompt_path": "systemprompts/clink/default_planner.txt",
      "role_args": []
    },
    "codereviewer": {
      "prompt_path": "systemprompts/clink/codex_codereviewer.txt",
      "role_args": []
    }
  }
}

```

--------------------------------------------------------------------------------
/conf/cli_clients/claude.json:
--------------------------------------------------------------------------------

```json
{
  "name": "claude",
  "command": "claude",
  "additional_args": [
    "--permission-mode",
    "acceptEdits",
    "--model",
    "sonnet"
  ],
  "env": {},
  "roles": {
    "default": {
      "prompt_path": "systemprompts/clink/default.txt",
      "role_args": []
    },
    "planner": {
      "prompt_path": "systemprompts/clink/default_planner.txt",
      "role_args": []
    },
    "codereviewer": {
      "prompt_path": "systemprompts/clink/default_codereviewer.txt",
      "role_args": []
    }
  }
}

```

--------------------------------------------------------------------------------
/tools/shared/__init__.py:
--------------------------------------------------------------------------------

```python
"""
Shared infrastructure for Zen MCP tools.

This module contains the core base classes and utilities that are shared
across all tool types. It provides the foundation for the tool architecture.
"""

from .base_models import BaseWorkflowRequest, ConsolidatedFindings, ToolRequest, WorkflowRequest
from .base_tool import BaseTool
from .schema_builders import SchemaBuilder

__all__ = [
    "BaseTool",
    "ToolRequest",
    "BaseWorkflowRequest",
    "WorkflowRequest",
    "ConsolidatedFindings",
    "SchemaBuilder",
]

```

--------------------------------------------------------------------------------
/providers/registries/__init__.py:
--------------------------------------------------------------------------------

```python
"""Registry implementations for provider capability manifests."""

from .azure import AzureModelRegistry
from .custom import CustomEndpointModelRegistry
from .dial import DialModelRegistry
from .gemini import GeminiModelRegistry
from .openai import OpenAIModelRegistry
from .openrouter import OpenRouterModelRegistry
from .xai import XAIModelRegistry

__all__ = [
    "AzureModelRegistry",
    "CustomEndpointModelRegistry",
    "DialModelRegistry",
    "GeminiModelRegistry",
    "OpenAIModelRegistry",
    "OpenRouterModelRegistry",
    "XAIModelRegistry",
]

```

--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------

```python
"""
Utility functions for Zen MCP Server
"""

from .file_types import CODE_EXTENSIONS, FILE_CATEGORIES, PROGRAMMING_EXTENSIONS, TEXT_EXTENSIONS
from .file_utils import expand_paths, read_file_content, read_files
from .security_config import EXCLUDED_DIRS
from .token_utils import check_token_limit, estimate_tokens

__all__ = [
    "read_files",
    "read_file_content",
    "expand_paths",
    "CODE_EXTENSIONS",
    "PROGRAMMING_EXTENSIONS",
    "TEXT_EXTENSIONS",
    "FILE_CATEGORIES",
    "EXCLUDED_DIRS",
    "estimate_tokens",
    "check_token_limit",
]

```

--------------------------------------------------------------------------------
/providers/shared/__init__.py:
--------------------------------------------------------------------------------

```python
"""Shared data structures and helpers for model providers."""

from .model_capabilities import ModelCapabilities
from .model_response import ModelResponse
from .provider_type import ProviderType
from .temperature import (
    DiscreteTemperatureConstraint,
    FixedTemperatureConstraint,
    RangeTemperatureConstraint,
    TemperatureConstraint,
)

__all__ = [
    "ModelCapabilities",
    "ModelResponse",
    "ProviderType",
    "TemperatureConstraint",
    "FixedTemperatureConstraint",
    "RangeTemperatureConstraint",
    "DiscreteTemperatureConstraint",
]

```

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------

```yaml
blank_issues_enabled: false
contact_links:
  - name: 💬 General Discussion
    url: https://github.com/BeehiveInnovations/zen-mcp-server/discussions
    about: Ask questions, share ideas, or discuss usage patterns with the community
  - name: 📚 Documentation
    url: https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/README.md
    about: Check the README for setup instructions and usage examples
  - name: 🤝 Contributing Guide
    url: https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/CONTRIBUTING.md
    about: Learn how to contribute to the project


```

--------------------------------------------------------------------------------
/providers/registries/xai.py:
--------------------------------------------------------------------------------

```python
"""Registry loader for X.AI model capabilities."""

from __future__ import annotations

from ..shared import ProviderType
from .base import CapabilityModelRegistry


class XAIModelRegistry(CapabilityModelRegistry):
    """Capability registry backed by ``conf/xai_models.json``."""

    def __init__(self, config_path: str | None = None) -> None:
        super().__init__(
            env_var_name="XAI_MODELS_CONFIG_PATH",
            default_filename="xai_models.json",
            provider=ProviderType.XAI,
            friendly_prefix="X.AI ({model})",
            config_path=config_path,
        )

```

--------------------------------------------------------------------------------
/providers/registries/dial.py:
--------------------------------------------------------------------------------

```python
"""Registry loader for DIAL provider capabilities."""

from __future__ import annotations

from ..shared import ProviderType
from .base import CapabilityModelRegistry


class DialModelRegistry(CapabilityModelRegistry):
    """Capability registry backed by ``conf/dial_models.json``."""

    def __init__(self, config_path: str | None = None) -> None:
        super().__init__(
            env_var_name="DIAL_MODELS_CONFIG_PATH",
            default_filename="dial_models.json",
            provider=ProviderType.DIAL,
            friendly_prefix="DIAL ({model})",
            config_path=config_path,
        )

```

--------------------------------------------------------------------------------
/clink/parsers/base.py:
--------------------------------------------------------------------------------

```python
"""Parser interfaces for clink runner outputs."""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any


@dataclass
class ParsedCLIResponse:
    """Result of parsing CLI stdout/stderr."""

    content: str
    metadata: dict[str, Any]


class ParserError(RuntimeError):
    """Raised when CLI output cannot be parsed into a structured response."""


class BaseParser:
    """Base interface for CLI output parsers."""

    name: str = "base"

    def parse(self, stdout: str, stderr: str) -> ParsedCLIResponse:
        raise NotImplementedError("Parsers must implement parse()")

```

--------------------------------------------------------------------------------
/providers/registries/gemini.py:
--------------------------------------------------------------------------------

```python
"""Registry loader for Gemini model capabilities."""

from __future__ import annotations

from ..shared import ProviderType
from .base import CapabilityModelRegistry


class GeminiModelRegistry(CapabilityModelRegistry):
    """Capability registry backed by ``conf/gemini_models.json``."""

    def __init__(self, config_path: str | None = None) -> None:
        super().__init__(
            env_var_name="GEMINI_MODELS_CONFIG_PATH",
            default_filename="gemini_models.json",
            provider=ProviderType.GOOGLE,
            friendly_prefix="Gemini ({model})",
            config_path=config_path,
        )

```

--------------------------------------------------------------------------------
/providers/registries/openai.py:
--------------------------------------------------------------------------------

```python
"""Registry loader for OpenAI model capabilities."""

from __future__ import annotations

from ..shared import ProviderType
from .base import CapabilityModelRegistry


class OpenAIModelRegistry(CapabilityModelRegistry):
    """Capability registry backed by ``conf/openai_models.json``."""

    def __init__(self, config_path: str | None = None) -> None:
        super().__init__(
            env_var_name="OPENAI_MODELS_CONFIG_PATH",
            default_filename="openai_models.json",
            provider=ProviderType.OPENAI,
            friendly_prefix="OpenAI ({model})",
            config_path=config_path,
        )

```

--------------------------------------------------------------------------------
/providers/__init__.py:
--------------------------------------------------------------------------------

```python
"""Model provider abstractions for supporting multiple AI providers."""

from .azure_openai import AzureOpenAIProvider
from .base import ModelProvider
from .gemini import GeminiModelProvider
from .openai import OpenAIModelProvider
from .openai_compatible import OpenAICompatibleProvider
from .openrouter import OpenRouterProvider
from .registry import ModelProviderRegistry
from .shared import ModelCapabilities, ModelResponse

__all__ = [
    "ModelProvider",
    "ModelResponse",
    "ModelCapabilities",
    "ModelProviderRegistry",
    "AzureOpenAIProvider",
    "GeminiModelProvider",
    "OpenAIModelProvider",
    "OpenAICompatibleProvider",
    "OpenRouterProvider",
]

```

--------------------------------------------------------------------------------
/providers/shared/model_response.py:
--------------------------------------------------------------------------------

```python
"""Dataclass used to normalise provider SDK responses."""

from dataclasses import dataclass, field
from typing import Any

from .provider_type import ProviderType

__all__ = ["ModelResponse"]


@dataclass
class ModelResponse:
    """Portable representation of a provider completion."""

    content: str
    usage: dict[str, int] = field(default_factory=dict)
    model_name: str = ""
    friendly_name: str = ""
    provider: ProviderType = ProviderType.GOOGLE
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def total_tokens(self) -> int:
        """Return the total token count if the provider reported usage data."""

        return self.usage.get("total_tokens", 0)

```

--------------------------------------------------------------------------------
/clink/agents/__init__.py:
--------------------------------------------------------------------------------

```python
"""Agent factory for clink CLI integrations."""

from __future__ import annotations

from clink.models import ResolvedCLIClient

from .base import AgentOutput, BaseCLIAgent, CLIAgentError
from .claude import ClaudeAgent
from .codex import CodexAgent
from .gemini import GeminiAgent

_AGENTS: dict[str, type[BaseCLIAgent]] = {
    "gemini": GeminiAgent,
    "codex": CodexAgent,
    "claude": ClaudeAgent,
}


def create_agent(client: ResolvedCLIClient) -> BaseCLIAgent:
    agent_key = (client.runner or client.name).lower()
    agent_cls = _AGENTS.get(agent_key, BaseCLIAgent)
    return agent_cls(client)


__all__ = [
    "AgentOutput",
    "BaseCLIAgent",
    "CLIAgentError",
    "create_agent",
]

```

--------------------------------------------------------------------------------
/tests/test_clink_parsers.py:
--------------------------------------------------------------------------------

```python
import pytest

from clink.parsers.base import ParserError
from clink.parsers.codex import CodexJSONLParser


def test_codex_parser_success():
    parser = CodexJSONLParser()
    stdout = """
{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"Hello"}}
{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}
"""
    parsed = parser.parse(stdout=stdout, stderr="")
    assert parsed.content == "Hello"
    assert parsed.metadata["usage"]["output_tokens"] == 5


def test_codex_parser_requires_agent_message():
    parser = CodexJSONLParser()
    stdout = '{"type":"turn.completed"}'
    with pytest.raises(ParserError):
        parser.parse(stdout=stdout, stderr="")

```

--------------------------------------------------------------------------------
/tools/shared/exceptions.py:
--------------------------------------------------------------------------------

```python
"""
Custom exceptions for Zen MCP tools.

These exceptions allow tools to signal protocol-level errors that should be surfaced
to MCP clients using the `isError` flag on `CallToolResult`. Raising one of these
exceptions ensures the low-level server adapter marks the result as an error while
preserving the structured payload we pass through the exception message.
"""


class ToolExecutionError(RuntimeError):
    """Raised to indicate a tool-level failure that must set `isError=True`."""

    def __init__(self, payload: str):
        """
        Args:
            payload: Serialized error payload (typically JSON) to return to the client.
        """
        super().__init__(payload)
        self.payload = payload

```

--------------------------------------------------------------------------------
/systemprompts/clink/default.txt:
--------------------------------------------------------------------------------

```
You are an external CLI agent operating inside the Zen MCP server with full repository access.

- Use terminal tools to inspect files and gather context before responding; cite exact paths, symbols, or commands when they matter.
- Provide concise, actionable responses in Markdown tailored to engineers working from the CLI.
- Keep output tight—prefer summaries and short bullet lists, and avoid quoting large sections of source unless essential.
- Surface assumptions, missing inputs, or follow-up checks that would improve confidence in the result.
- If a request is unsafe or unsupported, explain the limitation and suggest a safer alternative.
- Always conclude with `<SUMMARY>...</SUMMARY>` containing a terse (≤500 words) recap of key findings and immediate next steps.

```

--------------------------------------------------------------------------------
/clink/parsers/__init__.py:
--------------------------------------------------------------------------------

```python
"""Parser registry for clink."""

from __future__ import annotations

from .base import BaseParser, ParsedCLIResponse, ParserError
from .claude import ClaudeJSONParser
from .codex import CodexJSONLParser
from .gemini import GeminiJSONParser

_PARSER_CLASSES: dict[str, type[BaseParser]] = {
    CodexJSONLParser.name: CodexJSONLParser,
    GeminiJSONParser.name: GeminiJSONParser,
    ClaudeJSONParser.name: ClaudeJSONParser,
}


def get_parser(name: str) -> BaseParser:
    normalized = (name or "").lower()
    if normalized not in _PARSER_CLASSES:
        raise ParserError(f"No parser registered for '{name}'")
    parser_cls = _PARSER_CLASSES[normalized]
    return parser_cls()


__all__ = [
    "BaseParser",
    "ParsedCLIResponse",
    "ParserError",
    "get_parser",
]

```

--------------------------------------------------------------------------------
/tools/workflow/__init__.py:
--------------------------------------------------------------------------------

```python
"""
Workflow tools for Zen MCP.

Workflow tools follow a multi-step pattern with forced pauses between steps
to encourage thorough investigation and analysis. They inherit from WorkflowTool
which combines BaseTool with BaseWorkflowMixin.

Available workflow tools:
- debug: Systematic investigation and root cause analysis
- planner: Sequential planning (special case - no AI calls)
- analyze: Code analysis workflow
- codereview: Code review workflow
- precommit: Pre-commit validation workflow
- refactor: Refactoring analysis workflow
- thinkdeep: Deep thinking workflow
"""

from .base import WorkflowTool
from .schema_builders import WorkflowSchemaBuilder
from .workflow_mixin import BaseWorkflowMixin

__all__ = ["WorkflowTool", "WorkflowSchemaBuilder", "BaseWorkflowMixin"]

```

--------------------------------------------------------------------------------
/systemprompts/clink/default_planner.txt:
--------------------------------------------------------------------------------

```
You are the planning agent operating through the Zen MCP server.

- Respond with JSON only using the planning schema fields (status, step_number, total_steps, metadata, plan_summary, etc.); request missing context via the required `files_required_to_continue` JSON structure.
- Inspect any relevant files, scripts, or docs before outlining the plan; leverage your full CLI access for research.
- Break work into numbered phases with dependencies, validation gates, alternatives, and explicit next actions; highlight risks with mitigations.
- Keep each step concise—avoid repeating source excerpts and limit descriptions to the essentials another engineer needs to execute.
- Ensure the `plan_summary` (when planning is complete) is compact (≤500 words) and captures phases, risks, and immediate next actions.

```

--------------------------------------------------------------------------------
/tests/test_consensus_schema.py:
--------------------------------------------------------------------------------

```python
"""Schema-related tests for ConsensusTool."""

from types import MethodType

from tools.consensus import ConsensusTool


def test_consensus_models_field_includes_available_models(monkeypatch):
    """Consensus schema should surface available model guidance like single-model tools."""

    tool = ConsensusTool()

    monkeypatch.setattr(
        tool,
        "_get_ranked_model_summaries",
        MethodType(lambda self, limit=5: (["gemini-2.5-pro (score 100, 1.0M ctx, thinking)"], 1, False), tool),
    )
    monkeypatch.setattr(tool, "_get_restriction_note", MethodType(lambda self: None, tool))

    schema = tool.get_input_schema()
    models_field_description = schema["properties"]["models"]["description"]

    assert "listmodels" in models_field_description
    assert "Top models" in models_field_description

```

--------------------------------------------------------------------------------
/systemprompts/clink/default_codereviewer.txt:
--------------------------------------------------------------------------------

```
You are an external CLI code reviewer operating inside the Zen MCP server with full repository access.

- Inspect any relevant files directly—run linters or tests as needed—and mention important commands you rely on.
- Report findings in severity order (Critical, High, Medium, Low) across security, correctness, performance, and maintainability while staying within the provided scope.
- Keep feedback succinct—prioritise the highest-impact issues, avoid large code dumps, and summarise recommendations clearly.
- For each issue cite precise references (file:line plus a short excerpt or symbol name), describe the impact, and recommend a concrete fix or mitigation.
- Recognise positive practices worth keeping so peers understand what to preserve.
- Always conclude with `<SUMMARY>...</SUMMARY>` highlighting the top risks, recommended fixes, and key positives in ≤500 words.

```

--------------------------------------------------------------------------------
/systemprompts/clink/codex_codereviewer.txt:
--------------------------------------------------------------------------------

```
/review You are the Codex CLI code reviewer operating inside the Zen MCP server with full repository access.

- Inspect any relevant files directly—use your full repository access, run linters or tests as needed, and mention key commands when they inform your findings.
- Report issues in severity order (Critical, High, Medium, Low) spanning security, correctness, performance, and maintainability while staying within scope.
- Keep the review succinct—prioritize the highest-impact findings, avoid extensive code dumps, and summarise recommendations clearly.
- For each issue cite precise references (file:line plus a short excerpt or symbol name), describe the impact, and recommend a concrete fix or mitigation.
- Recognise positive practices worth keeping so peers understand what to preserve.
- Always conclude with `<SUMMARY>...</SUMMARY>` capturing the top issues, fixes, and positives in ≤500 words.

```

--------------------------------------------------------------------------------
/scripts/sync_version.py:
--------------------------------------------------------------------------------

```python
#!/usr/bin/env python3
"""
Sync version from pyproject.toml to config.py
This script is called by GitHub Actions after semantic-release updates the version
"""

import re
from datetime import datetime

import toml


def update_config_version():
    # Read version from pyproject.toml
    with open("pyproject.toml") as f:
        data = toml.load(f)
        version = data["project"]["version"]

    # Read current config.py
    with open("config.py") as f:
        content = f.read()

    # Update version
    content = re.sub(r'__version__ = "[^"]*"', f'__version__ = "{version}"', content)

    # Update date to current date
    today = datetime.now().strftime("%Y-%m-%d")
    content = re.sub(r'__updated__ = "[^"]*"', f'__updated__ = "{today}"', content)

    # Write back
    with open("config.py", "w") as f:
        f.write(content)

    print(f"Updated config.py to version {version}")


if __name__ == "__main__":
    update_config_version()

```

--------------------------------------------------------------------------------
/providers/registries/custom.py:
--------------------------------------------------------------------------------

```python
"""Registry loader for custom OpenAI-compatible endpoints."""

from __future__ import annotations

from ..shared import ModelCapabilities, ProviderType
from .base import CAPABILITY_FIELD_NAMES, CapabilityModelRegistry


class CustomEndpointModelRegistry(CapabilityModelRegistry):
    """Capability registry backed by ``conf/custom_models.json``."""

    def __init__(self, config_path: str | None = None) -> None:
        super().__init__(
            env_var_name="CUSTOM_MODELS_CONFIG_PATH",
            default_filename="custom_models.json",
            provider=ProviderType.CUSTOM,
            friendly_prefix="Custom ({model})",
            config_path=config_path,
        )

    def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]:
        filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES}
        filtered.setdefault("provider", ProviderType.CUSTOM)
        capability = ModelCapabilities(**filtered)
        return capability, {}

```

--------------------------------------------------------------------------------
/systemprompts/__init__.py:
--------------------------------------------------------------------------------

```python
"""
System prompts for Gemini tools
"""

from .analyze_prompt import ANALYZE_PROMPT
from .chat_prompt import CHAT_PROMPT
from .codereview_prompt import CODEREVIEW_PROMPT
from .consensus_prompt import CONSENSUS_PROMPT
from .debug_prompt import DEBUG_ISSUE_PROMPT
from .docgen_prompt import DOCGEN_PROMPT
from .generate_code_prompt import GENERATE_CODE_PROMPT
from .planner_prompt import PLANNER_PROMPT
from .precommit_prompt import PRECOMMIT_PROMPT
from .refactor_prompt import REFACTOR_PROMPT
from .secaudit_prompt import SECAUDIT_PROMPT
from .testgen_prompt import TESTGEN_PROMPT
from .thinkdeep_prompt import THINKDEEP_PROMPT
from .tracer_prompt import TRACER_PROMPT

__all__ = [
    "THINKDEEP_PROMPT",
    "CODEREVIEW_PROMPT",
    "DEBUG_ISSUE_PROMPT",
    "DOCGEN_PROMPT",
    "GENERATE_CODE_PROMPT",
    "ANALYZE_PROMPT",
    "CHAT_PROMPT",
    "CONSENSUS_PROMPT",
    "PLANNER_PROMPT",
    "PRECOMMIT_PROMPT",
    "REFACTOR_PROMPT",
    "SECAUDIT_PROMPT",
    "TESTGEN_PROMPT",
    "TRACER_PROMPT",
]

```

--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------

```python
"""
Tool implementations for Zen MCP Server
"""

from .analyze import AnalyzeTool
from .apilookup import LookupTool
from .challenge import ChallengeTool
from .chat import ChatTool
from .clink import CLinkTool
from .codereview import CodeReviewTool
from .consensus import ConsensusTool
from .debug import DebugIssueTool
from .docgen import DocgenTool
from .listmodels import ListModelsTool
from .planner import PlannerTool
from .precommit import PrecommitTool
from .refactor import RefactorTool
from .secaudit import SecauditTool
from .testgen import TestGenTool
from .thinkdeep import ThinkDeepTool
from .tracer import TracerTool
from .version import VersionTool

__all__ = [
    "ThinkDeepTool",
    "CodeReviewTool",
    "DebugIssueTool",
    "DocgenTool",
    "AnalyzeTool",
    "LookupTool",
    "ChatTool",
    "CLinkTool",
    "ConsensusTool",
    "ListModelsTool",
    "PlannerTool",
    "PrecommitTool",
    "ChallengeTool",
    "RefactorTool",
    "SecauditTool",
    "TestGenTool",
    "TracerTool",
    "VersionTool",
]

```

--------------------------------------------------------------------------------
/docs/gemini-setup.md:
--------------------------------------------------------------------------------

```markdown
# Gemini CLI Setup

> **Note**: While Zen MCP Server connects successfully to Gemini CLI, tool invocation is not working
> correctly yet. We'll update this guide once the integration is fully functional.

This guide explains how to configure Zen MCP Server to work with [Gemini CLI](https://github.com/google-gemini/gemini-cli).

## Prerequisites

- Zen MCP Server installed and configured
- Gemini CLI installed
- At least one API key configured in your `.env` file

## Configuration

1. Edit `~/.gemini/settings.json` and add:

```json
{
  "mcpServers": {
    "zen": {
      "command": "/path/to/zen-mcp-server/zen-mcp-server"
    }
  }
}
```

2. Replace `/path/to/zen-mcp-server` with your actual Zen installation path.

3. If the `zen-mcp-server` wrapper script doesn't exist, create it:

```bash
#!/bin/bash
DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$DIR"
exec .zen_venv/bin/python server.py "$@"
```

Then make it executable: `chmod +x zen-mcp-server`

4. Restart Gemini CLI.

All Zen tools are now available in your Gemini CLI session.
```

--------------------------------------------------------------------------------
/tests/test_config.py:
--------------------------------------------------------------------------------

```python
"""
Tests for configuration
"""

from config import (
    DEFAULT_MODEL,
    TEMPERATURE_ANALYTICAL,
    TEMPERATURE_BALANCED,
    TEMPERATURE_CREATIVE,
    __author__,
    __updated__,
    __version__,
)


class TestConfig:
    """Test configuration values"""

    def test_version_info(self):
        """Test version information exists and has correct format"""
        # Check version format (e.g., "2.4.1")
        assert isinstance(__version__, str)
        assert len(__version__.split(".")) == 3  # Major.Minor.Patch

        # Check author
        assert __author__ == "Fahad Gilani"

        # Check updated date exists (don't assert on specific format/value)
        assert isinstance(__updated__, str)

    def test_model_config(self):
        """Test model configuration"""
        # DEFAULT_MODEL is set in conftest.py for tests
        assert DEFAULT_MODEL == "gemini-2.5-flash"

    def test_temperature_defaults(self):
        """Test temperature constants"""
        assert TEMPERATURE_ANALYTICAL == 0.2
        assert TEMPERATURE_BALANCED == 0.5
        assert TEMPERATURE_CREATIVE == 0.7

```

--------------------------------------------------------------------------------
/clink/agents/codex.py:
--------------------------------------------------------------------------------

```python
"""Codex-specific CLI agent hooks."""

from __future__ import annotations

from clink.models import ResolvedCLIClient
from clink.parsers.base import ParserError

from .base import AgentOutput, BaseCLIAgent


class CodexAgent(BaseCLIAgent):
    """Codex CLI agent with JSONL recovery support."""

    def __init__(self, client: ResolvedCLIClient):
        super().__init__(client)

    def _recover_from_error(
        self,
        *,
        returncode: int,
        stdout: str,
        stderr: str,
        sanitized_command: list[str],
        duration_seconds: float,
        output_file_content: str | None,
    ) -> AgentOutput | None:
        try:
            parsed = self._parser.parse(stdout, stderr)
        except ParserError:
            return None

        return AgentOutput(
            parsed=parsed,
            sanitized_command=sanitized_command,
            returncode=returncode,
            stdout=stdout,
            stderr=stderr,
            duration_seconds=duration_seconds,
            parser_name=self._parser.name,
            output_file_content=output_file_content,
        )

```

--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------

```markdown
# Zen MCP Server Documentation

| Document | Description |
|----------|-------------|
| [Getting Started](getting-started.md) | Installation paths, prerequisite setup, and first-run guidance. |
| [Adding Providers](adding_providers.md) | How to register new AI providers and advertise capabilities. |
| [Azure OpenAI](azure_openai.md) | Configure Azure deployments, capability overrides, and env mappings. |
| [Model Ranking](model_ranking.md) | How intelligence scores translate into auto-mode ordering. |
| [Custom Models](custom_models.md) | Configure OpenRouter/custom models and aliases. |
| [Adding Tools](adding_tools.md) | Create new tools using the shared base classes. |
| [Advanced Usage](advanced-usage.md) | Auto-mode tricks, workflow tools, and collaboration tips. |
| [Configuration](configuration.md) | .env options, restriction policies, logging levels. |
| [Testing](testing.md) | Test strategy, command cheats, and coverage notes. |
| [Troubleshooting](troubleshooting.md) | Common issues and resolutions. |

Additional docs live in this directory; start with the table above to orient yourself.

```

--------------------------------------------------------------------------------
/docker/scripts/build.sh:
--------------------------------------------------------------------------------

```bash
#!/bin/bash
set -euo pipefail

# Colors for output
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'

echo -e "${GREEN}=== Building Zen MCP Server Docker Image ===${NC}"

# Check if .env file exists
if [[ ! -f .env ]]; then
    echo -e "${YELLOW}Warning: .env file not found. Copying from .env.example${NC}"
    if [[ -f .env.example ]]; then
        cp .env.example .env
        echo -e "${YELLOW}Please edit .env file with your API keys before running the server${NC}"
    else
        echo -e "${RED}Error: .env.example not found${NC}"
        exit 1
    fi
fi

# Build the Docker image
echo -e "${GREEN}Building Docker image...${NC}"
docker-compose build --no-cache

# Verify the build
if docker images | grep -q "zen-mcp-server"; then
    echo -e "${GREEN}✓ Docker image built successfully${NC}"
    echo -e "${GREEN}Image details:${NC}"
    docker images | grep zen-mcp-server
else
    echo -e "${RED}✗ Failed to build Docker image${NC}"
    exit 1
fi

echo -e "${GREEN}=== Build Complete ===${NC}"
echo -e "${YELLOW}Next steps:${NC}"
echo -e "  1. Edit .env file with your API keys"
echo -e "  2. Run: ${GREEN}docker-compose up -d${NC}"

```

--------------------------------------------------------------------------------
/tests/test_conversation_continuation_integration.py:
--------------------------------------------------------------------------------

```python
"""Integration test for conversation continuation persistence."""

from tools.chat import ChatRequest, ChatTool
from utils.conversation_memory import get_thread
from utils.storage_backend import get_storage_backend


def test_first_response_persisted_in_conversation_history(tmp_path):
    """Ensure the assistant's initial reply is stored for newly created threads."""

    # Clear in-memory storage to avoid cross-test contamination
    storage = get_storage_backend()
    storage._store.clear()  # type: ignore[attr-defined]

    tool = ChatTool()
    request = ChatRequest(
        prompt="First question?",
        model="local-llama",
        working_directory_absolute_path=str(tmp_path),
    )
    response_text = "Here is the initial answer."

    # Mimic the first tool invocation (no continuation_id supplied)
    continuation_data = tool._create_continuation_offer(request, model_info={"model_name": "local-llama"})
    tool._create_continuation_offer_response(
        response_text,
        continuation_data,
        request,
        {"model_name": "local-llama", "provider": "custom"},
    )

    thread_id = continuation_data["continuation_id"]
    thread = get_thread(thread_id)

    assert thread is not None
    assert [turn.role for turn in thread.turns] == ["user", "assistant"]
    assert thread.turns[-1].content == response_text

    # Cleanup storage for subsequent tests
    storage._store.clear()  # type: ignore[attr-defined]

```

--------------------------------------------------------------------------------
/tests/mock_helpers.py:
--------------------------------------------------------------------------------

```python
"""Helper functions for test mocking."""

from unittest.mock import Mock

from providers.shared import ModelCapabilities, ProviderType, RangeTemperatureConstraint


def create_mock_provider(model_name="gemini-2.5-flash", context_window=1_048_576):
    """Create a properly configured mock provider."""
    mock_provider = Mock()

    # Set up capabilities
    mock_capabilities = ModelCapabilities(
        provider=ProviderType.GOOGLE,
        model_name=model_name,
        friendly_name="Gemini",
        context_window=context_window,
        max_output_tokens=8192,
        supports_extended_thinking=False,
        supports_system_prompts=True,
        supports_streaming=True,
        supports_function_calling=True,
        temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7),
    )

    mock_provider.get_capabilities.return_value = mock_capabilities
    mock_provider.get_provider_type.return_value = ProviderType.GOOGLE
    mock_provider.validate_model_name.return_value = True

    # Set up generate_content response
    mock_response = Mock()
    mock_response.content = "Test response"
    mock_response.usage = {"input_tokens": 10, "output_tokens": 20}
    mock_response.model_name = model_name
    mock_response.friendly_name = "Gemini"
    mock_response.provider = ProviderType.GOOGLE
    mock_response.metadata = {"finish_reason": "STOP"}

    mock_provider.generate_content.return_value = mock_response

    return mock_provider

```
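
A minimal sketch of how a test might consume this helper; the test name is illustrative, and the assertions simply restate the defaults configured above:

```python
from tests.mock_helpers import create_mock_provider


def test_mock_provider_defaults():
    # Defaults mirror create_mock_provider() above
    provider = create_mock_provider()

    capabilities = provider.get_capabilities("gemini-2.5-flash")
    assert capabilities.context_window == 1_048_576

    response = provider.generate_content("hello")
    assert response.content == "Test response"
    assert response.usage["output_tokens"] == 20
```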

--------------------------------------------------------------------------------
/.github/workflows/semantic-pr.yml:
--------------------------------------------------------------------------------

```yaml
---
name: Semantic PR

on:
  pull_request:
    types: [opened, edited, synchronize]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

permissions:
  contents: read
  pull-requests: write

jobs:
  semantic-pr:
    name: Validate PR
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
      - name: Check PR Title
        id: lint-pr-title
        uses: amannn/action-semantic-pull-request@0723387faaf9b38adef4775cd42cfd5155ed6017 # v5.5.3
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Add PR error comment
        uses: marocchino/sticky-pull-request-comment@d2ad0de260ae8b0235ce059e63f2949ba9e05943 # v2.9.3
        if: always() && (steps.lint-pr-title.outputs.error_message != null)
        with:
          header: pr-title-lint-error
          message: |
            We require pull request titles to follow the [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0/) and it looks like your proposed title needs to be adjusted.

            Details:

            ```
            ${{ steps.lint-pr-title.outputs.error_message }}
            ```

      - name: Delete PR error comment
        uses: marocchino/sticky-pull-request-comment@d2ad0de260ae8b0235ce059e63f2949ba9e05943 # v2.9.3
        if: ${{ steps.lint-pr-title.outputs.error_message == null }}
        with:
          header: pr-title-lint-error
          delete: true
```

--------------------------------------------------------------------------------
/clink/agents/claude.py:
--------------------------------------------------------------------------------

```python
"""Claude-specific CLI agent hooks."""

from __future__ import annotations

from clink.models import ResolvedCLIRole
from clink.parsers.base import ParserError

from .base import AgentOutput, BaseCLIAgent


class ClaudeAgent(BaseCLIAgent):
    """Claude CLI agent with system-prompt injection support."""

    def _build_command(self, *, role: ResolvedCLIRole, system_prompt: str | None) -> list[str]:
        command = list(self.client.executable)
        command.extend(self.client.internal_args)
        command.extend(self.client.config_args)

        if system_prompt and "--append-system-prompt" not in self.client.config_args:
            command.extend(["--append-system-prompt", system_prompt])

        command.extend(role.role_args)
        return command

    def _recover_from_error(
        self,
        *,
        returncode: int,
        stdout: str,
        stderr: str,
        sanitized_command: list[str],
        duration_seconds: float,
        output_file_content: str | None,
    ) -> AgentOutput | None:
        try:
            parsed = self._parser.parse(stdout, stderr)
        except ParserError:
            return None

        return AgentOutput(
            parsed=parsed,
            sanitized_command=sanitized_command,
            returncode=returncode,
            stdout=stdout,
            stderr=stderr,
            duration_seconds=duration_seconds,
            parser_name=self._parser.name,
            output_file_content=output_file_content,
        )

```

--------------------------------------------------------------------------------
/tests/test_clink_gemini_parser.py:
--------------------------------------------------------------------------------

```python
"""Tests for the Gemini CLI JSON parser."""

import pytest

from clink.parsers.gemini import GeminiJSONParser, ParserError


def _build_rate_limit_stdout() -> str:
    return (
        "{\n"
        '  "response": "",\n'
        '  "stats": {\n'
        '    "models": {\n'
        '      "gemini-2.5-pro": {\n'
        '        "api": {\n'
        '          "totalRequests": 5,\n'
        '          "totalErrors": 5,\n'
        '          "totalLatencyMs": 13319\n'
        "        },\n"
        '        "tokens": {"prompt": 0, "candidates": 0, "total": 0, "cached": 0, "thoughts": 0, "tool": 0}\n'
        "      }\n"
        "    },\n"
        '    "tools": {"totalCalls": 0},\n'
        '    "files": {"totalLinesAdded": 0, "totalLinesRemoved": 0}\n'
        "  }\n"
        "}"
    )


def test_gemini_parser_handles_rate_limit_empty_response():
    parser = GeminiJSONParser()
    stdout = _build_rate_limit_stdout()
    stderr = "Attempt 1 failed with status 429. Retrying with backoff... ApiError: quota exceeded"

    parsed = parser.parse(stdout, stderr)

    assert "429" in parsed.content
    assert parsed.metadata.get("rate_limit_status") == 429
    assert parsed.metadata.get("empty_response") is True
    assert "Attempt 1 failed" in parsed.metadata.get("stderr", "")


def test_gemini_parser_still_errors_when_no_fallback_available():
    parser = GeminiJSONParser()
    stdout = '{"response": "", "stats": {}}'

    with pytest.raises(ParserError):
        parser.parse(stdout, stderr="")

```

--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------

```yaml
name: Tests

on:
  pull_request:
    branches: [main]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12"]

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -r requirements-dev.txt

      - name: Run unit tests
        run: |
          # Run only unit tests (exclude simulation tests and integration tests)
          # Integration tests require local-llama which isn't available in CI
          python -m pytest tests/ -v --ignore=simulator_tests/ -m "not integration"
        env:
          # Ensure no API key is accidentally used in CI
          GEMINI_API_KEY: ""
          OPENAI_API_KEY: ""

  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements-dev.txt

      - name: Run black formatter check
        run: black --check . --exclude="test_simulation_files/"

      - name: Run ruff linter
        run: ruff check . --exclude test_simulation_files

```

--------------------------------------------------------------------------------
/clink/constants.py:
--------------------------------------------------------------------------------

```python
"""Internal defaults and constants for clink."""

from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path

DEFAULT_TIMEOUT_SECONDS = 1800
DEFAULT_STREAM_LIMIT = 10 * 1024 * 1024  # 10MB per stream

PROJECT_ROOT = Path(__file__).resolve().parent.parent
BUILTIN_PROMPTS_DIR = PROJECT_ROOT / "systemprompts" / "clink"
CONFIG_DIR = PROJECT_ROOT / "conf" / "cli_clients"
USER_CONFIG_DIR = Path.home() / ".zen" / "cli_clients"


@dataclass(frozen=True)
class CLIInternalDefaults:
    """Internal defaults applied to a CLI client during registry load."""

    parser: str
    additional_args: list[str] = field(default_factory=list)
    env: dict[str, str] = field(default_factory=dict)
    default_role_prompt: str | None = None
    timeout_seconds: int = DEFAULT_TIMEOUT_SECONDS
    runner: str | None = None


INTERNAL_DEFAULTS: dict[str, CLIInternalDefaults] = {
    "gemini": CLIInternalDefaults(
        parser="gemini_json",
        additional_args=["-o", "json"],
        default_role_prompt="systemprompts/clink/default.txt",
        runner="gemini",
    ),
    "codex": CLIInternalDefaults(
        parser="codex_jsonl",
        additional_args=["exec"],
        default_role_prompt="systemprompts/clink/default.txt",
        runner="codex",
    ),
    "claude": CLIInternalDefaults(
        parser="claude_json",
        additional_args=["--print", "--output-format", "json"],
        default_role_prompt="systemprompts/clink/default.txt",
        runner="claude",
    ),
}

```
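
For quick orientation, the built-in defaults can be read straight from the mapping; this sketch only restates values defined above:

```python
from clink.constants import DEFAULT_TIMEOUT_SECONDS, INTERNAL_DEFAULTS

# Defaults applied to the Gemini CLI client during registry load
gemini_defaults = INTERNAL_DEFAULTS["gemini"]
assert gemini_defaults.parser == "gemini_json"
assert gemini_defaults.additional_args == ["-o", "json"]
assert gemini_defaults.timeout_seconds == DEFAULT_TIMEOUT_SECONDS  # 1800s unless overridden
```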

--------------------------------------------------------------------------------
/docs/tools/challenge.md:
--------------------------------------------------------------------------------

```markdown
# challenge - Challenge an approach or validate ideas with confidence

The `challenge` tool encourages critical thinking instead of automatic agreement with the dreaded **You're absolutely right!** responses - especially when you're not. It wraps your comment with instructions that prompt honest, critical analysis instead of blind agreement.

## Quick Example

```
challenge but do we even need all this extra caching because it'll just slow the app down?
```

```
challenge I don't think this approach solves my original complaint
```

Normally, your favorite coding agent will enthusiastically reply with **“You’re absolutely right!”**—then proceed to 
reverse the _correct_ strategy entirely, without stopping to consider that you might actually be wrong, missing the 
bigger picture or ignoring architectural constraints.

`challenge` fixes this. Claude can even _detect_ when you're challenging something and automatically invoke this tool
to ensure thoughtful analysis instead of reflexive agreement.

**Without Zen:**
![without_zen@2x](https://github.com/user-attachments/assets/64f3c9fb-7ca9-4876-b687-25e847edfd87)

**With Zen:**
![with_zen@2x](https://github.com/user-attachments/assets/9d72f444-ba53-4ab1-83e5-250062c6ee70)

## Why Use Challenge?

AI assistants tend to agree too readily. The challenge tool helps you:
- Get genuine critical evaluation of your ideas
- Challenge assumptions constructively
- Receive honest feedback on proposals
- Validate approaches with thoughtful analysis
```

--------------------------------------------------------------------------------
/providers/registries/openrouter.py:
--------------------------------------------------------------------------------

```python
"""OpenRouter model registry for managing model configurations and aliases."""

from __future__ import annotations

from ..shared import ModelCapabilities, ProviderType
from .base import CAPABILITY_FIELD_NAMES, CapabilityModelRegistry


class OpenRouterModelRegistry(CapabilityModelRegistry):
    """Capability registry backed by ``conf/openrouter_models.json``."""

    def __init__(self, config_path: str | None = None) -> None:
        super().__init__(
            env_var_name="OPENROUTER_MODELS_CONFIG_PATH",
            default_filename="openrouter_models.json",
            provider=ProviderType.OPENROUTER,
            friendly_prefix="OpenRouter ({model})",
            config_path=config_path,
        )

    def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]:
        provider_override = entry.get("provider")
        if isinstance(provider_override, str):
            entry_provider = ProviderType(provider_override.lower())
        elif isinstance(provider_override, ProviderType):
            entry_provider = provider_override
        else:
            entry_provider = ProviderType.OPENROUTER

        if entry_provider == ProviderType.CUSTOM:
            entry.setdefault("friendly_name", f"Custom ({entry['model_name']})")
        else:
            entry.setdefault("friendly_name", f"OpenRouter ({entry['model_name']})")

        filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES}
        filtered.setdefault("provider", entry_provider)
        capability = ModelCapabilities(**filtered)
        return capability, {}

```

--------------------------------------------------------------------------------
/docs/logging.md:
--------------------------------------------------------------------------------

```markdown
# Logging

## Quick Start - Follow Logs

The easiest way to monitor logs is to use the `-f` flag when starting the server:

```bash
# Start server and automatically follow MCP logs
./run-server.sh -f
```

This will start the server and immediately begin tailing the MCP server logs.

## Log Files

Logs are stored in the `logs/` directory within your project folder:

- **`mcp_server.log`** - Main server operations, API calls, and errors
- **`mcp_activity.log`** - Tool calls and conversation tracking

Log files rotate automatically when they reach 20MB, keeping up to 10 rotated files.

## Viewing Logs

To monitor MCP server activity:

```bash
# Follow logs in real-time
tail -f logs/mcp_server.log

# View last 100 lines
tail -n 100 logs/mcp_server.log

# View activity logs (tool calls only)
tail -f logs/mcp_activity.log

# Search for specific patterns
grep "ERROR" logs/mcp_server.log
grep "tool_name" logs/mcp_activity.log
```

## Log Level

Set verbosity with `LOG_LEVEL` in your `.env` file:

```env
# Options: DEBUG, INFO, WARNING, ERROR
LOG_LEVEL=INFO
```

- **DEBUG**: Detailed information for debugging
- **INFO**: General operational messages (default)
- **WARNING**: Warning messages
- **ERROR**: Only error messages

## Log Format

Logs use a standardized format with timestamps:

```
2024-06-14 10:30:45,123 - module.name - INFO - Message here
```
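
If you need the individual fields programmatically, the ` - ` separator makes them easy to split (a minimal sketch, assuming the exact format shown above):

```python
line = "2024-06-14 10:30:45,123 - module.name - INFO - Message here"
timestamp, module, level, message = line.split(" - ", 3)
```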

## Tips

- Use `./run-server.sh -f` for the easiest log monitoring experience
- Activity logs show only tool-related events for cleaner output
- Main server logs include all operational details
- Logs persist across server restarts
```

--------------------------------------------------------------------------------
/providers/registries/azure.py:
--------------------------------------------------------------------------------

```python
"""Registry loader for Azure OpenAI model configurations."""

from __future__ import annotations

import logging

from ..shared import ModelCapabilities, ProviderType, TemperatureConstraint
from .base import CAPABILITY_FIELD_NAMES, CustomModelRegistryBase

logger = logging.getLogger(__name__)


class AzureModelRegistry(CustomModelRegistryBase):
    """Load Azure-specific model metadata from configuration files."""

    def __init__(self, config_path: str | None = None) -> None:
        super().__init__(
            env_var_name="AZURE_MODELS_CONFIG_PATH",
            default_filename="azure_models.json",
            config_path=config_path,
        )
        self.reload()

    def _extra_keys(self) -> set[str]:
        return {"deployment", "deployment_name"}

    def _provider_default(self) -> ProviderType:
        return ProviderType.AZURE

    def _default_friendly_name(self, model_name: str) -> str:
        return f"Azure OpenAI ({model_name})"

    def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]:
        deployment = entry.pop("deployment", None) or entry.pop("deployment_name", None)
        if not deployment:
            raise ValueError(f"Azure model '{entry.get('model_name')}' is missing required 'deployment' field")

        temp_hint = entry.get("temperature_constraint")
        if isinstance(temp_hint, str):
            entry["temperature_constraint"] = TemperatureConstraint.create(temp_hint)

        filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES}
        filtered.setdefault("provider", ProviderType.AZURE)
        capability = ModelCapabilities(**filtered)
        return capability, {"deployment": deployment}

```
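
For orientation, a sketch of the entry shape this loader consumes; the model and deployment names below are placeholders, not a real configuration:

```python
# Illustrative entry shape handled by AzureModelRegistry._finalise_entry above.
# "deployment" is mandatory and is popped into the returned extra dict; the
# remaining keys are standard ModelCapabilities fields.
entry = {
    "model_name": "gpt-4o",
    "deployment": "my-gpt4o-deployment",   # placeholder deployment name
    "friendly_name": "Azure OpenAI (gpt-4o)",
    "context_window": 128000,
    "temperature_constraint": "range",      # string hint converted via TemperatureConstraint.create
}
```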

--------------------------------------------------------------------------------
/tests/test_line_numbers_integration.py:
--------------------------------------------------------------------------------

```python
"""
Integration test demonstrating that all tools get line numbers by default.
"""

from tools.analyze import AnalyzeTool
from tools.chat import ChatTool
from tools.codereview import CodeReviewTool
from tools.debug import DebugIssueTool
from tools.precommit import PrecommitTool
from tools.refactor import RefactorTool
from tools.testgen import TestGenTool


class TestLineNumbersIntegration:
    """Test that all tools inherit line number behavior correctly."""

    def test_all_tools_want_line_numbers(self):
        """Verify that all tools want line numbers by default."""
        tools = [
            ChatTool(),
            AnalyzeTool(),
            CodeReviewTool(),
            DebugIssueTool(),
            RefactorTool(),
            TestGenTool(),
            PrecommitTool(),
        ]

        for tool in tools:
            assert tool.wants_line_numbers_by_default(), f"{tool.get_name()} should want line numbers by default"

    def test_no_tools_override_line_numbers(self):
        """Verify that no tools override the base class line number behavior."""
        # Check that tools don't have their own wants_line_numbers_by_default method
        tools_classes = [
            ChatTool,
            AnalyzeTool,
            CodeReviewTool,
            DebugIssueTool,
            RefactorTool,
            TestGenTool,
            PrecommitTool,
        ]

        for tool_class in tools_classes:
            # Check if the method is defined in the tool class itself
            # (not inherited from base)
            has_override = "wants_line_numbers_by_default" in tool_class.__dict__
            assert not has_override, f"{tool_class.__name__} should not override wants_line_numbers_by_default"

```

--------------------------------------------------------------------------------
/tests/transport_helpers.py:
--------------------------------------------------------------------------------

```python
"""Helper functions for HTTP transport injection in tests."""

from tests.http_transport_recorder import TransportFactory


def inject_transport(monkeypatch, cassette_path: str):
    """Inject HTTP transport into OpenAICompatibleProvider for testing.

    This helper simplifies the monkey patching pattern used across tests
    to inject custom HTTP transports for recording/replaying API calls.

    Also ensures OpenAI provider is properly registered for tests that need it.

    Args:
        monkeypatch: pytest monkeypatch fixture
        cassette_path: Path to cassette file for recording/replay

    Returns:
        The created transport instance

    Example:
        transport = inject_transport(monkeypatch, "path/to/cassette.json")
    """
    # Ensure OpenAI provider is registered - always needed for transport injection
    from providers.openai import OpenAIModelProvider
    from providers.registry import ModelProviderRegistry
    from providers.shared import ProviderType

    # Always register OpenAI provider for transport tests (API key might be dummy)
    ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)

    # Create transport
    transport = TransportFactory.create_transport(str(cassette_path))

    # Inject transport using the established pattern
    from providers.openai_compatible import OpenAICompatibleProvider

    original_client_property = OpenAICompatibleProvider.client

    def patched_client_getter(self):
        if self._client is None:
            self._test_transport = transport
        return original_client_property.fget(self)

    monkeypatch.setattr(OpenAICompatibleProvider, "client", property(patched_client_getter))

    return transport

```

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentation.yml:
--------------------------------------------------------------------------------

```yaml
name: 📖 Documentation Improvement
description: Report an issue or suggest an improvement for the documentation
labels: ["documentation", "good first issue"]
body:
  - type: input
    id: location
    attributes:
      label: Documentation Location
      description: "Which file or page has the issue? (e.g., README.md, CONTRIBUTING.md, CLAUDE.md)"
      placeholder: "e.g., README.md"
    validations:
      required: true

  - type: dropdown
    id: issue-type
    attributes:
      label: Type of Documentation Issue
      description: What kind of documentation improvement is this?
      options:
        - Typo or grammar error
        - Unclear or confusing explanation
        - Outdated information
        - Missing information
        - Code example doesn't work
        - Installation/setup instructions unclear
        - Tool usage examples need improvement
        - Other
    validations:
      required: true

  - type: textarea
    id: problem
    attributes:
      label: What is wrong with the documentation?
      description: "Please describe the problem. Be specific about what is unclear, incorrect, or missing."
      placeholder: "README is missing some details"
    validations:
      required: true

  - type: textarea
    id: suggestion
    attributes:
      label: Suggested Improvement
      description: "How can we make it better? If you can, please provide the exact text or changes you'd like to see."
      placeholder: "Please improve...."


  - type: dropdown
    id: audience
    attributes:
      label: Target Audience
      description: Which audience would benefit most from this improvement?
      options:
        - New users (first-time setup)
        - Developers (contributing to the project)
        - Advanced users (complex workflows)
        - All users
    validations:
      required: true


```

--------------------------------------------------------------------------------
/utils/token_utils.py:
--------------------------------------------------------------------------------

```python
"""
Token counting utilities for managing API context limits

This module provides functions for estimating token counts to ensure
requests stay within the Gemini API's context window limits.

Note: The estimation uses a simple character-to-token ratio which is
approximate. For production systems requiring precise token counts,
consider using the actual tokenizer for the specific model.
"""

# Default fallback for token limit (conservative estimate)
DEFAULT_CONTEXT_WINDOW = 200_000  # Conservative fallback for unknown models


def estimate_tokens(text: str) -> int:
    """
    Estimate token count using a character-based approximation.

    This uses a rough heuristic where 1 token ≈ 4 characters, which is
    a reasonable approximation for English text. The actual token count
    may vary based on:
    - Language (non-English text may have different ratios)
    - Code vs prose (code often has more tokens per character)
    - Special characters and formatting

    Args:
        text: The text to estimate tokens for

    Returns:
        int: Estimated number of tokens
    """
    return len(text) // 4


def check_token_limit(text: str, context_window: int = DEFAULT_CONTEXT_WINDOW) -> tuple[bool, int]:
    """
    Check if text exceeds the specified token limit.

    This function is used to validate that prepared prompts will fit
    within the model's context window, preventing API errors and ensuring
    reliable operation.

    Args:
        text: The text to check
        context_window: The model's context window size (defaults to conservative fallback)

    Returns:
        Tuple[bool, int]: (is_within_limit, estimated_tokens)
        - is_within_limit: True if the text fits within context_window
        - estimated_tokens: The estimated token count
    """
    estimated = estimate_tokens(text)
    return estimated <= context_window, estimated

```
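
A short usage sketch of the two helpers above (the sample text is arbitrary; the numbers follow directly from the 4-characters-per-token heuristic):

```python
from utils.token_utils import check_token_limit, estimate_tokens

text = "def add(a, b):\n    return a + b\n"
print(estimate_tokens(text))  # roughly len(text) // 4

# Check against a specific model's context window instead of the fallback
ok, tokens = check_token_limit(text, context_window=1_048_576)
assert ok and tokens == len(text) // 4
```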

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------

```yaml
name: 🐞 Bug Report
description: Create a report to help us improve
labels: ["bug", "needs-triage"]
body:
  - type: markdown
    attributes:
      value: |
        Thank you for taking the time to file a bug report! Please provide as much detail as possible to help us reproduce and fix the issue.

  - type: input
    id: version
    attributes:
      label: Project Version
      description: "Which version are you using? (To see version: ./run-server.sh -v)"
      placeholder: "e.g., 5.1.0"
    validations:
      required: true

  - type: textarea
    id: description
    attributes:
      label: Bug Description
      description: A clear and concise description of what the bug is.
      placeholder: "When I run the `codereview` nothing happens"
    validations:
      required: true

  - type: textarea
    id: logs
    attributes:
      label: Relevant Log Output
      description: "Please copy and paste any relevant log output. Logs are stored under the `logs` folder in the zen folder. You an also use `./run-server.sh -f` to see logs"
      render: shell

  - type: dropdown
    id: environment
    attributes:
      label: Operating System
      description: What operating system are you running the Docker client on?
      options:
        - macOS
        - Windows
        - Linux
    validations:
      required: true

  - type: checkboxes
    id: no-duplicate-issues
    attributes:
      label: Sanity Checks
      description: "Before submitting, please confirm the following:"
      options:
        - label: I have searched the existing issues and this is not a duplicate.
          required: true
        - label: I am using `GEMINI_API_KEY`
          required: true
        - label: I am using `OPENAI_API_KEY`
          required: true
        - label: I am using `OPENROUTER_API_KEY`
          required: true
        - label: I am using `CUSTOM_API_URL`
          required: true

```

--------------------------------------------------------------------------------
/utils/security_config.py:
--------------------------------------------------------------------------------

```python
"""
Security configuration and path validation constants

This module contains security-related constants and configurations
for file access control.
"""

from pathlib import Path

# Dangerous paths that should never be scanned
# These would give overly broad access and pose security risks
DANGEROUS_PATHS = {
    "/",
    "/etc",
    "/usr",
    "/bin",
    "/var",
    "/root",
    "/home",
    "C:\\",
    "C:\\Windows",
    "C:\\Program Files",
    "C:\\Users",
}

# Directories to exclude from recursive file search
# These typically contain generated code, dependencies, or build artifacts
EXCLUDED_DIRS = {
    # Python
    "__pycache__",
    ".venv",
    "venv",
    "env",
    ".env",
    "*.egg-info",
    ".eggs",
    "wheels",
    ".Python",
    ".mypy_cache",
    ".pytest_cache",
    ".tox",
    "htmlcov",
    ".coverage",
    "coverage",
    # Node.js / JavaScript
    "node_modules",
    ".next",
    ".nuxt",
    "bower_components",
    ".sass-cache",
    # Version Control
    ".git",
    ".svn",
    ".hg",
    # Build Output
    "build",
    "dist",
    "target",
    "out",
    # IDEs
    ".idea",
    ".vscode",
    ".sublime",
    ".atom",
    ".brackets",
    # Temporary / Cache
    ".cache",
    ".temp",
    ".tmp",
    "*.swp",
    "*.swo",
    "*~",
    # OS-specific
    ".DS_Store",
    "Thumbs.db",
    # Java / JVM
    ".gradle",
    ".m2",
    # Documentation build
    "_build",
    "site",
    # Mobile development
    ".expo",
    ".flutter",
    # Package managers
    "vendor",
}


def is_dangerous_path(path: Path) -> bool:
    """
    Check if a path is in the dangerous paths list.

    Args:
        path: Path to check

    Returns:
        True if the path is dangerous and should not be accessed
    """
    try:
        resolved = path.resolve()
        return str(resolved) in DANGEROUS_PATHS or resolved.parent == resolved
    except Exception:
        return True  # If we can't resolve, consider it dangerous

```
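
A minimal sketch of how these constants behave, assuming POSIX-style paths (the project path below is illustrative):

```python
from pathlib import Path

from utils.security_config import EXCLUDED_DIRS, is_dangerous_path

# Filesystem roots and system directories are rejected outright
assert is_dangerous_path(Path("/"))
assert is_dangerous_path(Path("/etc"))

# A project subdirectory is allowed, while generated folders are skipped during file search
assert not is_dangerous_path(Path("/home/user/projects/zen-mcp-server"))
assert "node_modules" in EXCLUDED_DIRS
```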

--------------------------------------------------------------------------------
/.github/workflows/semantic-release.yml:
--------------------------------------------------------------------------------

```yaml
name: Semantic Release

on:
  push:
    branches:
      - main

permissions:
  contents: write
  issues: write
  pull-requests: write

jobs:
  release:
    runs-on: ubuntu-latest
    concurrency: release

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}
          persist-credentials: true

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install python-semantic-release

      - name: Verify tests pass
        run: |
          pip install -r requirements.txt
          pip install -r requirements-dev.txt
          python -m pytest tests/ -v --ignore=simulator_tests/ -m "not integration"

      - name: Run semantic release
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          semantic-release version
          semantic-release publish
          
      - name: Sync version to config.py
        run: |
          pip install toml
          python scripts/sync_version.py
          if git diff --quiet config.py; then
            echo "No version changes in config.py"
          else
            git add config.py
            git commit -m "chore: sync version to config.py [skip ci]"
            git push
          fi

      - name: Upload build artifacts to release
        if: hashFiles('dist/*') != ''
        run: |
          # Get the latest release tag
          LATEST_TAG=$(gh release list --limit 1 --json tagName --jq '.[0].tagName')
          if [ ! -z "$LATEST_TAG" ]; then
            echo "Uploading artifacts to release $LATEST_TAG"
            gh release upload "$LATEST_TAG" dist/* --clobber
          fi
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

```

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------

```yaml
name: ✨ Feature Request
description: Suggest an idea for this project
labels: ["enhancement", "needs-triage"]
body:
  - type: textarea
    id: problem-description
    attributes:
      label: What problem is this feature trying to solve?
      description: "A clear and concise description of the problem or user need. Why is this change needed?"
      placeholder: "Currently, I can only use one Gemini tool at a time. I want to be able to chain multiple tools together (e.g., analyze -> codereview -> thinkdeep) in a single workflow."
    validations:
      required: true

  - type: textarea
    id: proposed-solution
    attributes:
      label: Describe the solution you'd like
      description: A clear and concise description of what you want to happen. How would it work from a user's perspective?
      placeholder: "I'd like to be able to specify a workflow like 'analyze src/ then codereview the findings then use thinkdeep to suggest improvements' in a single command or configuration."
    validations:
      required: true

  - type: textarea
    id: alternatives
    attributes:
      label: Describe alternatives you've considered
      description: A clear and concise description of any alternative solutions or features you've considered.
      placeholder: "I considered manually running each tool sequentially, but automatic workflow chaining would be more efficient and ensure context is preserved between steps."

  - type: dropdown
    id: feature-type
    attributes:
      label: Feature Category
      description: What type of enhancement is this?
      options:
        - New tool (chat, codereview, debug, etc.)
        - Workflow improvement
        - Integration enhancement
        - Performance optimization
        - User experience improvement
        - Documentation enhancement
        - Other
    validations:
      required: true

  - type: checkboxes
    id: contribution
    attributes:
      label: Contribution
      options:
        - label: I am willing to submit a Pull Request to implement this feature.


```

--------------------------------------------------------------------------------
/tests/test_mcp_error_handling.py:
--------------------------------------------------------------------------------

```python
import json
from types import SimpleNamespace

import pytest
from mcp.types import CallToolRequest, CallToolRequestParams

from providers.registry import ModelProviderRegistry
from server import server as mcp_server


def _install_dummy_provider(monkeypatch):
    """Ensure preflight model checks succeed without real provider configuration."""

    class DummyProvider:
        def get_provider_type(self):
            return SimpleNamespace(value="dummy")

        def get_capabilities(self, model_name):
            return SimpleNamespace(
                supports_extended_thinking=False,
                allow_code_generation=False,
                supports_images=False,
                context_window=1_000_000,
                max_image_size_mb=10,
            )

    monkeypatch.setattr(
        ModelProviderRegistry,
        "get_provider_for_model",
        classmethod(lambda cls, model_name: DummyProvider()),
    )
    monkeypatch.setattr(
        ModelProviderRegistry,
        "get_available_models",
        classmethod(lambda cls, respect_restrictions=False: {"gemini-2.5-flash": None}),
    )


@pytest.mark.asyncio
async def test_tool_execution_error_sets_is_error_flag_for_mcp_response(monkeypatch):
    """Ensure ToolExecutionError surfaces as CallToolResult with isError=True."""

    _install_dummy_provider(monkeypatch)

    handler = mcp_server.request_handlers[CallToolRequest]

    arguments = {
        "prompt": "Trigger working_directory_absolute_path validation failure",
        "working_directory_absolute_path": "relative/path",  # Not absolute -> ToolExecutionError from ChatTool
        "absolute_file_paths": [],
        "model": "gemini-2.5-flash",
    }

    request = CallToolRequest(params=CallToolRequestParams(name="chat", arguments=arguments))

    server_result = await handler(request)

    assert server_result.root.isError is True
    assert server_result.root.content, "Expected error response content"

    payload = server_result.root.content[0].text
    data = json.loads(payload)
    assert data["status"] == "error"
    assert "absolute" in data["content"].lower()

```

--------------------------------------------------------------------------------
/docker/scripts/build.ps1:
--------------------------------------------------------------------------------

```
#!/usr/bin/env pwsh
#Requires -Version 5.1
[CmdletBinding()]
param()

# Set error action preference
$ErrorActionPreference = "Stop"

# Colors for output (using Write-Host with colors)
function Write-ColorText {
    param(
        [Parameter(Mandatory)]
        [string]$Text,
        [string]$Color = "White",
        [switch]$NoNewline
    )
    if ($NoNewline) {
        Write-Host $Text -ForegroundColor $Color -NoNewline
    } else {
        Write-Host $Text -ForegroundColor $Color
    }
}

Write-ColorText "=== Building Zen MCP Server Docker Image ===" -Color Green

# Check if .env file exists
if (!(Test-Path ".env")) {
    Write-ColorText "Warning: .env file not found. Copying from .env.example" -Color Yellow
    if (Test-Path ".env.example") {
        Copy-Item ".env.example" ".env"
        Write-ColorText "Please edit .env file with your API keys before running the server" -Color Yellow
    } else {
        Write-ColorText "Error: .env.example not found" -Color Red
        exit 1
    }
}

# Build the Docker image
Write-ColorText "Building Docker image..." -Color Green
try {
    docker-compose build --no-cache
    if ($LASTEXITCODE -ne 0) {
        throw "Docker build failed"
    }
} catch {
    Write-ColorText "Error: Failed to build Docker image" -Color Red
    exit 1
}

# Verify the build
Write-ColorText "Verifying build..." -Color Green
$images = docker images --format "table {{.Repository}}\t{{.Tag}}\t{{.Size}}\t{{.CreatedAt}}" | Select-String "zen-mcp-server"

if ($images) {
    Write-ColorText "✓ Docker image built successfully" -Color Green
    Write-ColorText "Image details:" -Color Green
    $images | ForEach-Object { Write-Host $_.Line }
} else {
    Write-ColorText "✗ Failed to build Docker image" -Color Red
    exit 1
}

Write-ColorText "=== Build Complete ===" -Color Green
Write-ColorText "Next steps:" -Color Yellow
Write-Host "  1. Edit .env file with your API keys"
Write-ColorText "  2. Run: " -Color White -NoNewline
Write-ColorText "docker-compose up -d" -Color Green

Write-ColorText "Or use the deploy script: " -Color White -NoNewline
Write-ColorText ".\deploy.ps1" -Color Green

```

--------------------------------------------------------------------------------
/tests/test_clink_claude_parser.py:
--------------------------------------------------------------------------------

```python
"""Tests for the Claude CLI JSON parser."""

import json

import pytest

from clink.parsers.base import ParserError
from clink.parsers.claude import ClaudeJSONParser


def _build_success_payload() -> str:
    return (
        '{"type":"result","subtype":"success","is_error":false,"duration_ms":1234,'
        '"duration_api_ms":1200,"num_turns":1,"result":"42","session_id":"abc","total_cost_usd":0.12,'
        '"usage":{"input_tokens":10,"output_tokens":5},'
        '"modelUsage":{"claude-sonnet-4-5-20250929":{"inputTokens":10,"outputTokens":5}}}'
    )


def test_claude_parser_extracts_result_and_metadata():
    parser = ClaudeJSONParser()
    stdout = _build_success_payload()

    parsed = parser.parse(stdout=stdout, stderr="")

    assert parsed.content == "42"
    assert parsed.metadata["model_used"] == "claude-sonnet-4-5-20250929"
    assert parsed.metadata["usage"]["output_tokens"] == 5
    assert parsed.metadata["is_error"] is False


def test_claude_parser_falls_back_to_message():
    parser = ClaudeJSONParser()
    stdout = '{"type":"result","is_error":true,"message":"API error message"}'

    parsed = parser.parse(stdout=stdout, stderr="warning")

    assert parsed.content == "API error message"
    assert parsed.metadata["is_error"] is True
    assert parsed.metadata["stderr"] == "warning"


def test_claude_parser_requires_output():
    parser = ClaudeJSONParser()

    with pytest.raises(ParserError):
        parser.parse(stdout="", stderr="")


def test_claude_parser_handles_array_payload_with_result_event():
    parser = ClaudeJSONParser()
    events = [
        {"type": "system", "session_id": "abc"},
        {"type": "assistant", "message": "intermediate"},
        {
            "type": "result",
            "subtype": "success",
            "result": "42",
            "duration_api_ms": 9876,
            "usage": {"input_tokens": 12, "output_tokens": 3},
        },
    ]
    stdout = json.dumps(events)

    parsed = parser.parse(stdout=stdout, stderr="warning")

    assert parsed.content == "42"
    assert parsed.metadata["duration_api_ms"] == 9876
    assert parsed.metadata["raw_events"] == events
    assert parsed.metadata["raw"] == events

```

--------------------------------------------------------------------------------
/clink/parsers/codex.py:
--------------------------------------------------------------------------------

```python
"""Parser for Codex CLI JSONL output."""

from __future__ import annotations

import json
from typing import Any

from .base import BaseParser, ParsedCLIResponse, ParserError


class CodexJSONLParser(BaseParser):
    """Parse stdout emitted by `codex exec --json`."""

    name = "codex_jsonl"

    def parse(self, stdout: str, stderr: str) -> ParsedCLIResponse:
        lines = [line.strip() for line in (stdout or "").splitlines() if line.strip()]
        events: list[dict[str, Any]] = []
        agent_messages: list[str] = []
        errors: list[str] = []
        usage: dict[str, Any] | None = None

        for line in lines:
            if not line.startswith("{"):
                continue
            try:
                event = json.loads(line)
            except json.JSONDecodeError:
                continue

            events.append(event)
            event_type = event.get("type")
            if event_type == "item.completed":
                item = event.get("item") or {}
                if item.get("type") == "agent_message":
                    text = item.get("text")
                    if isinstance(text, str) and text.strip():
                        agent_messages.append(text.strip())
            elif event_type == "error":
                message = event.get("message")
                if isinstance(message, str) and message.strip():
                    errors.append(message.strip())
            elif event_type == "turn.completed":
                turn_usage = event.get("usage")
                if isinstance(turn_usage, dict):
                    usage = turn_usage

        if not agent_messages and errors:
            agent_messages.extend(errors)

        if not agent_messages:
            raise ParserError("Codex CLI JSONL output did not include an agent_message item")

        content = "\n\n".join(agent_messages).strip()
        metadata: dict[str, Any] = {"events": events}
        if errors:
            metadata["errors"] = errors
        if usage:
            metadata["usage"] = usage
        if stderr and stderr.strip():
            metadata["stderr"] = stderr.strip()

        return ParsedCLIResponse(content=content, metadata=metadata)

```
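
A minimal sketch of the JSONL shape this parser accepts; the event payloads here are illustrative, not captured CLI output:

```python
from clink.parsers.codex import CodexJSONLParser

# An agent message followed by the turn summary that carries token usage
stdout = "\n".join(
    [
        '{"type": "item.completed", "item": {"type": "agent_message", "text": "4"}}',
        '{"type": "turn.completed", "usage": {"input_tokens": 12, "output_tokens": 1}}',
    ]
)

parsed = CodexJSONLParser().parse(stdout, stderr="")
assert parsed.content == "4"
assert parsed.metadata["usage"]["output_tokens"] == 1
```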

--------------------------------------------------------------------------------
/docs/wsl-setup.md:
--------------------------------------------------------------------------------

```markdown
# WSL (Windows Subsystem for Linux) Setup Guide

This guide provides detailed instructions for setting up Zen MCP Server on Windows using WSL.

## Prerequisites for WSL

```bash
# Update WSL and ensure you have a recent Ubuntu distribution
sudo apt update && sudo apt upgrade -y

# Install required system dependencies
sudo apt install -y python3-venv python3-pip curl git

# Install Node.js and npm (required for Claude Code CLI)
curl -fsSL https://deb.nodesource.com/setup_lts.x | sudo -E bash -
sudo apt install -y nodejs

# Install Claude Code CLI globally
npm install -g @anthropic-ai/claude-code
```

## WSL-Specific Installation Steps

1. **Clone the repository in your WSL environment** (not in Windows filesystem):
   ```bash
   # Navigate to your home directory or preferred location in WSL
   cd ~
   
   # Clone the repository
   git clone https://github.com/BeehiveInnovations/zen-mcp-server.git
   cd zen-mcp-server
   ```

2. **Run the setup script**:
   ```bash
   # Make the script executable and run it
   chmod +x run-server.sh
   ./run-server.sh
   ```

3. **Verify Claude Code can find the MCP server**:
   ```bash
   # List configured MCP servers
   claude mcp list
   
   # You should see 'zen' listed in the output
   # If not, the setup script will provide the correct configuration
   ```

## Troubleshooting WSL Issues

### Python Environment Issues

```bash
# If you encounter Python virtual environment issues
sudo apt install -y python3.12-venv python3.12-dev

# Ensure pip is up to date
python3 -m pip install --upgrade pip
```

### Path Issues

- Always use the full WSL path for MCP configuration (e.g., `/home/YourName/zen-mcp-server/`)
- The setup script automatically detects WSL and configures the correct paths

### Claude Code Connection Issues

```bash
# If Claude Code can't connect to the MCP server, check the configuration
cat ~/.claude.json | grep -A 10 "zen"

# The configuration should show the correct WSL path to the Python executable
# Example: "/home/YourName/zen-mcp-server/.zen_venv/bin/python"
```

### Performance Tip

For best performance, keep your zen-mcp-server directory in the WSL filesystem (e.g., `~/zen-mcp-server`) rather than in the Windows filesystem (`/mnt/c/...`).
```

--------------------------------------------------------------------------------
/conf/custom_models.json:
--------------------------------------------------------------------------------

```json
{
  "_README": {
    "description": "Model metadata for local/self-hosted OpenAI-compatible endpoints (Custom provider).",
    "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
    "usage": "Each entry will be advertised by the Custom provider. Aliases are case-insensitive.",
    "field_notes": "Matches providers/shared/model_capabilities.py.",
    "field_descriptions": {
      "model_name": "The model identifier e.g., 'llama3.2'",
      "aliases": "Array of short names users can type instead of the full model name",
      "context_window": "Total number of tokens the model can process (input + output combined)",
      "max_output_tokens": "Maximum number of tokens the model can generate in a single response",
      "supports_extended_thinking": "Whether the model supports extended reasoning tokens",
      "supports_json_mode": "Whether the model can guarantee valid JSON output",
      "supports_function_calling": "Whether the model supports function/tool calling",
      "supports_images": "Whether the model can process images/visual input",
      "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
      "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
      "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
      "description": "Human-readable description of the model",
      "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
    }
  },
  "models": [
    {
      "model_name": "llama3.2",
      "aliases": [
        "local-llama",
        "ollama-llama"
      ],
      "context_window": 128000,
      "max_output_tokens": 64000,
      "supports_extended_thinking": false,
      "supports_json_mode": false,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "description": "Local Llama 3.2 model via custom endpoint (Ollama/vLLM) - 128K context window (text-only)",
      "intelligence_score": 6
    }
  ]
}

```

--------------------------------------------------------------------------------
/tests/test_conversation_missing_files.py:
--------------------------------------------------------------------------------

```python
"""
Test conversation memory handling of missing files.

Following existing test patterns to ensure conversation memory gracefully
handles missing files without crashing.
"""

from unittest.mock import Mock

from utils.conversation_memory import (
    ConversationTurn,
    ThreadContext,
    build_conversation_history,
)


class TestConversationMissingFiles:
    """Test handling of missing files during conversation memory reconstruction."""

    def test_build_conversation_history_handles_missing_files(self):
        """Test that conversation history building handles missing files gracefully."""

        # Create conversation context with missing file reference (following existing test patterns)
        context = ThreadContext(
            thread_id="test-thread",
            created_at="2024-01-01T00:00:00Z",
            last_updated_at="2024-01-01T00:05:00Z",
            tool_name="analyze",
            turns=[
                ConversationTurn(
                    role="user",
                    content="Please analyze this file",
                    timestamp="2024-01-01T00:01:00Z",
                    files=["/nonexistent/missing_file.py"],  # File that doesn't exist
                    tool_name="analyze",
                ),
                ConversationTurn(
                    role="assistant",
                    content="Here's my analysis...",
                    timestamp="2024-01-01T00:02:00Z",
                    tool_name="analyze",
                ),
            ],
            initial_context={"path": "/nonexistent/missing_file.py"},
        )

        # Mock model context (following existing test patterns)
        mock_model_context = Mock()
        mock_model_context.calculate_token_allocation.return_value = Mock(file_tokens=50000, history_tokens=50000)
        mock_model_context.estimate_tokens.return_value = 100
        mock_model_context.model_name = "test-model"

        # Should not crash, should handle missing file gracefully
        history, tokens = build_conversation_history(context, mock_model_context)

        # Should return valid history despite missing file
        assert isinstance(history, str)
        assert isinstance(tokens, int)
        assert len(history) > 0

        # Should contain conversation content
        assert "CONVERSATION HISTORY" in history
        assert "Please analyze this file" in history
        assert "Here's my analysis" in history

```

--------------------------------------------------------------------------------
/tests/test_workflow_prompt_size_validation_simple.py:
--------------------------------------------------------------------------------

```python
"""Integration tests for workflow step size validation.

These tests exercise the debug workflow tool end-to-end to ensure that step size
validation operates on the real execution path rather than mocked helpers.
"""

from __future__ import annotations

import json

import pytest

from config import MCP_PROMPT_SIZE_LIMIT
from tools.debug import DebugIssueTool
from tools.shared.exceptions import ToolExecutionError


def build_debug_arguments(**overrides) -> dict[str, object]:
    """Create a minimal set of workflow arguments for DebugIssueTool."""

    base_arguments: dict[str, object] = {
        "step": "Investigate the authentication issue in the login module",
        "step_number": 1,
        "total_steps": 3,
        "next_step_required": True,
        "findings": "Initial observations about the login failure",
        "files_checked": [],
        "relevant_files": [],
        "relevant_context": [],
        "issues_found": [],
        "confidence": "low",
        "use_assistant_model": False,
        # WorkflowRequest accepts optional fields; leave hypothesis/continuation unset
    }

    base_arguments.update(overrides)
    return base_arguments


@pytest.mark.asyncio
async def test_workflow_tool_accepts_normal_step_content() -> None:
    """Verify a typical step executes through the real workflow path."""

    tool = DebugIssueTool()
    arguments = build_debug_arguments()

    responses = await tool.execute(arguments)
    assert len(responses) == 1

    payload = json.loads(responses[0].text)
    assert payload["status"] == "pause_for_investigation"
    assert payload["step_number"] == 1
    assert "error" not in payload


@pytest.mark.asyncio
async def test_workflow_tool_rejects_oversized_step_with_guidance() -> None:
    """Large step content should trigger the size safeguard with helpful guidance."""

    oversized_step = "Investigate this issue: " + ("A" * (MCP_PROMPT_SIZE_LIMIT + 1000))
    tool = DebugIssueTool()
    arguments = build_debug_arguments(step=oversized_step)

    with pytest.raises(ToolExecutionError) as exc_info:
        await tool.execute(arguments)

    output_payload = json.loads(exc_info.value.payload)

    assert output_payload["status"] == "resend_prompt"
    assert output_payload["metadata"]["prompt_size"] > MCP_PROMPT_SIZE_LIMIT

    guidance = output_payload["content"].lower()
    assert "shorter instructions" in guidance
    assert "file paths" in guidance

```

--------------------------------------------------------------------------------
/tests/test_provider_retry_logic.py:
--------------------------------------------------------------------------------

```python
"""Tests covering shared retry behaviour for providers."""

from types import SimpleNamespace

import pytest

from providers.openai import OpenAIModelProvider


def _mock_chat_response(content: str = "retry success") -> SimpleNamespace:
    """Create a minimal chat completion response for tests."""

    usage = SimpleNamespace(prompt_tokens=10, completion_tokens=5, total_tokens=15)
    message = SimpleNamespace(content=content)
    choice = SimpleNamespace(message=message, finish_reason="stop")
    return SimpleNamespace(choices=[choice], model="gpt-4.1", id="resp-1", created=123, usage=usage)


def test_openai_provider_retries_on_transient_error(monkeypatch):
    """Provider should retry once for retryable errors and eventually succeed."""

    monkeypatch.setattr("providers.base.time.sleep", lambda _: None)

    provider = OpenAIModelProvider(api_key="test-key")

    attempts = {"count": 0}

    def create_completion(**kwargs):
        attempts["count"] += 1
        if attempts["count"] == 1:
            raise RuntimeError("temporary network interruption")
        return _mock_chat_response("second attempt response")

    provider._client = SimpleNamespace(
        chat=SimpleNamespace(completions=SimpleNamespace(create=create_completion)),
        responses=SimpleNamespace(create=lambda **_: None),
    )

    result = provider.generate_content("hello", "gpt-4.1")

    assert attempts["count"] == 2, "Expected a retry before succeeding"
    assert result.content == "second attempt response"


def test_openai_provider_bails_on_non_retryable_error(monkeypatch):
    """Provider should stop immediately when the error is marked non-retryable."""

    monkeypatch.setattr("providers.base.time.sleep", lambda _: None)

    provider = OpenAIModelProvider(api_key="test-key")

    attempts = {"count": 0}

    def create_completion(**kwargs):
        attempts["count"] += 1
        raise RuntimeError("context length exceeded 429")

    provider._client = SimpleNamespace(
        chat=SimpleNamespace(completions=SimpleNamespace(create=create_completion)),
        responses=SimpleNamespace(create=lambda **_: None),
    )

    monkeypatch.setattr(
        OpenAIModelProvider,
        "_is_error_retryable",
        lambda self, error: False,
    )

    with pytest.raises(RuntimeError) as excinfo:
        provider.generate_content("hello", "gpt-4.1")

    assert "after 1 attempt" in str(excinfo.value)
    assert attempts["count"] == 1

```

--------------------------------------------------------------------------------
/tests/test_clink_integration.py:
--------------------------------------------------------------------------------

```python
import json
import os
import shutil

import pytest

from tools.clink import CLinkTool


@pytest.mark.integration
@pytest.mark.asyncio
async def test_clink_gemini_single_digit_sum():
    if shutil.which("gemini") is None:
        pytest.skip("gemini CLI is not installed or on PATH")

    if not (os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")):
        pytest.skip("Gemini API key is not configured")

    tool = CLinkTool()
    prompt = "Respond with a single digit equal to the sum of 2 + 2. Output only that digit."

    results = await tool.execute(
        {
            "prompt": prompt,
            "cli_name": "gemini",
            "role": "default",
            "absolute_file_paths": [],
            "images": [],
        }
    )

    assert results, "clink tool returned no outputs"
    payload = json.loads(results[0].text)
    status = payload["status"]
    assert status in {"success", "continuation_available"}

    content = payload.get("content", "").strip()
    assert content == "4"

    if status == "continuation_available":
        offer = payload.get("continuation_offer") or {}
        assert offer.get("continuation_id"), "Expected continuation metadata when status indicates availability"


@pytest.mark.integration
@pytest.mark.asyncio
async def test_clink_claude_single_digit_sum():
    if shutil.which("claude") is None:
        pytest.skip("claude CLI is not installed or on PATH")

    tool = CLinkTool()
    prompt = "Respond with a single digit equal to the sum of 2 + 2. Output only that digit."

    results = await tool.execute(
        {
            "prompt": prompt,
            "cli_name": "claude",
            "role": "default",
            "absolute_file_paths": [],
            "images": [],
        }
    )

    assert results, "clink tool returned no outputs"
    payload = json.loads(results[0].text)
    status = payload["status"]

    if status == "error":
        metadata = payload.get("metadata") or {}
        reason = payload.get("content") or metadata.get("message") or "Claude CLI reported an error"
        pytest.skip(f"Skipping Claude integration test: {reason}")

    assert status in {"success", "continuation_available"}

    content = payload.get("content", "").strip()
    assert content == "4"

    if status == "continuation_available":
        offer = payload.get("continuation_offer") or {}
        assert offer.get("continuation_id"), "Expected continuation metadata when status indicates availability"

```

--------------------------------------------------------------------------------
/tests/test_clink_codex_agent.py:
--------------------------------------------------------------------------------

```python
import asyncio
import shutil
from pathlib import Path

import pytest

from clink.agents.base import CLIAgentError
from clink.agents.codex import CodexAgent
from clink.models import ResolvedCLIClient, ResolvedCLIRole


class DummyProcess:
    def __init__(self, *, stdout: bytes = b"", stderr: bytes = b"", returncode: int = 0):
        self._stdout = stdout
        self._stderr = stderr
        self.returncode = returncode

    async def communicate(self, _input):
        return self._stdout, self._stderr


@pytest.fixture()
def codex_agent():
    prompt_path = Path("systemprompts/clink/codex_default.txt").resolve()
    role = ResolvedCLIRole(name="default", prompt_path=prompt_path, role_args=[])
    client = ResolvedCLIClient(
        name="codex",
        executable=["codex"],
        internal_args=["exec"],
        config_args=["--json", "--dangerously-bypass-approvals-and-sandbox"],
        env={},
        timeout_seconds=30,
        parser="codex_jsonl",
        roles={"default": role},
        output_to_file=None,
        working_dir=None,
    )
    return CodexAgent(client), role


async def _run_agent_with_process(monkeypatch, agent, role, process):
    async def fake_create_subprocess_exec(*_args, **_kwargs):
        return process

    def fake_which(executable_name):
        return f"/usr/bin/{executable_name}"

    monkeypatch.setattr(asyncio, "create_subprocess_exec", fake_create_subprocess_exec)
    monkeypatch.setattr(shutil, "which", fake_which)
    return await agent.run(role=role, prompt="do something", files=[], images=[])


@pytest.mark.asyncio
async def test_codex_agent_recovers_jsonl(monkeypatch, codex_agent):
    agent, role = codex_agent
    stdout = b"""
{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"Hello from Codex"}}
{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}
"""
    process = DummyProcess(stdout=stdout, returncode=124)
    result = await _run_agent_with_process(monkeypatch, agent, role, process)

    assert result.returncode == 124
    assert "Hello from Codex" in result.parsed.content
    assert result.parsed.metadata["usage"]["output_tokens"] == 5


@pytest.mark.asyncio
async def test_codex_agent_propagates_invalid_json(monkeypatch, codex_agent):
    agent, role = codex_agent
    stdout = b"not json"
    process = DummyProcess(stdout=stdout, returncode=1)

    with pytest.raises(CLIAgentError):
        await _run_agent_with_process(monkeypatch, agent, role, process)

```

--------------------------------------------------------------------------------
/clink/agents/gemini.py:
--------------------------------------------------------------------------------

```python
"""Gemini-specific CLI agent hooks."""

from __future__ import annotations

import json
from typing import Any

from clink.models import ResolvedCLIClient
from clink.parsers.base import ParsedCLIResponse

from .base import AgentOutput, BaseCLIAgent


class GeminiAgent(BaseCLIAgent):
    """Gemini-specific behaviour."""

    def __init__(self, client: ResolvedCLIClient):
        super().__init__(client)

    def _recover_from_error(
        self,
        *,
        returncode: int,
        stdout: str,
        stderr: str,
        sanitized_command: list[str],
        duration_seconds: float,
        output_file_content: str | None,
    ) -> AgentOutput | None:
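        """Salvage a structured error payload from Gemini CLI output.

        The CLI sometimes prints a human-readable prologue followed by a JSON
        blob containing an ``error`` object. When that happens, return a
        parsed response carrying recovery metadata instead of treating the
        run as an unrecoverable failure.
        """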
        combined = "\n".join(part for part in (stderr, stdout) if part)
        if not combined:
            return None

        brace_index = combined.find("{")
        if brace_index == -1:
            return None

        json_candidate = combined[brace_index:]
        try:
            payload: dict[str, Any] = json.loads(json_candidate)
        except json.JSONDecodeError:
            return None

        error_block = payload.get("error")
        if not isinstance(error_block, dict):
            return None

        code = error_block.get("code")
        err_type = error_block.get("type")
        detail_message = error_block.get("message")

        prologue = combined[:brace_index].strip()
        lines: list[str] = []
        if prologue and (not detail_message or prologue not in detail_message):
            lines.append(prologue)
        if detail_message:
            lines.append(detail_message)

        header = "Gemini CLI reported a tool failure"
        if code:
            header = f"{header} ({code})"
        elif err_type:
            header = f"{header} ({err_type})"

        content_lines = [header.rstrip(".") + "."]
        content_lines.extend(lines)
        message = "\n".join(content_lines).strip()

        metadata = {
            "cli_error_recovered": True,
            "cli_error_code": code,
            "cli_error_type": err_type,
            "cli_error_payload": payload,
        }

        parsed = ParsedCLIResponse(content=message or header, metadata=metadata)
        return AgentOutput(
            parsed=parsed,
            sanitized_command=sanitized_command,
            returncode=returncode,
            stdout=stdout,
            stderr=stderr,
            duration_seconds=duration_seconds,
            parser_name=self._parser.name,
            output_file_content=output_file_content,
        )

```

--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------

```yaml
services:
  zen-mcp:
    build:
      context: .
      dockerfile: Dockerfile
      target: runtime
    image: zen-mcp-server:latest
    container_name: zen-mcp-server
    
    # Container labels for traceability
    labels:
      - "com.zen-mcp.service=zen-mcp-server"
      - "com.zen-mcp.version=1.0.0"
      - "com.zen-mcp.environment=production"
      - "com.zen-mcp.description=AI-powered Model Context Protocol server"
    
    # Environment variables
    environment:
      # Default model configuration
      - DEFAULT_MODEL=${DEFAULT_MODEL:-auto}
      
      # API Keys (use Docker secrets in production)
      - GEMINI_API_KEY=${GEMINI_API_KEY}
      - GOOGLE_API_KEY=${GOOGLE_API_KEY}
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
      - XAI_API_KEY=${XAI_API_KEY}
      - DIAL_API_KEY=${DIAL_API_KEY}
      - DIAL_API_HOST=${DIAL_API_HOST}
      - DIAL_API_VERSION=${DIAL_API_VERSION}
      - OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
      - CUSTOM_API_URL=${CUSTOM_API_URL}
      - CUSTOM_API_KEY=${CUSTOM_API_KEY}
      - CUSTOM_MODEL_NAME=${CUSTOM_MODEL_NAME}
      
      # Logging configuration
      - LOG_LEVEL=${LOG_LEVEL:-INFO}
      - LOG_MAX_SIZE=${LOG_MAX_SIZE:-10MB}
      - LOG_BACKUP_COUNT=${LOG_BACKUP_COUNT:-5}
      
      # Advanced configuration
      - DEFAULT_THINKING_MODE_THINKDEEP=${DEFAULT_THINKING_MODE_THINKDEEP:-high}
      - DISABLED_TOOLS=${DISABLED_TOOLS}
      - MAX_MCP_OUTPUT_TOKENS=${MAX_MCP_OUTPUT_TOKENS}
      
      # Server configuration
      - PYTHONUNBUFFERED=1
      - PYTHONPATH=/app
      - TZ=${TZ:-UTC}
    
    # Volumes for persistent data
    volumes:
      - ./logs:/app/logs
      - zen-mcp-config:/app/conf
      - /etc/localtime:/etc/localtime:ro
    
    # Network configuration
    networks:
      - zen-network
    
    # Resource limits
    deploy:
      resources:
        limits:
          memory: 512M
          cpus: '0.5'
        reservations:
          memory: 256M
          cpus: '0.25'
    
    # Health check
    healthcheck:
      test: ["CMD", "python", "/usr/local/bin/healthcheck.py"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    
    # Restart policy
    restart: unless-stopped
    
    # Security
    security_opt:
      - no-new-privileges:true
    read_only: true
    tmpfs:
      - /tmp:noexec,nosuid,size=100m
      - /app/tmp:noexec,nosuid,size=50m

# Named volumes
volumes:
  zen-mcp-config:
    driver: local

# Networks
networks:
  zen-network:
    driver: bridge
    ipam:
      config:
        - subnet: 172.20.0.0/16

```

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
# ===========================================
# STAGE 1: Build dependencies
# ===========================================
FROM python:3.11-slim AS builder

# Install system dependencies for building
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements files
COPY requirements.txt ./

# Create virtual environment and install dependencies
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Install Python dependencies
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
    pip install --no-cache-dir -r requirements.txt

# ===========================================
# STAGE 2: Runtime image
# ===========================================
FROM python:3.11-slim AS runtime

# Add metadata labels for traceability
LABEL maintainer="Zen MCP Server Team"
LABEL version="1.0.0"
LABEL description="Zen MCP Server - AI-powered Model Context Protocol server"
LABEL org.opencontainers.image.title="zen-mcp-server"
LABEL org.opencontainers.image.description="AI-powered Model Context Protocol server with multi-provider support"
LABEL org.opencontainers.image.version="1.0.0"
LABEL org.opencontainers.image.source="https://github.com/BeehiveInnovations/zen-mcp-server"
LABEL org.opencontainers.image.documentation="https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/README.md"
LABEL org.opencontainers.image.licenses="Apache-2.0"

# Create non-root user for security
RUN groupadd -r zenuser && useradd -r -g zenuser zenuser

# Install minimal runtime dependencies
RUN apt-get update && apt-get install -y \
    ca-certificates \
    procps \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Copy virtual environment from builder
COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Set working directory
WORKDIR /app

# Copy application code
COPY --chown=zenuser:zenuser . .

# Create logs directory with proper permissions
RUN mkdir -p logs && chown -R zenuser:zenuser logs

# Create tmp directory for container operations
RUN mkdir -p tmp && chown -R zenuser:zenuser tmp

# Copy health check script
COPY --chown=zenuser:zenuser docker/scripts/healthcheck.py /usr/local/bin/healthcheck.py
RUN chmod +x /usr/local/bin/healthcheck.py

# Switch to non-root user
USER zenuser

# Health check configuration
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python /usr/local/bin/healthcheck.py

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/app

# Default command
CMD ["python", "server.py"]

```

--------------------------------------------------------------------------------
/run_integration_tests.sh:
--------------------------------------------------------------------------------

```bash
#!/bin/bash

# Zen MCP Server - Run Integration Tests
# This script runs integration tests that require API keys
# Run this locally on your Mac to ensure everything works end-to-end

set -e  # Exit on any error

echo "🧪 Running Integration Tests for Zen MCP Server"
echo "=============================================="
echo "These tests use real API calls with your configured keys"
echo ""

# Activate virtual environment
if [[ -f ".zen_venv/bin/activate" ]]; then
    source .zen_venv/bin/activate
    echo "✅ Using virtual environment"
else
    echo "❌ No virtual environment found!"
    echo "Please run: ./run-server.sh first"
    exit 1
fi

# Check for .env file
if [[ ! -f ".env" ]]; then
    echo "⚠️  Warning: No .env file found. Integration tests may fail without API keys."
    echo ""
fi

echo "🔑 Checking API key availability:"
echo "---------------------------------"

# Check which API keys are available
if [[ -n "$GEMINI_API_KEY" ]] || grep -q "GEMINI_API_KEY=" .env 2>/dev/null; then
    echo "✅ GEMINI_API_KEY configured"
else
    echo "❌ GEMINI_API_KEY not found"
fi

if [[ -n "$OPENAI_API_KEY" ]] || grep -q "OPENAI_API_KEY=" .env 2>/dev/null; then
    echo "✅ OPENAI_API_KEY configured"
else
    echo "❌ OPENAI_API_KEY not found"
fi

if [[ -n "$XAI_API_KEY" ]] || grep -q "XAI_API_KEY=" .env 2>/dev/null; then
    echo "✅ XAI_API_KEY configured"
else
    echo "❌ XAI_API_KEY not found"
fi

if [[ -n "$OPENROUTER_API_KEY" ]] || grep -q "OPENROUTER_API_KEY=" .env 2>/dev/null; then
    echo "✅ OPENROUTER_API_KEY configured"
else
    echo "❌ OPENROUTER_API_KEY not found"
fi

if [[ -n "$CUSTOM_API_URL" ]] || grep -q "CUSTOM_API_URL=" .env 2>/dev/null; then
    echo "✅ CUSTOM_API_URL configured (local models)"
else
    echo "❌ CUSTOM_API_URL not found"
fi

echo ""

# Run integration tests
echo "🏃 Running integration tests..."
echo "------------------------------"

# Run only integration tests (marked with @pytest.mark.integration)
python -m pytest tests/ -v -m "integration" --tb=short

echo ""
echo "✅ Integration tests completed!"
echo ""

# Also run simulator tests if requested
if [[ "$1" == "--with-simulator" ]]; then
    echo "🤖 Running simulator tests..."
    echo "----------------------------"
    python communication_simulator_test.py --verbose
    echo ""
    echo "✅ Simulator tests completed!"
fi

echo "💡 Tips:"
echo "- Run './run_integration_tests.sh' for integration tests only"
echo "- Run './run_integration_tests.sh --with-simulator' to also run simulator tests"
echo "- Run './code_quality_checks.sh' for unit tests and linting"
echo "- Check logs in logs/mcp_server.log if tests fail"
```

--------------------------------------------------------------------------------
/tests/test_clink_gemini_agent.py:
--------------------------------------------------------------------------------

```python
import asyncio
import shutil
from pathlib import Path

import pytest

from clink.agents.base import CLIAgentError
from clink.agents.gemini import GeminiAgent
from clink.models import ResolvedCLIClient, ResolvedCLIRole


class DummyProcess:
    def __init__(self, *, stdout: bytes = b"", stderr: bytes = b"", returncode: int = 0):
        self._stdout = stdout
        self._stderr = stderr
        self.returncode = returncode

    async def communicate(self, _input):
        return self._stdout, self._stderr


@pytest.fixture()
def gemini_agent():
    prompt_path = Path("systemprompts/clink/gemini_default.txt").resolve()
    role = ResolvedCLIRole(name="default", prompt_path=prompt_path, role_args=[])
    client = ResolvedCLIClient(
        name="gemini",
        executable=["gemini"],
        internal_args=[],
        config_args=[],
        env={},
        timeout_seconds=30,
        parser="gemini_json",
        roles={"default": role},
        output_to_file=None,
        working_dir=None,
    )
    return GeminiAgent(client), role


async def _run_agent_with_process(monkeypatch, agent, role, process):
    async def fake_create_subprocess_exec(*_args, **_kwargs):
        return process

    def fake_which(executable_name):
        return f"/usr/bin/{executable_name}"

    monkeypatch.setattr(asyncio, "create_subprocess_exec", fake_create_subprocess_exec)
    monkeypatch.setattr(shutil, "which", fake_which)
    return await agent.run(role=role, prompt="do something", files=[], images=[])


@pytest.mark.asyncio
async def test_gemini_agent_recovers_tool_error(monkeypatch, gemini_agent):
    agent, role = gemini_agent
    error_json = """{
  "error": {
    "type": "FatalToolExecutionError",
    "message": "Error executing tool replace: Failed to edit",
    "code": "edit_expected_occurrence_mismatch"
  }
}"""
    stderr = ("Error: Failed to edit, expected 1 occurrence but found 2.\n" + error_json).encode()
    process = DummyProcess(stderr=stderr, returncode=54)

    result = await _run_agent_with_process(monkeypatch, agent, role, process)

    assert result.returncode == 54
    assert result.parsed.metadata["cli_error_recovered"] is True
    assert result.parsed.metadata["cli_error_code"] == "edit_expected_occurrence_mismatch"
    assert "Gemini CLI reported a tool failure" in result.parsed.content


@pytest.mark.asyncio
async def test_gemini_agent_propagates_unrecoverable_error(monkeypatch, gemini_agent):
    agent, role = gemini_agent
    stderr = b"Plain failure without structured payload"
    process = DummyProcess(stderr=stderr, returncode=54)

    with pytest.raises(CLIAgentError):
        await _run_agent_with_process(monkeypatch, agent, role, process)

```

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/tool_addition.yml:
--------------------------------------------------------------------------------

```yaml
name: 🛠️ New Zen MCP Tool Proposal
description: Propose a new Zen MCP tool (e.g., `summarize`, `fixer`, `refactor`)
labels: ["enhancement", "new-tool"]
body:
  - type: input
    id: tool-name
    attributes:
      label: Proposed Tool Name
      description: "What would the tool be called? (e.g., `summarize`, `docgen`, `refactor`)"
      placeholder: "e.g., `docgen`"
    validations:
      required: true

  - type: textarea
    id: purpose
    attributes:
      label: What is the primary purpose of this tool?
      description: "Explain the tool's core function and the value it provides to developers using Claude + Zen."
      placeholder: "This tool will automatically generate comprehensive documentation from code, extracting class and function signatures, docstrings, and creating usage examples."
    validations:
      required: true

  - type: textarea
    id: example-usage
    attributes:
      label: Example Usage in Claude Desktop
      description: "Show how a user would invoke this tool through Claude and what the expected output would look like."
      placeholder: |
        **User prompt to Claude:**
        "Use zen to generate documentation for my entire src/ directory"

        **Expected behavior:**
        - Analyze all Python files in src/
        - Extract classes, functions, and their docstrings
        - Generate structured markdown documentation
        - Include usage examples where possible
        - Return organized documentation with table of contents
      render: markdown
    validations:
      required: true

  - type: dropdown
    id: tool-category
    attributes:
      label: Tool Category
      description: What category does this tool fit into?
      options:
        - Code Analysis (like analyze)
        - Code Quality (like codereview)
        - Code Generation/Refactoring
        - Documentation Generation
        - Testing Support
        - Debugging Support (like debug)
        - Workflow Automation
        - Architecture Planning (like thinkdeep)
        - Other
    validations:
      required: true

  - type: textarea
    id: system-prompt
    attributes:
      label: Proposed System Prompt (Optional)
      description: "If you have ideas for how zen should be prompted for this tool, share them here."
      placeholder: |
        You are an expert technical documentation generator. Your task is to create comprehensive, user-friendly documentation from source code...

  - type: checkboxes
    id: contribution
    attributes:
      label: Contribution
      options:
        - label: I am willing to submit a Pull Request to implement this new tool.
        - label: I have checked that this tool doesn't overlap significantly with existing tools (analyze, codereview, debug, thinkdeep, chat).


```

--------------------------------------------------------------------------------
/conf/azure_models.json:
--------------------------------------------------------------------------------

```json
{
  "_README": {
    "description": "Model metadata for Azure OpenAI / Azure AI Foundry-backed provider. The `models` definition can be copied from openrouter_models.json / custom_models.json",
    "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/azure_models.md",
    "usage": "Models listed here are exposed through Azure AI Foundry. Aliases are case-insensitive.",
    "field_notes": "Matches providers/shared/model_capabilities.py.",
    "field_descriptions": {
      "model_name": "The model identifier e.g., 'gpt-4'",
      "deployment": "Azure model deployment name",
      "aliases": "Array of short names users can type instead of the full model name",
      "context_window": "Total number of tokens the model can process (input + output combined)",
      "max_output_tokens": "Maximum number of tokens the model can generate in a single response",
      "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
      "supports_json_mode": "Whether the model can guarantee valid JSON output",
      "supports_function_calling": "Whether the model supports function/tool calling",
      "supports_images": "Whether the model can process images/visual input",
      "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
      "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
      "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
      "use_openai_response_api": "Set to true when the deployment must call Azure's /responses endpoint (O-series reasoning models). Leave false/omit for standard chat completions.",
      "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
      "description": "Human-readable description of the model",
      "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
    }
  },
  "_example_models": [
    {
      "model_name": "gpt-4",
      "deployment": "gpt-4",
      "aliases": [
        "gpt4"
      ],
      "context_window": 128000,
      "max_output_tokens": 16384,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "supports_temperature": false,
      "temperature_constraint": "fixed",
      "use_openai_response_api": false,
      "description": "GPT-4 (128K context, 16K output)",
      "intelligence_score": 10
    }
  ],
  "models": []
}

```

--------------------------------------------------------------------------------
/docs/troubleshooting.md:
--------------------------------------------------------------------------------

```markdown
# Troubleshooting Guide

## Quick Debugging Steps

If you're experiencing issues with the Zen MCP Server, follow these steps:

### 1. Check MCP Connection

Open Claude Desktop and type `/mcp` to see if zen is connected:
- ✅ If zen appears in the list, the connection is working
- ❌ If not listed or shows an error, continue to step 2

### 2. Launch Claude with Debug Mode

Close Claude Desktop and restart with debug logging:

```bash
# macOS/Linux
claude --debug

# Windows (in WSL2)
claude.exe --debug
```

Look for error messages in the console output, especially:
- API key errors
- Python/environment issues
- File permission errors

### 3. Verify API Keys

Check that your API keys are properly set:

```bash
# Check your .env file
cat .env

# Ensure at least one key is set:
# GEMINI_API_KEY=your-key-here
# OPENAI_API_KEY=your-key-here
```

If you need to update your API keys, edit the `.env` file and then restart Claude for changes to take effect.

### 4. Check Server Logs

View the server logs for detailed error information:

```bash
# View recent logs
tail -n 100 logs/mcp_server.log

# Follow logs in real-time
tail -f logs/mcp_server.log

# Or use the -f flag when starting to automatically follow logs
./run-server.sh -f

# Search for errors
grep "ERROR" logs/mcp_server.log
```

See [Logging Documentation](logging.md) for more details on accessing logs.

### 5. Common Issues

**"Connection failed" in Claude Desktop**
- Ensure the server path is correct in your Claude config
- Run `./run-server.sh` to verify setup and see configuration
- Check that Python is installed: `python3 --version`

**"API key environment variable is required"**
- Add your API key to the `.env` file
- Restart Claude Desktop after updating `.env`

**File path errors**
- Always use absolute paths: `/Users/you/project/file.py`
- Never use relative paths: `./file.py`

**Python module not found**
- Run `./run-server.sh` to reinstall dependencies
- Check virtual environment is activated: should see `.zen_venv` in the Python path

### 6. Environment Issues

**Virtual Environment Problems**
```bash
# Reset environment completely
rm -rf .zen_venv
./run-server.sh
```

**Permission Issues**
```bash
# Ensure script is executable
chmod +x run-server.sh
```

### 7. Still Having Issues?

If the problem persists after trying these steps:

1. **Reproduce the issue** - Note the exact steps that cause the problem
2. **Collect logs** - Save relevant error messages from Claude debug mode and server logs
3. **Open a GitHub issue** with:
   - Your operating system
   - Python version: `python3 --version`
   - Error messages from logs
   - Steps to reproduce
   - What you've already tried

## Windows Users

**Important**: Windows users must use WSL2. Install it with:

```powershell
wsl --install -d Ubuntu
```

Then follow the standard setup inside WSL2.
```

--------------------------------------------------------------------------------
/docs/model_ranking.md:
--------------------------------------------------------------------------------

```markdown
# Model Capability Ranking

Auto mode needs a short, trustworthy list of models to suggest. The server
computes a capability rank for every model at runtime using a simple recipe:

1. Start with the human-supplied `intelligence_score` (1–20). This is the
   anchor—multiply it by five to map onto the 0–100 scale the server uses.
2. Add a few light bonuses for hard capabilities:
   - **Context window:** up to +5 (log-scale bonus when the model exceeds ~1K tokens).
   - **Output budget:** +2 for ≥65K tokens, +1 for ≥32K.
   - **Extended thinking:** +3 when the provider supports it.
   - **Function calling / JSON / images:** +1 each when available.
   - **Custom endpoints:** −1 to nudge cloud-hosted defaults ahead unless tuned.
3. Clamp the final score to 0–100 so downstream callers can rely on the range.

In code this looks like:

```python
base = clamp(intelligence_score, 1, 20) * 5
ctx_bonus = min(5, max(0, log10(context_window) - 3))
output_bonus = 2 if max_output_tokens >= 65_000 else 1 if max_output_tokens >= 32_000 else 0
feature_bonus = (
    (3 if supports_extended_thinking else 0)
    + (1 if supports_function_calling else 0)
    + (1 if supports_json_mode else 0)
    + (1 if supports_images else 0)
)
penalty = 1 if provider == CUSTOM else 0

effective_rank = clamp(base + ctx_bonus + output_bonus + feature_bonus - penalty, 0, 100)
```

The bonuses are intentionally small—the human intelligence score does most
of the work so you can enforce organisational preferences easily.

## Picking an intelligence score

A straightforward rubric that mirrors typical provider tiers:

| Intelligence | Guidance |
|--------------|----------|
| 18–19 | Frontier reasoning models (Gemini 2.5 Pro, GPT‑5) |
| 15–17 | Strong general models with large context (O3 Pro, DeepSeek R1) |
| 12–14 | Balanced assistants (Claude Opus/Sonnet, Mistral Large) |
| 9–11  | Fast distillations (Gemini Flash, GPT-5 Mini, Mistral medium) |
| 6–8   | Local or efficiency-focused models (Llama 3 70B, Claude Haiku) |
| ≤5    | Experimental/lightweight models |

Record the reasoning for your scores so future updates stay consistent.
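
If you want to sanity-check where a candidate score lands before committing it, the recipe is easy to reproduce by hand. The helper below is a minimal sketch that mirrors the formula above rather than calling the server's own ranking code, so the function name and keyword arguments are illustrative only:

```python
from math import log10


def _clamp(value: float, lo: float, hi: float) -> float:
    return max(lo, min(hi, value))


def estimate_rank(
    intelligence_score: int,
    context_window: int,
    max_output_tokens: int,
    *,
    extended_thinking: bool = False,
    function_calling: bool = False,
    json_mode: bool = False,
    images: bool = False,
    is_custom: bool = False,
) -> float:
    """Rough re-implementation of the ranking recipe described above."""
    base = _clamp(intelligence_score, 1, 20) * 5
    ctx_bonus = min(5, max(0, log10(max(context_window, 1)) - 3))
    output_bonus = 2 if max_output_tokens >= 65_000 else 1 if max_output_tokens >= 32_000 else 0
    feature_bonus = 3 * extended_thinking + function_calling + json_mode + images
    penalty = 1 if is_custom else 0
    return _clamp(base + ctx_bonus + output_bonus + feature_bonus - penalty, 0, 100)


# A score of 18 with a 1M-token context and a 65K output budget lands near the top of the range.
print(estimate_rank(18, 1_000_000, 65_536, extended_thinking=True, json_mode=True))
```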

## How the rank is used

The ranked list is cached per provider and consumed by:
- Tool schemas (`model` parameter descriptions) when auto mode is active.
- The `listmodels` tool’s “top models” sections.
- Fallback messaging when a requested model is unavailable.

Because the rank is computed after restriction filters, only allowed models
appear in these summaries.

## Customising further

If you need a different weighting you can:
- Override `intelligence_score` in your provider or custom model config.
- Subclass the provider and override `get_effective_capability_rank()`.
- Post-process the rank via `get_capabilities_by_rank()` before surfacing it.

Most teams find that adjusting `intelligence_score` alone is enough to keep
auto mode honest without revisiting code.
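
If you adjust scores in a config file regularly, a small script keeps the edits repeatable. The sketch below is hypothetical: it assumes a custom model config with a top-level `models` array whose entries carry `model_name` and `intelligence_score` fields, which is the shape the provider model JSON files in `conf/` use.

```python
import json
from pathlib import Path

# Assumed layout: {"models": [{"model_name": ..., "intelligence_score": ...}, ...]}
CONFIG = Path("conf/custom_models.json")


def set_intelligence_score(model_name: str, score: int) -> None:
    """Pin a model's human rating so auto mode ranks it where you expect."""
    data = json.loads(CONFIG.read_text())
    for entry in data.get("models", []):
        if entry.get("model_name") == model_name:
            entry["intelligence_score"] = score
            break
    else:
        raise ValueError(f"{model_name} not found in {CONFIG}")
    CONFIG.write_text(json.dumps(data, indent=2) + "\n")
```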

```

--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------

```markdown
## PR Title Format

**Please ensure your PR title follows [Conventional Commits](https://www.conventionalcommits.org/) format:**

### Version Bumping Types (trigger semantic release):
- `feat: <description>` - New features → **MINOR** version bump (1.1.0 → 1.2.0)
- `fix: <description>` - Bug fixes → **PATCH** version bump (1.1.0 → 1.1.1) 
- `perf: <description>` - Performance improvements → **PATCH** version bump (1.1.0 → 1.1.1)

### Breaking Changes (trigger MAJOR version bump):
For breaking changes, use any commit type above with `BREAKING CHANGE:` in the commit body or `!` after the type:
- `feat!: <description>` → **MAJOR** version bump (1.1.0 → 2.0.0)
- `fix!: <description>` → **MAJOR** version bump (1.1.0 → 2.0.0)

### Non-Versioning Types (no release):
- `build: <description>` - Build system changes
- `chore: <description>` - Maintenance tasks
- `ci: <description>` - CI/CD changes
- `docs: <description>` - Documentation only
- `refactor: <description>` - Code refactoring (no functional changes)
- `style: <description>` - Code style/formatting changes
- `test: <description>` - Test additions/changes

### Docker Build Triggering:

Docker builds are **independent** of versioning and trigger based on:

**Automatic**: When PRs modify relevant files:
- Python files (`*.py`), `requirements*.txt`, `pyproject.toml`
- Docker files (`Dockerfile`, `docker-compose.yml`, `.dockerignore`)

**Manual**: Add the `docker-build` label to force builds for any PR.

## Description

Please provide a clear and concise description of what this PR does.

## Changes Made

- [ ] List the specific changes made
- [ ] Include any breaking changes
- [ ] Note any dependencies added/removed

## Testing

**Please review our [Testing Guide](../docs/testing.md) before submitting.**

### Run all linting and tests (required):
```bash
# Activate virtual environment first
source .zen_venv/bin/activate

# Run comprehensive code quality checks (recommended)
./code_quality_checks.sh

# If you made tool changes, also run simulator tests
python communication_simulator_test.py
```

- [ ] All linting passes (ruff, black, isort)
- [ ] All unit tests pass
- [ ] **For new features**: Unit tests added in `tests/`
- [ ] **For tool changes**: Simulator tests added in `simulator_tests/`
- [ ] **For bug fixes**: Tests added to prevent regression
- [ ] Simulator tests pass (if applicable)
- [ ] Manual testing completed with realistic scenarios

## Related Issues

Fixes #(issue number)

## Checklist

- [ ] PR title follows the format guidelines above
- [ ] **Activated venv and ran code quality checks: `source .zen_venv/bin/activate && ./code_quality_checks.sh`**
- [ ] Self-review completed
- [ ] **Tests added for ALL changes** (see Testing section above)
- [ ] Documentation updated as needed
- [ ] All unit tests passing
- [ ] Relevant simulator tests passing (if tool changes)
- [ ] Ready for review

## Additional Notes

Any additional information that reviewers should know.
```

--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------

```toml
[project]
name = "zen-mcp-server"
version = "9.1.3"
description = "AI-powered MCP server with multiple model providers"
requires-python = ">=3.9"
dependencies = [
    "mcp>=1.0.0",
    "google-genai>=1.19.0",
    "openai>=1.55.2",
    "pydantic>=2.0.0",
    "python-dotenv>=1.0.0",
]

[tool.setuptools.packages.find]
include = ["tools*", "providers*", "systemprompts*", "utils*", "conf*", "clink*"]

[tool.setuptools]
py-modules = ["server", "config"]

[tool.setuptools.package-data]
"*" = ["conf/*.json"]

[tool.setuptools.data-files]
"conf" = [
    "conf/custom_models.json",
    "conf/openrouter_models.json",
    "conf/azure_models.json",
    "conf/openai_models.json",
    "conf/gemini_models.json",
    "conf/xai_models.json",
    "conf/dial_models.json",
]

[project.scripts]
zen-mcp-server = "server:run"

[tool.black]
line-length = 120
target-version = ['py39', 'py310', 'py311', 'py312', 'py313']
include = '\.pyi?$'
extend-exclude = '''
/(
  # directories
  \.eggs
  | \.git
  | \.hg
  | \.mypy_cache
  | \.tox
  | \.venv
  | \.zen_venv
  | venv
  | _build
  | buck-out
  | build
  | dist
)/
'''

[tool.isort]
profile = "black"
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
ensure_newline_before_comments = true
line_length = 120
skip_glob = ["venv/*", ".venv/*", ".zen_venv/*"]

[tool.ruff]
target-version = "py39"
line-length = 120

[tool.ruff.lint]
select = [
    "E",  # pycodestyle errors
    "W",  # pycodestyle warnings
    "F",  # pyflakes
    "I",  # isort
    "B",  # flake8-bugbear
    "C4", # flake8-comprehensions
    "UP", # pyupgrade
]
ignore = [
    "E501",  # line too long, handled by black
    "B008",  # do not perform function calls in argument defaults
    "C901",  # too complex
    "B904",  # exception handling with raise from
]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]
"tests/*" = ["B011"]
"tests/conftest.py" = ["E402"]  # Module level imports not at top of file - needed for test setup

[tool.semantic_release]
version_toml = ["pyproject.toml:project.version"]
branch = "main"
version_source = "tag"
version_pattern = "v(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
major_on_zero = false
build_command = "python -m pip install --upgrade build && python -m build"
dist_path = "dist/"
upload_to_vcs_release = true
upload_to_repository = false
remove_dist = false
commit_version_number = true
commit_message = "chore(release): {version}\n\nAutomatically generated by python-semantic-release"
tag_format = "v{version}"

[tool.semantic_release.branches.main]
match = "main"
prerelease = false

[tool.semantic_release.changelog]
exclude_commit_patterns = []

[tool.semantic_release.commit_parser_options]
allowed_tags = ["build", "chore", "ci", "docs", "feat", "fix", "perf", "style", "refactor", "test"]
minor_tags = ["feat"]
patch_tags = ["fix", "perf"]

[tool.semantic_release.remote.token]
env = "GH_TOKEN"

[build-system]
requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.2"]
build-backend = "setuptools.build_meta"

```

--------------------------------------------------------------------------------
/docker/scripts/deploy.sh:
--------------------------------------------------------------------------------

```bash
#!/bin/bash
set -euo pipefail

# Colors for output
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'

echo -e "${GREEN}=== Deploying Zen MCP Server ===${NC}"

# Function to check if required environment variables are set
check_env_vars() {
    # At least one of these API keys must be set
    local required_vars=("GEMINI_API_KEY" "GOOGLE_API_KEY" "OPENAI_API_KEY" "XAI_API_KEY" "DIAL_API_KEY" "OPENROUTER_API_KEY")
    
    local has_api_key=false
    for var in "${required_vars[@]}"; do
        if [[ -n "${!var:-}" ]]; then
            has_api_key=true
            break
        fi
    done

    if [[ "$has_api_key" == false ]]; then
        echo -e "${RED}Error: At least one API key must be set in your .env file${NC}"
        printf '  %s\n' "${required_vars[@]}"
        exit 1
    fi
}

# Load environment variables
if [[ -f .env ]]; then
    set -a
    source .env
    set +a
    echo -e "${GREEN}✓ Environment variables loaded from .env${NC}"
else
    echo -e "${RED}Error: .env file not found${NC}"
    echo -e "${YELLOW}Please copy .env.example to .env and configure your API keys${NC}"
    exit 1
fi

# Check required environment variables
check_env_vars

# Exponential backoff health check function
wait_for_health() {
    local max_attempts=6
    local attempt=1
    local delay=2

    while (( attempt <= max_attempts )); do
        status=$(docker-compose ps -q zen-mcp | xargs docker inspect -f "{{.State.Health.Status}}" 2>/dev/null || echo "unavailable")
        if [[ "$status" == "healthy" ]]; then
            return 0
        fi
        echo -e "${YELLOW}Waiting for service to be healthy... (attempt $attempt/${max_attempts}, retrying in ${delay}s)${NC}"
        sleep $delay
        delay=$(( delay * 2 ))
        attempt=$(( attempt + 1 ))
    done

    echo -e "${RED}Service failed to become healthy after $max_attempts attempts${NC}"
    echo -e "${YELLOW}Checking logs:${NC}"
    docker-compose logs zen-mcp
    exit 1
}

# Create logs directory if it doesn't exist
mkdir -p logs

# Stop existing containers
echo -e "${GREEN}Stopping existing containers...${NC}"
docker-compose down

# Start the services
echo -e "${GREEN}Starting Zen MCP Server...${NC}"
docker-compose up -d

# Wait for health check
echo -e "${GREEN}Waiting for service to be healthy...${NC}"
timeout 60 bash -c 'while [[ "$(docker-compose ps -q zen-mcp | xargs docker inspect -f "{{.State.Health.Status}}")" != "healthy" ]]; do sleep 2; done' || wait_for_health

echo -e "${GREEN}✓ Zen MCP Server deployed successfully${NC}"
echo -e "${GREEN}Service Status:${NC}"
docker-compose ps

echo -e "${GREEN}=== Deployment Complete ===${NC}"
echo -e "${YELLOW}Useful commands:${NC}"
echo -e "  View logs: ${GREEN}docker-compose logs -f zen-mcp${NC}"
echo -e "  Stop service: ${GREEN}docker-compose down${NC}"
echo -e "  Restart service: ${GREEN}docker-compose restart zen-mcp${NC}"

```

--------------------------------------------------------------------------------
/code_quality_checks.sh:
--------------------------------------------------------------------------------

```bash
#!/bin/bash

# Zen MCP Server - Code Quality Checks
# This script runs all required linting and testing checks before committing changes.
# ALL checks must pass 100% for CI/CD to succeed.

set -e  # Exit on any error

echo "🔍 Running Code Quality Checks for Zen MCP Server"
echo "================================================="

# Determine Python command
if [[ -f ".zen_venv/bin/python" ]]; then
    PYTHON_CMD=".zen_venv/bin/python"
    PIP_CMD=".zen_venv/bin/pip"
    echo "✅ Using venv"
elif [[ -n "$VIRTUAL_ENV" ]]; then
    PYTHON_CMD="python"
    PIP_CMD="pip"
    echo "✅ Using activated virtual environment: $VIRTUAL_ENV"
else
    echo "❌ No virtual environment found!"
    echo "Please run: ./run-server.sh first to set up the environment"
    exit 1
fi
echo ""

# Check and install dev dependencies if needed
echo "🔍 Checking development dependencies..."
DEV_DEPS_NEEDED=false

# Check each dev dependency
for tool in ruff black isort pytest; do
    # Check if tool exists in venv or in PATH
    if [[ -f ".zen_venv/bin/$tool" ]] || command -v $tool &> /dev/null; then
        continue
    else
        DEV_DEPS_NEEDED=true
        break
    fi
done

if [ "$DEV_DEPS_NEEDED" = true ]; then
    echo "📦 Installing development dependencies..."
    $PIP_CMD install -q -r requirements-dev.txt
    echo "✅ Development dependencies installed"
else
    echo "✅ Development dependencies already installed"
fi

# Set tool paths
if [[ -f ".zen_venv/bin/ruff" ]]; then
    RUFF=".zen_venv/bin/ruff"
    BLACK=".zen_venv/bin/black"
    ISORT=".zen_venv/bin/isort"
    PYTEST=".zen_venv/bin/pytest"
else
    RUFF="ruff"
    BLACK="black"
    ISORT="isort"
    PYTEST="pytest"
fi
echo ""

# Step 1: Linting and Formatting
echo "📋 Step 1: Running Linting and Formatting Checks"
echo "--------------------------------------------------"

echo "🔧 Running ruff linting with auto-fix..."
$RUFF check --fix --exclude test_simulation_files --exclude .zen_venv

echo "🎨 Running black code formatting..."
$BLACK . --exclude="test_simulation_files/" --exclude=".zen_venv/"

echo "📦 Running import sorting with isort..."
$ISORT . --skip-glob=".zen_venv/*" --skip-glob="test_simulation_files/*"

echo "✅ Verifying all linting passes..."
$RUFF check --exclude test_simulation_files --exclude .zen_venv

echo "✅ Step 1 Complete: All linting and formatting checks passed!"
echo ""

# Step 2: Unit Tests
echo "🧪 Step 2: Running Complete Unit Test Suite"
echo "---------------------------------------------"

echo "🏃 Running unit tests (excluding integration tests)..."
$PYTHON_CMD -m pytest tests/ -v -x -m "not integration"

echo "✅ Step 2 Complete: All unit tests passed!"
echo ""

# Step 3: Final Summary
echo "🎉 All Code Quality Checks Passed!"
echo "=================================="
echo "✅ Linting (ruff): PASSED"
echo "✅ Formatting (black): PASSED" 
echo "✅ Import sorting (isort): PASSED"
echo "✅ Unit tests: PASSED"
echo ""
echo "🚀 Your code is ready for commit and GitHub Actions!"
echo "💡 Remember to add simulator tests if you modified tools"
```