This is page 1 of 25. Use http://codebase.md/beehiveinnovations/gemini-mcp-server?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .claude
│   ├── commands
│   │   └── fix-github-issue.md
│   └── settings.json
├── .coveragerc
├── .dockerignore
├── .env.example
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.yml
│   │   ├── config.yml
│   │   ├── documentation.yml
│   │   ├── feature_request.yml
│   │   └── tool_addition.yml
│   ├── pull_request_template.md
│   └── workflows
│       ├── docker-pr.yml
│       ├── docker-release.yml
│       ├── semantic-pr.yml
│       ├── semantic-release.yml
│       └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── AGENTS.md
├── CHANGELOG.md
├── claude_config_example.json
├── CLAUDE.md
├── clink
│   ├── __init__.py
│   ├── agents
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── claude.py
│   │   ├── codex.py
│   │   └── gemini.py
│   ├── constants.py
│   ├── models.py
│   ├── parsers
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── claude.py
│   │   ├── codex.py
│   │   └── gemini.py
│   └── registry.py
├── code_quality_checks.ps1
├── code_quality_checks.sh
├── communication_simulator_test.py
├── conf
│   ├── __init__.py
│   ├── azure_models.json
│   ├── cli_clients
│   │   ├── claude.json
│   │   ├── codex.json
│   │   └── gemini.json
│   ├── custom_models.json
│   ├── dial_models.json
│   ├── gemini_models.json
│   ├── openai_models.json
│   ├── openrouter_models.json
│   └── xai_models.json
├── config.py
├── docker
│   ├── README.md
│   └── scripts
│       ├── build.ps1
│       ├── build.sh
│       ├── deploy.ps1
│       ├── deploy.sh
│       └── healthcheck.py
├── docker-compose.yml
├── Dockerfile
├── docs
│   ├── adding_providers.md
│   ├── adding_tools.md
│   ├── advanced-usage.md
│   ├── ai_banter.md
│   ├── ai-collaboration.md
│   ├── azure_openai.md
│   ├── configuration.md
│   ├── context-revival.md
│   ├── contributions.md
│   ├── custom_models.md
│   ├── docker-deployment.md
│   ├── gemini-setup.md
│   ├── getting-started.md
│   ├── index.md
│   ├── locale-configuration.md
│   ├── logging.md
│   ├── model_ranking.md
│   ├── testing.md
│   ├── tools
│   │   ├── analyze.md
│   │   ├── apilookup.md
│   │   ├── challenge.md
│   │   ├── chat.md
│   │   ├── clink.md
│   │   ├── codereview.md
│   │   ├── consensus.md
│   │   ├── debug.md
│   │   ├── docgen.md
│   │   ├── listmodels.md
│   │   ├── planner.md
│   │   ├── precommit.md
│   │   ├── refactor.md
│   │   ├── secaudit.md
│   │   ├── testgen.md
│   │   ├── thinkdeep.md
│   │   ├── tracer.md
│   │   └── version.md
│   ├── troubleshooting.md
│   ├── vcr-testing.md
│   └── wsl-setup.md
├── examples
│   ├── claude_config_macos.json
│   └── claude_config_wsl.json
├── LICENSE
├── providers
│   ├── __init__.py
│   ├── azure_openai.py
│   ├── base.py
│   ├── custom.py
│   ├── dial.py
│   ├── gemini.py
│   ├── openai_compatible.py
│   ├── openai.py
│   ├── openrouter.py
│   ├── registries
│   │   ├── __init__.py
│   │   ├── azure.py
│   │   ├── base.py
│   │   ├── custom.py
│   │   ├── dial.py
│   │   ├── gemini.py
│   │   ├── openai.py
│   │   ├── openrouter.py
│   │   └── xai.py
│   ├── registry_provider_mixin.py
│   ├── registry.py
│   ├── shared
│   │   ├── __init__.py
│   │   ├── model_capabilities.py
│   │   ├── model_response.py
│   │   ├── provider_type.py
│   │   └── temperature.py
│   └── xai.py
├── pyproject.toml
├── pytest.ini
├── README.md
├── requirements-dev.txt
├── requirements.txt
├── run_integration_tests.ps1
├── run_integration_tests.sh
├── run-server.ps1
├── run-server.sh
├── scripts
│   └── sync_version.py
├── server.py
├── simulator_tests
│   ├── __init__.py
│   ├── base_test.py
│   ├── conversation_base_test.py
│   ├── log_utils.py
│   ├── test_analyze_validation.py
│   ├── test_basic_conversation.py
│   ├── test_chat_simple_validation.py
│   ├── test_codereview_validation.py
│   ├── test_consensus_conversation.py
│   ├── test_consensus_three_models.py
│   ├── test_consensus_workflow_accurate.py
│   ├── test_content_validation.py
│   ├── test_conversation_chain_validation.py
│   ├── test_cross_tool_comprehensive.py
│   ├── test_cross_tool_continuation.py
│   ├── test_debug_certain_confidence.py
│   ├── test_debug_validation.py
│   ├── test_line_number_validation.py
│   ├── test_logs_validation.py
│   ├── test_model_thinking_config.py
│   ├── test_o3_model_selection.py
│   ├── test_o3_pro_expensive.py
│   ├── test_ollama_custom_url.py
│   ├── test_openrouter_fallback.py
│   ├── test_openrouter_models.py
│   ├── test_per_tool_deduplication.py
│   ├── test_planner_continuation_history.py
│   ├── test_planner_validation_old.py
│   ├── test_planner_validation.py
│   ├── test_precommitworkflow_validation.py
│   ├── test_prompt_size_limit_bug.py
│   ├── test_refactor_validation.py
│   ├── test_secaudit_validation.py
│   ├── test_testgen_validation.py
│   ├── test_thinkdeep_validation.py
│   ├── test_token_allocation_validation.py
│   ├── test_vision_capability.py
│   └── test_xai_models.py
├── systemprompts
│   ├── __init__.py
│   ├── analyze_prompt.py
│   ├── chat_prompt.py
│   ├── clink
│   │   ├── codex_codereviewer.txt
│   │   ├── default_codereviewer.txt
│   │   ├── default_planner.txt
│   │   └── default.txt
│   ├── codereview_prompt.py
│   ├── consensus_prompt.py
│   ├── debug_prompt.py
│   ├── docgen_prompt.py
│   ├── generate_code_prompt.py
│   ├── planner_prompt.py
│   ├── precommit_prompt.py
│   ├── refactor_prompt.py
│   ├── secaudit_prompt.py
│   ├── testgen_prompt.py
│   ├── thinkdeep_prompt.py
│   └── tracer_prompt.py
├── tests
│   ├── __init__.py
│   ├── CASSETTE_MAINTENANCE.md
│   ├── conftest.py
│   ├── gemini_cassettes
│   │   ├── chat_codegen
│   │   │   └── gemini25_pro_calculator
│   │   │       └── mldev.json
│   │   ├── chat_cross
│   │   │   └── step1_gemini25_flash_number
│   │   │       └── mldev.json
│   │   └── consensus
│   │       └── step2_gemini25_flash_against
│   │           └── mldev.json
│   ├── http_transport_recorder.py
│   ├── mock_helpers.py
│   ├── openai_cassettes
│   │   ├── chat_cross_step2_gpt5_reminder.json
│   │   ├── chat_gpt5_continuation.json
│   │   ├── chat_gpt5_moon_distance.json
│   │   ├── consensus_step1_gpt5_for.json
│   │   └── o3_pro_basic_math.json
│   ├── pii_sanitizer.py
│   ├── sanitize_cassettes.py
│   ├── test_alias_target_restrictions.py
│   ├── test_auto_mode_comprehensive.py
│   ├── test_auto_mode_custom_provider_only.py
│   ├── test_auto_mode_model_listing.py
│   ├── test_auto_mode_provider_selection.py
│   ├── test_auto_mode.py
│   ├── test_auto_model_planner_fix.py
│   ├── test_azure_openai_provider.py
│   ├── test_buggy_behavior_prevention.py
│   ├── test_cassette_semantic_matching.py
│   ├── test_challenge.py
│   ├── test_chat_codegen_integration.py
│   ├── test_chat_cross_model_continuation.py
│   ├── test_chat_openai_integration.py
│   ├── test_chat_simple.py
│   ├── test_clink_claude_agent.py
│   ├── test_clink_claude_parser.py
│   ├── test_clink_codex_agent.py
│   ├── test_clink_gemini_agent.py
│   ├── test_clink_gemini_parser.py
│   ├── test_clink_integration.py
│   ├── test_clink_parsers.py
│   ├── test_clink_tool.py
│   ├── test_collaboration.py
│   ├── test_config.py
│   ├── test_consensus_integration.py
│   ├── test_consensus_schema.py
│   ├── test_consensus.py
│   ├── test_conversation_continuation_integration.py
│   ├── test_conversation_field_mapping.py
│   ├── test_conversation_file_features.py
│   ├── test_conversation_memory.py
│   ├── test_conversation_missing_files.py
│   ├── test_custom_openai_temperature_fix.py
│   ├── test_custom_provider.py
│   ├── test_debug.py
│   ├── test_deploy_scripts.py
│   ├── test_dial_provider.py
│   ├── test_directory_expansion_tracking.py
│   ├── test_disabled_tools.py
│   ├── test_docker_claude_desktop_integration.py
│   ├── test_docker_config_complete.py
│   ├── test_docker_healthcheck.py
│   ├── test_docker_implementation.py
│   ├── test_docker_mcp_validation.py
│   ├── test_docker_security.py
│   ├── test_docker_volume_persistence.py
│   ├── test_file_protection.py
│   ├── test_gemini_token_usage.py
│   ├── test_image_support_integration.py
│   ├── test_image_validation.py
│   ├── test_integration_utf8.py
│   ├── test_intelligent_fallback.py
│   ├── test_issue_245_simple.py
│   ├── test_large_prompt_handling.py
│   ├── test_line_numbers_integration.py
│   ├── test_listmodels_restrictions.py
│   ├── test_listmodels.py
│   ├── test_mcp_error_handling.py
│   ├── test_model_enumeration.py
│   ├── test_model_metadata_continuation.py
│   ├── test_model_resolution_bug.py
│   ├── test_model_restrictions.py
│   ├── test_o3_pro_output_text_fix.py
│   ├── test_o3_temperature_fix_simple.py
│   ├── test_openai_compatible_token_usage.py
│   ├── test_openai_provider.py
│   ├── test_openrouter_provider.py
│   ├── test_openrouter_registry.py
│   ├── test_parse_model_option.py
│   ├── test_per_tool_model_defaults.py
│   ├── test_pii_sanitizer.py
│   ├── test_pip_detection_fix.py
│   ├── test_planner.py
│   ├── test_precommit_workflow.py
│   ├── test_prompt_regression.py
│   ├── test_prompt_size_limit_bug_fix.py
│   ├── test_provider_retry_logic.py
│   ├── test_provider_routing_bugs.py
│   ├── test_provider_utf8.py
│   ├── test_providers.py
│   ├── test_rate_limit_patterns.py
│   ├── test_refactor.py
│   ├── test_secaudit.py
│   ├── test_server.py
│   ├── test_supported_models_aliases.py
│   ├── test_thinking_modes.py
│   ├── test_tools.py
│   ├── test_tracer.py
│   ├── test_utf8_localization.py
│   ├── test_utils.py
│   ├── test_uvx_resource_packaging.py
│   ├── test_uvx_support.py
│   ├── test_workflow_file_embedding.py
│   ├── test_workflow_metadata.py
│   ├── test_workflow_prompt_size_validation_simple.py
│   ├── test_workflow_utf8.py
│   ├── test_xai_provider.py
│   ├── transport_helpers.py
│   └── triangle.png
├── tools
│   ├── __init__.py
│   ├── analyze.py
│   ├── apilookup.py
│   ├── challenge.py
│   ├── chat.py
│   ├── clink.py
│   ├── codereview.py
│   ├── consensus.py
│   ├── debug.py
│   ├── docgen.py
│   ├── listmodels.py
│   ├── models.py
│   ├── planner.py
│   ├── precommit.py
│   ├── refactor.py
│   ├── secaudit.py
│   ├── shared
│   │   ├── __init__.py
│   │   ├── base_models.py
│   │   ├── base_tool.py
│   │   ├── exceptions.py
│   │   └── schema_builders.py
│   ├── simple
│   │   ├── __init__.py
│   │   └── base.py
│   ├── testgen.py
│   ├── thinkdeep.py
│   ├── tracer.py
│   ├── version.py
│   └── workflow
│       ├── __init__.py
│       ├── base.py
│       ├── schema_builders.py
│       └── workflow_mixin.py
├── utils
│   ├── __init__.py
│   ├── client_info.py
│   ├── conversation_memory.py
│   ├── env.py
│   ├── file_types.py
│   ├── file_utils.py
│   ├── image_utils.py
│   ├── model_context.py
│   ├── model_restrictions.py
│   ├── security_config.py
│   ├── storage_backend.py
│   └── token_utils.py
└── zen-mcp-server
```
# Files
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
```
 1 | [run]
 2 | source = gemini_server
 3 | omit = 
 4 |     */tests/*
 5 |     */venv/*
 6 |     */__pycache__/*
 7 |     */site-packages/*
 8 | 
 9 | [report]
10 | exclude_lines =
11 |     pragma: no cover
12 |     def __repr__
13 |     if self.debug:
14 |     if settings.DEBUG
15 |     raise AssertionError
16 |     raise NotImplementedError
17 |     if 0:
18 |     if __name__ == .__main__.:
19 |     if TYPE_CHECKING:
20 |     class .*\bProtocol\):
21 |     @(abc\.)?abstractmethod
22 | 
23 | [html]
24 | directory = htmlcov
```
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
```
 1 | # Ensure shell scripts always have LF line endings on checkout
 2 | *.sh text eol=lf
 3 | *.bash text eol=lf
 4 | 
 5 | # Python files
 6 | *.py text eol=lf
 7 | 
 8 | # Shell script without extension
 9 | run-server text eol=lf
10 | code_quality_checks text eol=lf
11 | run_integration_tests text eol=lf
12 | 
13 | # General text files
14 | *.md text
15 | *.txt text
16 | *.yml text
17 | *.yaml text
18 | *.json text
19 | *.xml text
20 | 
21 | # Binary files
22 | *.png binary
23 | *.jpg binary
24 | *.jpeg binary
25 | *.gif binary
26 | *.ico binary
27 | *.pdf binary
```
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
```
 1 | # Git
 2 | .git
 3 | .gitignore
 4 | 
 5 | # Python
 6 | __pycache__/
 7 | *.py[cod]
 8 | *$py.class
 9 | *.so
10 | .Python
11 | env/
12 | venv/
13 | .venv/
14 | .zen_venv/
15 | ENV/
16 | env.bak/
17 | venv.bak/
18 | 
19 | # IDE
20 | .vscode/
21 | .idea/
22 | *.swp
23 | *.swo
24 | 
25 | # OS
26 | .DS_Store
27 | Thumbs.db
28 | 
29 | # Logs
30 | logs/*.log*
31 | *.log
32 | 
33 | # Docker
34 | Dockerfile*
35 | docker-compose*
36 | .dockerignore
37 | 
38 | # Documentation
39 | docs/
40 | README.md
41 | *.md
42 | 
43 | # Tests
44 | tests/
45 | simulator_tests/
46 | test_simulation_files/
47 | pytest.ini
48 | 
49 | # Development
50 | .env
51 | .env.local
52 | examples/
53 | code_quality_checks.sh
54 | run_integration_tests.sh
55 | 
56 | # Security - Sensitive files
57 | *.key
58 | *.pem
59 | *.p12
60 | *.pfx
61 | *.crt
62 | *.csr
63 | secrets/
64 | private/
65 | 
```
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
```yaml
 1 | ---
 2 | default_stages: [pre-commit, pre-push]
 3 | repos:
 4 |   - repo: https://github.com/psf/black
 5 |     rev: 25.1.0
 6 |     hooks:
 7 |       - id: black
 8 | 
 9 |   - repo: https://github.com/pycqa/isort
10 |     rev: 6.0.1
11 |     hooks:
12 |       - id: isort
13 |         args: ["--profile", "black"]
14 | 
15 |   - repo: https://github.com/astral-sh/ruff-pre-commit
16 |     rev: v0.12.8
17 |     hooks:
18 |       - id: ruff
19 |         args: [--fix]
20 | 
21 | # Configuration for specific tools
22 | default_language_version:
23 |   python: python3
24 | 
25 | # Exclude patterns
26 | exclude: |
27 |   (?x)^(
28 |     \.git/|
29 |     \.venv/|
30 |     venv/|
31 |     \.zen_venv/|
32 |     __pycache__/|
33 |     \.pytest_cache/|
34 |     logs/|
35 |     dist/|
36 |     build/|
37 |     test_simulation_files/
38 |   )
39 | 
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | *.manifest
 31 | *.spec
 32 | 
 33 | # Installer logs
 34 | pip-log.txt
 35 | pip-delete-this-directory.txt
 36 | 
 37 | # Unit test / coverage reports
 38 | htmlcov/
 39 | .tox/
 40 | .nox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | *.py,cover
 48 | .hypothesis/
 49 | .pytest_cache/
 50 | cover/
 51 | 
 52 | # Translations
 53 | *.mo
 54 | *.pot
 55 | 
 56 | # Django stuff:
 57 | *.log
 58 | local_settings.py
 59 | db.sqlite3
 60 | db.sqlite3-journal
 61 | 
 62 | # Flask stuff:
 63 | instance/
 64 | .webassets-cache
 65 | 
 66 | # Scrapy stuff:
 67 | .scrapy
 68 | 
 69 | # Sphinx documentation
 70 | docs/_build/
 71 | 
 72 | # PyBuilder
 73 | .pybuilder/
 74 | target/
 75 | 
 76 | # Jupyter Notebook
 77 | .ipynb_checkpoints
 78 | 
 79 | # IPython
 80 | profile_default/
 81 | ipython_config.py
 82 | 
 83 | # pyenv
 84 | .python-version
 85 | 
 86 | # pipenv
 87 | Pipfile.lock
 88 | 
 89 | # poetry
 90 | poetry.lock
 91 | 
 92 | # pdm
 93 | .pdm.toml
 94 | .pdm-python
 95 | pdm.lock
 96 | 
 97 | # PEP 582
 98 | __pypackages__/
 99 | 
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 | 
104 | # SageMath parsed files
105 | *.sage.py
106 | 
107 | # Environments
108 | .env
109 | .env~
110 | .venv
111 | env/
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 | 
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 | 
121 | # Rope project settings
122 | .ropeproject
123 | 
124 | # mkdocs documentation
125 | /site
126 | 
127 | # mypy
128 | .mypy_cache/
129 | .dmypy.json
130 | dmypy.json
131 | 
132 | # Pyre type checker
133 | .pyre/
134 | 
135 | # pytype static type analyzer
136 | .pytype/
137 | 
138 | # Cython debug symbols
139 | cython_debug/
140 | 
141 | # PyCharm
142 | .idea/
143 | 
144 | # VS Code
145 | .vscode/
146 | 
147 | # macOS
148 | .DS_Store
149 | 
150 | # API Keys and secrets
151 | *.key
152 | *.pem
153 | .env.local
154 | .env.*.local
155 | 
156 | # Test outputs
157 | test_output/
158 | *.test.log
159 | .coverage
160 | htmlcov/
161 | coverage.xml
162 | .pytest_cache/
163 | 
164 | # Test simulation artifacts (dynamically created during testing)
165 | test_simulation_files/.claude/
166 | 
167 | # Temporary test directories
168 | test-setup/
169 | 
170 | # Scratch feature documentation files
171 | FEATURE_*.md
172 | # Temporary files
173 | /tmp/
174 | 
175 | # Local user instructions
176 | CLAUDE.local.md
177 | 
178 | # Claude Code personal settings
179 | .claude/settings.local.json
180 | 
181 | # Standalone mode files
182 | .zen_venv/
183 | .docker_cleaned
184 | logs/
185 | *.backup
186 | /.desktop_configured
187 | 
188 | /worktrees/
189 | test_simulation_files/
190 | .mcp.json
191 | 
```
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
```
  1 | # Zen MCP Server Environment Configuration
  2 | # Copy this file to .env and fill in your values
  3 | 
  4 | # API Keys - At least one is required
  5 | #
  6 | # IMPORTANT: Choose ONE approach:
  7 | # - Native APIs (Gemini/OpenAI/XAI) for direct access
  8 | # - DIAL for unified enterprise access
  9 | # - OpenRouter for unified cloud access
 10 | # Having multiple unified providers creates ambiguity about which serves each model.
 11 | #
 12 | # Option 1: Use native APIs (recommended for direct access)
 13 | # Get your Gemini API key from: https://makersuite.google.com/app/apikey
 14 | GEMINI_API_KEY=your_gemini_api_key_here
 15 | # GEMINI_BASE_URL=                            # Optional: Custom Gemini endpoint (defaults to Google's API)
 16 | 
 17 | # Get your OpenAI API key from: https://platform.openai.com/api-keys
 18 | OPENAI_API_KEY=your_openai_api_key_here
 19 | 
 20 | # Azure OpenAI mirrors OpenAI models through Azure-hosted deployments
 21 | # Set the endpoint from Azure Portal. Models are defined in conf/azure_models.json
 22 | # (or the file referenced by AZURE_MODELS_CONFIG_PATH).
 23 | AZURE_OPENAI_API_KEY=your_azure_openai_key_here
 24 | AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
 25 | # AZURE_OPENAI_API_VERSION=2024-02-15-preview
 26 | # AZURE_OPENAI_ALLOWED_MODELS=gpt-4o,gpt-4o-mini
 27 | # AZURE_MODELS_CONFIG_PATH=/absolute/path/to/custom_azure_models.json
 28 | 
 29 | # Get your X.AI API key from: https://console.x.ai/
 30 | XAI_API_KEY=your_xai_api_key_here
 31 | 
 32 | # Get your DIAL API key and configure host URL
 33 | # DIAL provides unified access to multiple AI models through a single API
 34 | DIAL_API_KEY=your_dial_api_key_here
 35 | # DIAL_API_HOST=https://core.dialx.ai        # Optional: Base URL without /openai suffix (auto-appended)
 36 | # DIAL_API_VERSION=2025-01-01-preview        # Optional: API version header for DIAL requests
 37 | 
 38 | # Option 2: Use OpenRouter for access to multiple models through one API
 39 | # Get your OpenRouter API key from: https://openrouter.ai/
 40 | # If using OpenRouter, comment out the native API keys above
 41 | OPENROUTER_API_KEY=your_openrouter_api_key_here
 42 | 
 43 | # Option 3: Use custom API endpoints for local models (Ollama, vLLM, LM Studio, etc.)
 44 | # CUSTOM_API_URL=http://localhost:11434/v1  # Ollama example
 45 | # CUSTOM_API_KEY=                                      # Empty for Ollama (no auth needed)
 46 | # CUSTOM_MODEL_NAME=llama3.2                          # Default model name
 47 | 
 48 | # Optional: HTTP timeout tuning for OpenAI-compatible endpoints (OpenRouter/custom/local)
 49 | # Values are seconds; defaults are 45s connect / 900s read/write/pool for remote URLs
 50 | # and 60s/1800s when pointing at localhost. Raise these if long-running models time out.
 51 | # CUSTOM_CONNECT_TIMEOUT=45.0
 52 | # CUSTOM_READ_TIMEOUT=900.0
 53 | # CUSTOM_WRITE_TIMEOUT=900.0
 54 | # CUSTOM_POOL_TIMEOUT=900.0
 55 | 
 56 | # Optional: Default model to use
 57 | # Options: 'auto' (Claude picks best model), 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', 'o4-mini-high',
 58 | #          'gpt-5', 'gpt-5-mini', 'grok', 'opus-4.1', 'sonnet-4.1', or any DIAL model if DIAL is configured
 59 | # When set to 'auto', Claude will select the best model for each task
 60 | # Defaults to 'auto' if not specified
 61 | DEFAULT_MODEL=auto
 62 | 
 63 | # Optional: Default thinking mode for ThinkDeep tool
 64 | # NOTE: Only applies to models that support extended thinking (e.g., Gemini 2.5 Pro, GPT-5 models)
 65 | #       Flash models (2.0) will use system prompt engineering instead
 66 | # Token consumption per mode:
 67 | #   minimal: 128 tokens   - Quick analysis, fastest response
 68 | #   low:     2,048 tokens - Light reasoning tasks  
 69 | #   medium:  8,192 tokens - Balanced reasoning (good for most cases)
 70 | #   high:    16,384 tokens - Complex analysis (recommended for thinkdeep)
 71 | #   max:     32,768 tokens - Maximum reasoning depth, slowest but most thorough
 72 | # Defaults to 'high' if not specified
 73 | DEFAULT_THINKING_MODE_THINKDEEP=high
 74 | 
 75 | # Optional: Model usage restrictions
 76 | # Limit which models can be used from each provider for cost control, compliance, or standardization
 77 | # Format: Comma-separated list of allowed model names (case-insensitive, whitespace tolerant)
 78 | # Empty or unset = all models allowed (default behavior)
 79 | # If you want to disable a provider entirely, don't set its API key
 80 | #
 81 | # Supported OpenAI models:
 82 | #   - o3               (200K context, high reasoning)
 83 | #   - o3-mini          (200K context, balanced)
 84 | #   - o4-mini          (200K context, latest balanced, temperature=1.0 only)
 85 | #   - o4-mini-high     (200K context, enhanced reasoning, temperature=1.0 only)
 86 | #   - gpt-5            (400K context, 128K output, reasoning tokens)
 87 | #   - gpt-5-mini       (400K context, 128K output, reasoning tokens)
 88 | #   - mini             (shorthand for o4-mini)
 89 | #
 90 | # Supported Google/Gemini models:
 91 | #   - gemini-2.5-flash   (1M context, fast, supports thinking)
 92 | #   - gemini-2.5-pro     (1M context, powerful, supports thinking)
 93 | #   - flash                             (shorthand for gemini-2.5-flash)
 94 | #   - pro                               (shorthand for gemini-2.5-pro)
 95 | #
 96 | # Supported X.AI GROK models:
 97 | #   - grok-3          (131K context, advanced reasoning)
 98 | #   - grok-3-fast     (131K context, higher performance but more expensive)
 99 | #   - grok            (shorthand for grok-3)
100 | #   - grok3           (shorthand for grok-3)
101 | #   - grokfast        (shorthand for grok-3-fast)
102 | #
103 | # Supported DIAL models (when available in your DIAL deployment):
104 | #   - o3-2025-04-16   (200K context, latest O3 release)
105 | #   - o4-mini-2025-04-16 (200K context, latest O4 mini)
106 | #   - o3              (shorthand for o3-2025-04-16)
107 | #   - o4-mini         (shorthand for o4-mini-2025-04-16)
108 | #   - anthropic.claude-sonnet-4.1-20250805-v1:0 (200K context, Claude 4.1 Sonnet)
109 | #   - anthropic.claude-sonnet-4.1-20250805-v1:0-with-thinking (200K context, Claude 4.1 Sonnet with thinking mode)
110 | #   - anthropic.claude-opus-4.1-20250805-v1:0 (200K context, Claude 4.1 Opus)
111 | #   - anthropic.claude-opus-4.1-20250805-v1:0-with-thinking (200K context, Claude 4.1 Opus with thinking mode)
112 | #   - sonnet-4.1        (shorthand for Claude 4.1 Sonnet)
113 | #   - sonnet-4.1-thinking (shorthand for Claude 4.1 Sonnet with thinking)
114 | #   - opus-4.1          (shorthand for Claude 4.1 Opus)
115 | #   - opus-4.1-thinking (shorthand for Claude 4.1 Opus with thinking)
116 | #   - gemini-2.5-pro-preview-03-25-google-search (1M context, with Google Search)
117 | #   - gemini-2.5-pro-preview-05-06 (1M context, latest preview)
118 | #   - gemini-2.5-flash-preview-05-20 (1M context, latest flash preview)
119 | #   - gemini-2.5-pro  (shorthand for gemini-2.5-pro-preview-05-06)
120 | #   - gemini-2.5-pro-search (shorthand for gemini-2.5-pro-preview-03-25-google-search)
121 | #   - gemini-2.5-flash (shorthand for gemini-2.5-flash-preview-05-20)
122 | #
123 | # Examples:
124 | #   OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini  # Only allow mini models (cost control)
125 | #   GOOGLE_ALLOWED_MODELS=flash                  # Only allow Flash (fast responses)
126 | #   XAI_ALLOWED_MODELS=grok-3                    # Only allow standard GROK (not fast variant)
127 | #   OPENAI_ALLOWED_MODELS=o4-mini                # Single model standardization
128 | #   GOOGLE_ALLOWED_MODELS=flash,pro              # Allow both Gemini models
129 | #   XAI_ALLOWED_MODELS=grok,grok-3-fast          # Allow both GROK variants
130 | #   DIAL_ALLOWED_MODELS=o3,o4-mini                       # Only allow O3/O4 models via DIAL
131 | #   DIAL_ALLOWED_MODELS=opus-4.1,sonnet-4.1                  # Only Claude 4.1 models (without thinking)
132 | #   DIAL_ALLOWED_MODELS=opus-4.1-thinking,sonnet-4.1-thinking # Only Claude 4.1 with thinking mode
133 | #   DIAL_ALLOWED_MODELS=gemini-2.5-pro,gemini-2.5-flash  # Only Gemini 2.5 models via DIAL
134 | #
135 | # Note: These restrictions apply even in 'auto' mode - Claude will only pick from allowed models
136 | # OPENAI_ALLOWED_MODELS=
137 | # GOOGLE_ALLOWED_MODELS=
138 | # XAI_ALLOWED_MODELS=
139 | # DIAL_ALLOWED_MODELS=
140 | 
141 | # Optional: Custom model configuration file path
142 | # Override the default location of custom_models.json
143 | # CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json
144 | 
145 | # Note: Conversations are stored in memory during the session
146 | 
147 | # Optional: Conversation timeout (hours)
148 | # How long AI-to-AI conversation threads persist before expiring
149 | # Longer timeouts use more memory but allow resuming conversations later
150 | # Defaults to 24 hours if not specified
151 | CONVERSATION_TIMEOUT_HOURS=24
152 | 
153 | # Optional: Max conversation turns
154 | # Maximum number of turns allowed in an AI-to-AI conversation thread
155 | # Each exchange (Claude asks, Gemini responds) counts as 2 turns
 156 | # So the default of 40 turns allows 20 exchanges
157 | MAX_CONVERSATION_TURNS=40
158 | 
159 | # Optional: Logging level (DEBUG, INFO, WARNING, ERROR)
160 | # DEBUG: Shows detailed operational messages for troubleshooting (default)
161 | # INFO: Shows general operational messages
162 | # WARNING: Shows only warnings and errors
163 | # ERROR: Shows only errors
164 | LOG_LEVEL=DEBUG
165 | 
166 | # Optional: Tool Selection
167 | # Comma-separated list of tools to disable. If not set, all tools are enabled.
168 | # Essential tools (version, listmodels) cannot be disabled.
169 | # Available tools: chat, thinkdeep, planner, consensus, codereview, precommit,
170 | #                  debug, docgen, analyze, refactor, tracer, testgen, challenge, secaudit
171 | # 
 172 | # DEFAULT CONFIGURATION: To optimize context window usage, some tools are
 173 | # disabled by default. Only the following tools remain enabled out of the box:
174 | # - chat, thinkdeep, planner, consensus (collaboration tools)
175 | # - codereview, precommit, debug (code quality tools)  
176 | # - challenge (critical thinking utility)
177 | #
178 | # To enable additional tools, remove them from the DISABLED_TOOLS list below.
179 | DISABLED_TOOLS=analyze,refactor,testgen,secaudit,docgen,tracer
180 | 
181 | # Optional: Language/Locale for AI responses
182 | # When set, all AI tools will respond in the specified language
183 | # while maintaining their analytical capabilities
184 | # Examples: "fr-FR", "en-US", "zh-CN", "zh-TW", "ja-JP", "ko-KR", "es-ES"
185 | # Leave empty for default language (English)
186 | # LOCALE=fr-FR
187 | 
188 | # ===========================================
189 | # Zen MCP Server Configuration  
190 | # ===========================================
191 | 
192 | # Force .env file values to override system environment variables
193 | # This prevents issues where different AI tools (Claude Code, etc.) pass 
194 | # conflicting or cached environment variables that override each other
195 | # 
196 | # When enabled (true):
197 | #   - .env file values take absolute precedence
198 | #   - Prevents MCP clients from passing outdated/cached API keys
199 | #   - Ensures consistent configuration across different AI tool integrations
200 | #   - Solves environment variable conflicts between multiple AI applications
201 | #
202 | # When disabled (false):
203 | #   - System environment variables take precedence (standard behavior)  
204 | #   - Suitable for production deployments with secure environment injection
205 | #   - Respects container orchestrator and CI/CD pipeline configurations
206 | #
207 | # Recommended settings:
208 | #   Development with multiple AI tools: true (prevents tool conflicts)
209 | #   Production/Container deployments: false (preserves security practices)
210 | #   CI/CD environments: false (respects pipeline secrets)
211 | ZEN_MCP_FORCE_ENV_OVERRIDE=false
212 | 
213 | # ===========================================
214 | # Docker Configuration
215 | # ===========================================
216 | 
217 | # Container name for Docker Compose
218 | # Used when running with docker-compose.yml
219 | COMPOSE_PROJECT_NAME=zen-mcp
220 | 
221 | # Timezone for Docker containers
222 | # Ensures consistent time handling in containerized environments
223 | TZ=UTC
224 | 
225 | # Maximum log file size (default: 10MB)
226 | # Applicable when using file-based logging
227 | LOG_MAX_SIZE=10MB
228 | 
```
--------------------------------------------------------------------------------
/docker/README.md:
--------------------------------------------------------------------------------
```markdown
  1 | # Zen MCP Server - Docker Setup
  2 | 
  3 | ## Quick Start
  4 | 
  5 | ### 1. Prerequisites
  6 | 
  7 | - Docker installed (Docker Compose optional)
  8 | - At least one API key (Gemini, OpenAI, xAI, etc.)
  9 | 
 10 | ### 2. Configuration
 11 | 
 12 | ```bash
 13 | # Copy environment template
 14 | cp .env.example .env
 15 | 
 16 | # Edit with your API keys (at least one required)
 17 | # Required: GEMINI_API_KEY or OPENAI_API_KEY or XAI_API_KEY
 18 | nano .env
 19 | ```
 20 | 
 21 | ### 3. Build Image
 22 | 
 23 | ```bash
 24 | # Build the Docker image
 25 | docker build -t zen-mcp-server:latest .
 26 | 
 27 | # Or use the build script (Bash)
 28 | chmod +x docker/scripts/build.sh
 29 | ./docker/scripts/build.sh
 30 | 
 31 | # Build with PowerShell
 32 | docker/scripts/build.ps1
 33 | 
 34 | ```
 35 | 
 36 | ### 4. Usage Options
 37 | 
 38 | #### A. Direct Docker Run (Recommended for MCP)
 39 | 
 40 | ```bash
 41 | # Run with environment file
 42 | docker run --rm -i --env-file .env \
 43 |   -v $(pwd)/logs:/app/logs \
 44 |   zen-mcp-server:latest
 45 | 
 46 | # Run with inline environment variables
 47 | docker run --rm -i \
 48 |   -e GEMINI_API_KEY="your_key_here" \
 49 |   -e LOG_LEVEL=INFO \
 50 |   -v $(pwd)/logs:/app/logs \
 51 |   zen-mcp-server:latest
 52 | ```
 53 | 
 54 | #### B. Docker Compose (For Development/Monitoring)
 55 | 
 56 | ```bash
 57 | # Deploy with Docker Compose
 58 | chmod +x docker/scripts/deploy.sh
 59 | ./docker/scripts/deploy.sh
 60 | 
 61 | # Or use PowerShell script
 62 | docker/scripts/deploy.ps1
 63 | 
 64 | # Interactive stdio mode
 65 | docker-compose exec zen-mcp python server.py
 66 | ```
 67 | 
 68 | ## Service Management
 69 | 
 70 | ### Docker Commands
 71 | 
 72 | ```bash
 73 | # View running containers
 74 | docker ps
 75 | 
 76 | # View logs from container
 77 | docker logs <container_id>
 78 | 
 79 | # Stop all zen-mcp containers
 80 | docker stop $(docker ps -q --filter "ancestor=zen-mcp-server:latest")
 81 | 
 82 | # Remove old containers and images
 83 | docker container prune
 84 | docker image prune
 85 | ```
 86 | 
 87 | ### Docker Compose Management (Optional)
 88 | 
 89 | ```bash
 90 | # View logs
 91 | docker-compose logs -f zen-mcp
 92 | 
 93 | # Check status
 94 | docker-compose ps
 95 | 
 96 | # Restart service
 97 | docker-compose restart zen-mcp
 98 | 
 99 | # Stop services
100 | docker-compose down
101 | 
102 | # Rebuild and update
103 | docker-compose build --no-cache zen-mcp
104 | docker-compose up -d zen-mcp
105 | ```
106 | 
107 | ## Health Monitoring
108 | 
109 | The container includes health checks that verify:
110 | - Server process is running
111 | - Python modules can be imported
112 | - Log directory is writable  
113 | - API keys are configured
114 | 
115 | ## Volumes and Persistent Data
116 | 
117 | The Docker setup includes persistent volumes to preserve data between container runs:
118 | 
119 | - **`./logs:/app/logs`** - Persistent log storage (local folder mount)
120 | - **`zen-mcp-config:/app/conf`** - Configuration persistence (named Docker volume)
121 | - **`/etc/localtime:/etc/localtime:ro`** - Host timezone synchronization (read-only)
122 | 
123 | ### How Persistent Volumes Work
124 | 
125 | The `zen-mcp` service (used by the `zen-docker` MCP configuration and by Docker Compose commands) mounts the named volume `zen-mcp-config`. Any data written to `/app/conf` inside the container is preserved between runs because named Docker volumes persist independently of container lifecycles.
126 | 
127 | In the `docker-compose.yml` file, you will find:
128 | 
129 | ```yaml
130 | volumes:
131 |   - ./logs:/app/logs
132 |   - zen-mcp-config:/app/conf
133 |   - /etc/localtime:/etc/localtime:ro
134 | ```
135 | 
136 | and the named volume definition:
137 | 
138 | ```yaml
139 | volumes:
140 |   zen-mcp-config:
141 |     driver: local
142 | ```
143 | 
144 | ## Security
145 | 
146 | - Runs as non-root user `zenuser`
147 | - Read-only filesystem with tmpfs for temporary files
148 | - No network ports exposed (stdio communication only)
149 | - Secrets managed via environment variables
150 | 
151 | ## Troubleshooting
152 | 
153 | ### Container won't start
154 | 
155 | ```bash
156 | # Check if image exists
157 | docker images zen-mcp-server
158 | 
159 | # Test container interactively
160 | docker run --rm -it --env-file .env zen-mcp-server:latest bash
161 | 
162 | # Check environment variables
163 | docker run --rm --env-file .env zen-mcp-server:latest env | grep API
164 | 
165 | # Test with minimal configuration
166 | docker run --rm -i -e GEMINI_API_KEY="test" zen-mcp-server:latest python server.py
167 | ```
168 | 
169 | ### MCP Connection Issues
170 | 
171 | ```bash
172 | # Test Docker connectivity
173 | docker run --rm hello-world
174 | 
175 | # Verify container stdio
176 | echo '{"jsonrpc": "2.0", "method": "ping"}' | docker run --rm -i --env-file .env zen-mcp-server:latest python server.py
177 | 
178 | # Check Claude Desktop logs for connection errors
179 | ```
180 | 
181 | ### API Key Problems
182 | 
183 | ```bash
184 | # Verify API keys are loaded
185 | docker run --rm --env-file .env zen-mcp-server:latest python -c "import os; print('GEMINI_API_KEY:', bool(os.getenv('GEMINI_API_KEY')))"
186 | 
187 | # Test API connectivity
188 | docker run --rm --env-file .env zen-mcp-server:latest python /usr/local/bin/healthcheck.py
189 | ```
190 | 
191 | ### Permission Issues
192 | 
193 | ```bash
194 | # Fix log directory permissions (Linux/macOS)
195 | sudo chown -R $USER:$USER logs/
196 | chmod 755 logs/
197 | 
198 | # Windows: Run Docker Desktop as Administrator if needed
199 | ```
200 | 
201 | ### Memory/Performance Issues
202 | 
203 | ```bash
204 | # Check container resource usage
205 | docker stats
206 | 
207 | # Run with memory limits
208 | docker run --rm -i --memory="512m" --env-file .env zen-mcp-server:latest
209 | 
210 | # Monitor Docker logs
211 | docker run --rm -i --env-file .env zen-mcp-server:latest 2>&1 | tee docker.log
212 | ```
213 | 
214 | ## MCP Integration (Claude Desktop)
215 | 
216 | ### Recommended Configuration (docker run)
217 | 
218 | ```json
219 | {
220 |   "servers": {
221 |     "zen-docker": {
222 |       "command": "docker",
223 |       "args": [
224 |         "run",
225 |         "--rm",
226 |         "-i",
227 |         "--env-file",
228 |         "/absolute/path/to/zen-mcp-server/.env",
229 |         "-v",
230 |         "/absolute/path/to/zen-mcp-server/logs:/app/logs",
231 |         "zen-mcp-server:latest"
232 |       ]
233 |     }
234 |   }
235 | }
236 | ```
237 | 
238 | ### Windows Example
239 | 
240 | ```json
241 | {
242 |   "servers": {
243 |     "zen-docker": {
244 |       "command": "docker",
245 |       "args": [
246 |         "run",
247 |         "--rm",
248 |         "-i",
249 |         "--env-file",
250 |         "C:/Users/YourName/path/to/zen-mcp-server/.env",
251 |         "-v",
252 |         "C:/Users/YourName/path/to/zen-mcp-server/logs:/app/logs",
253 |         "zen-mcp-server:latest"
254 |       ]
255 |     }
256 |   }
257 | }
258 | ```
259 | 
260 | ### Advanced Option: docker-compose run (uses compose configuration)
261 | 
262 | ```json
263 | {
264 |   "servers": {
265 |     "zen-docker": {
266 |       "command": "docker-compose",
267 |       "args": [
268 |         "-f",
269 |         "/absolute/path/to/zen-mcp-server/docker-compose.yml",
270 |         "run",
271 |         "--rm",
272 |         "zen-mcp"
273 |       ]
274 |     }
275 |   }
276 | }
277 | ```
278 | 
279 | ### Environment File Template
280 | 
281 | Create a `.env` file with at least one API key:
282 | 
283 | ```bash
284 | # Required: At least one API key
285 | GEMINI_API_KEY=your_gemini_key_here
286 | OPENAI_API_KEY=your_openai_key_here
287 | 
288 | # Optional configuration
289 | LOG_LEVEL=INFO
290 | DEFAULT_MODEL=auto
291 | DEFAULT_THINKING_MODE_THINKDEEP=high
292 | 
293 | # Optional API keys (leave empty if not used)
294 | ANTHROPIC_API_KEY=
295 | XAI_API_KEY=
296 | DIAL_API_KEY=
297 | OPENROUTER_API_KEY=
298 | CUSTOM_API_URL=
299 | ```
300 | 
301 | ## Quick Test & Validation
302 | 
303 | ### 1. Test Docker Image
304 | 
305 | ```bash
306 | # Test container starts correctly
307 | docker run --rm zen-mcp-server:latest python --version
308 | 
309 | # Test health check
310 | docker run --rm -e GEMINI_API_KEY="test" zen-mcp-server:latest python /usr/local/bin/healthcheck.py
311 | ```
312 | 
313 | ### 2. Test MCP Protocol
314 | 
315 | ```bash
316 | # Test basic MCP communication
317 | echo '{"jsonrpc": "2.0", "method": "initialize", "params": {}}' | \
318 |   docker run --rm -i --env-file .env zen-mcp-server:latest python server.py
319 | ```
320 | 
321 | ### 3. Validate Configuration
322 | 
323 | ```bash
324 | # Run validation script
325 | python test_mcp_config.py
326 | 
327 | # Or validate JSON manually
328 | python -m json.tool .vscode/mcp.json
329 | ```
330 | 
331 | ## Available Tools
332 | 
333 | The Zen MCP Server provides these tools when properly configured:
334 | 
335 | - **chat** - General AI conversation and collaboration
336 | - **thinkdeep** - Multi-stage investigation and reasoning  
337 | - **planner** - Interactive sequential planning
338 | - **consensus** - Multi-model consensus workflow
339 | - **codereview** - Comprehensive code review
340 | - **debug** - Root cause analysis and debugging
341 | - **analyze** - Code analysis and assessment
342 | - **refactor** - Refactoring analysis and suggestions
343 | - **secaudit** - Security audit workflow
344 | - **testgen** - Test generation with edge cases
345 | - **docgen** - Documentation generation
346 | - **tracer** - Code tracing and dependency mapping
347 | - **precommit** - Pre-commit validation workflow
348 | - **listmodels** - Available AI models information
349 | - **version** - Server version and configuration
350 | 
351 | ## Performance Notes
352 | 
353 | - **Image size**: ~293MB optimized multi-stage build
354 | - **Memory usage**: ~256MB base + model overhead
355 | - **Startup time**: ~2-3 seconds for container initialization
356 | - **API response**: Varies by model and complexity (1-30 seconds)
357 | 
358 | For production use, consider:
359 | - Using specific API keys for rate limiting
360 | - Monitoring container resource usage
361 | - Setting up log rotation for persistent logs
362 | - Using Docker health checks for reliability
363 | 
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
  1 | # Zen MCP: Many Workflows. One Context.
  2 | 
  3 | <div align="center">
  4 | 
  5 |   [Zen in action](https://github.com/user-attachments/assets/0d26061e-5f21-4ab1-b7d0-f883ddc2c3da)
  6 | 
  7 | 👉 **[Watch more examples](#-watch-tools-in-action)**
  8 | 
  9 | ### Your CLI + Multiple Models = Your AI Dev Team
 10 | 
 11 | **Use the 🤖 CLI you love:**  
 12 | [Claude Code](https://www.anthropic.com/claude-code) · [Gemini CLI](https://github.com/google-gemini/gemini-cli) · [Codex CLI](https://github.com/openai/codex) · [Qwen Code CLI](https://qwenlm.github.io/qwen-code-docs/) · [Cursor](https://cursor.com) · _and more_
 13 | 
 14 | **With multiple models within a single prompt:**  
 15 | Gemini · OpenAI · Anthropic · Grok · Azure · Ollama · OpenRouter · DIAL · On-Device Model
 16 | 
 17 | </div>
 18 | 
 19 | ---
 20 | 
 21 | ## 🆕 Now with CLI-to-CLI Bridge
 22 | 
 23 | The new **[`clink`](docs/tools/clink.md)** (CLI + Link) tool connects external AI CLIs directly into your workflow:
 24 | 
 25 | - **Connect external CLIs** like [Gemini CLI](https://github.com/google-gemini/gemini-cli), [Codex CLI](https://github.com/openai/codex), and [Claude Code](https://www.anthropic.com/claude-code) directly into your workflow
 26 | - **CLI Subagents** - Launch isolated CLI instances from _within_ your current CLI! Claude Code can spawn Codex subagents, Codex can spawn Gemini CLI subagents, etc. Offload heavy tasks (code reviews, bug hunting) to fresh contexts while your main session's context window remains unpolluted. Each subagent returns only final results.
 27 | - **Context Isolation** - Run separate investigations without polluting your primary workspace
 28 | - **Role Specialization** - Spawn `planner`, `codereviewer`, or custom role agents with specialized system prompts
 29 | - **Full CLI Capabilities** - Web search, file inspection, MCP tool access, latest documentation lookups
 30 | - **Seamless Continuity** - Sub-CLIs participate as first-class members with full conversation context between tools
 31 | 
 32 | ```bash
 33 | # Codex spawns Codex subagent for isolated code review in fresh context
 34 | clink with codex codereviewer to audit auth module for security issues
 35 | # Subagent reviews in isolation, returns final report without cluttering your context as codex reads each file and walks the directory structure
 36 | 
 37 | # Consensus from different AI models → Implementation handoff with full context preservation between tools
 38 | Use consensus with gpt-5 and gemini-pro to decide: dark mode or offline support next
 39 | Continue with clink gemini - implement the recommended feature
 40 | # Gemini receives full debate context and starts coding immediately
 41 | ```
 42 | 
 43 | 👉 **[Learn more about clink](docs/tools/clink.md)**
 44 | 
 45 | ---
 46 | 
 47 | ## Why Zen MCP?
 48 | 
 49 | **Why rely on one AI model when you can orchestrate them all?**
 50 | 
 51 | A Model Context Protocol server that supercharges tools like [Claude Code](https://www.anthropic.com/claude-code), [Codex CLI](https://developers.openai.com/codex/cli), and IDE clients such
 52 | as [Cursor](https://cursor.com) or the [Claude Dev VS Code extension](https://marketplace.visualstudio.com/items?itemName=Anthropic.claude-vscode). **Zen MCP connects your favorite AI tool
 53 | to multiple AI models** for enhanced code analysis, problem-solving, and collaborative development.
 54 | 
 55 | ### True AI Collaboration with Conversation Continuity
 56 | 
 57 | Zen supports **conversation threading** so your CLI can **discuss ideas with multiple AI models, exchange reasoning, get second opinions, and even run collaborative debates between models** to help you reach deeper insights and better solutions.
 58 | 
 59 | Your CLI always stays in control but gets perspectives from the best AI for each subtask. Context carries forward seamlessly across tools and models, enabling complex workflows like: code reviews with multiple models → automated planning → implementation → pre-commit validation.
 60 | 
 61 | > **You're in control.** Your CLI of choice orchestrates the AI team, but you decide the workflow. Craft powerful prompts that bring in Gemini Pro, GPT 5, Flash, or local offline models exactly when needed.
 62 | 
 63 | <details>
 64 | <summary><b>Reasons to Use Zen MCP</b></summary>
 65 | 
 66 | A typical workflow with Claude Code as an example:
 67 | 
 68 | 1. **Multi-Model Orchestration** - Claude coordinates with Gemini Pro, O3, GPT-5, and 50+ other models to get the best analysis for each task
 69 | 
 70 | 2. **Context Revival Magic** - Even after Claude's context resets, continue conversations seamlessly by having other models "remind" Claude of the discussion
 71 | 
 72 | 3. **Guided Workflows** - Enforces systematic investigation phases that prevent rushed analysis and ensure thorough code examination
 73 | 
 74 | 4. **Extended Context Windows** - Break Claude's limits by delegating to Gemini (1M tokens) or O3 (200K tokens) for massive codebases
 75 | 
 76 | 5. **True Conversation Continuity** - Full context flows across tools and models - Gemini remembers what O3 said 10 steps ago
 77 | 
 78 | 6. **Model-Specific Strengths** - Extended thinking with Gemini Pro, blazing speed with Flash, strong reasoning with O3, privacy with local Ollama
 79 | 
 80 | 7. **Professional Code Reviews** - Multi-pass analysis with severity levels, actionable feedback, and consensus from multiple AI experts
 81 | 
 82 | 8. **Smart Debugging Assistant** - Systematic root cause analysis with hypothesis tracking and confidence levels
 83 | 
 84 | 9. **Automatic Model Selection** - Claude intelligently picks the right model for each subtask (or you can specify)
 85 | 
 86 | 10. **Vision Capabilities** - Analyze screenshots, diagrams, and visual content with vision-enabled models
 87 | 
 88 | 11. **Local Model Support** - Run Llama, Mistral, or other models locally for complete privacy and zero API costs
 89 | 
 90 | 12. **Bypass MCP Token Limits** - Automatically works around MCP's 25K limit for large prompts and responses
 91 | 
 92 | **The Killer Feature:** When Claude's context resets, just ask to "continue with O3" - the other model's response magically revives Claude's understanding without re-ingesting documents!
 93 | 
 94 | #### Example: Multi-Model Code Review Workflow
 95 | 
 96 | 1. `Perform a codereview using gemini pro and o3 and use planner to generate a detailed plan, implement the fixes and do a final precommit check by continuing from the previous codereview`
 97 | 2. This triggers a [`codereview`](docs/tools/codereview.md) workflow where Claude walks the code, looking for all kinds of issues
 98 | 3. After multiple passes, collects relevant code and makes note of issues along the way
 99 | 4. Maintains a `confidence` level between `exploring`, `low`, `medium`, `high` and `certain` to track how confidently it's been able to find and identify issues
100 | 5. Generates a detailed list of critical -> low issues
101 | 6. Shares the relevant files, findings, etc with **Gemini Pro** to perform a deep dive for a second [`codereview`](docs/tools/codereview.md)
102 | 7. Comes back with a response and next does the same with o3, adding to the prompt if a new discovery comes to light
103 | 8. When done, Claude takes in all the feedback and combines a single list of all critical -> low issues, including good patterns in your code. The final list includes new findings or revisions in case Claude misunderstood or missed something crucial and one of the other models pointed this out
104 | 9. It then uses the [`planner`](docs/tools/planner.md) workflow to break the work down into simpler steps if a major refactor is required
105 | 10. Claude then performs the actual work of fixing highlighted issues
106 | 11. When done, Claude returns to Gemini Pro for a [`precommit`](docs/tools/precommit.md) review
107 | 
108 | All within a single conversation thread! Gemini Pro in step 11 _knows_ what was recommended by O3 in step 7, and takes that context
109 | and review into consideration to aid with its final pre-commit review.
110 | 
111 | **Think of it as Claude Code _for_ Claude Code.** This MCP isn't magic. It's just **super-glue**.
112 | 
113 | > **Remember:** Claude stays in full control — but **YOU** call the shots.
114 | > Zen is designed to have Claude engage other models only when needed — and to follow through with meaningful back-and-forth.
115 | > **You're** the one who crafts the powerful prompt that makes Claude bring in Gemini, Flash, O3 — or fly solo.
116 | > You're the guide. The prompter. The puppeteer.
117 | > #### You are the AI - **Actually Intelligent**.
118 | </details>
119 | 
120 | #### Recommended AI Stack
121 | 
122 | <details>
123 | <summary>For Claude Code Users</summary>
124 | 
125 | For best results when using [Claude Code](https://claude.ai/code):  
126 | 
127 | - **Sonnet 4.5** - All agentic work and orchestration
128 | - **Gemini 2.5 Pro** OR **GPT-5-Pro** - Deep thinking, additional code reviews, debugging and validations, pre-commit analysis
129 | </details>
130 | 
131 | <details>
132 | <summary>For Codex Users</summary>
133 | 
134 | For best results when using [Codex CLI](https://developers.openai.com/codex/cli):  
135 | 
136 | - **GPT-5 Codex Medium** - All agentic work and orchestration
137 | - **Gemini 2.5 Pro** OR **GPT-5-Pro** - Deep thinking, additional code reviews, debugging and validations, pre-commit analysis
138 | </details>
139 | 
140 | ## Quick Start (5 minutes)
141 | 
142 | **Prerequisites:** Python 3.10+, Git, [uv installed](https://docs.astral.sh/uv/getting-started/installation/)
143 | 
144 | **1. Get API Keys** (choose one or more):
145 | - **[OpenRouter](https://openrouter.ai/)** - Access multiple models with one API
146 | - **[Gemini](https://makersuite.google.com/app/apikey)** - Google's latest models
147 | - **[OpenAI](https://platform.openai.com/api-keys)** - O3, GPT-5 series
148 | - **[Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/)** - Enterprise deployments of GPT-4o, GPT-4.1, GPT-5 family
149 | - **[X.AI](https://console.x.ai/)** - Grok models
150 | - **[DIAL](https://dialx.ai/)** - Vendor-agnostic model access
151 | - **[Ollama](https://ollama.ai/)** - Local models (free)
152 | 
153 | **2. Install** (choose one):
154 | 
155 | **Option A: Clone and Automatic Setup** (recommended)
156 | ```bash
157 | git clone https://github.com/BeehiveInnovations/zen-mcp-server.git
158 | cd zen-mcp-server
159 | 
160 | # Handles everything: setup, config, API keys from system environment. 
161 | # Auto-configures Claude Desktop, Claude Code, Gemini CLI, Codex CLI, Qwen CLI
162 | # Enable / disable additional settings in .env
163 | ./run-server.sh  
164 | ```
165 | 
166 | **Option B: Instant Setup with [uvx](https://docs.astral.sh/uv/getting-started/installation/)**
167 | ```json
168 | // Add to ~/.claude/settings.json or .mcp.json
169 | // Don't forget to add your API keys under env
170 | {
171 |   "mcpServers": {
172 |     "zen": {
173 |       "command": "bash",
174 |       "args": ["-c", "for p in $(which uvx 2>/dev/null) $HOME/.local/bin/uvx /opt/homebrew/bin/uvx /usr/local/bin/uvx uvx; do [ -x \"$p\" ] && exec \"$p\" --from git+https://github.com/BeehiveInnovations/zen-mcp-server.git zen-mcp-server; done; echo 'uvx not found' >&2; exit 1"],
175 |       "env": {
176 |         "PATH": "/usr/local/bin:/usr/bin:/bin:/opt/homebrew/bin:~/.local/bin",
177 |         "GEMINI_API_KEY": "your-key-here",
178 |         "DISABLED_TOOLS": "analyze,refactor,testgen,secaudit,docgen,tracer",
179 |         "DEFAULT_MODEL": "auto"
180 |       }
181 |     }
182 |   }
183 | }
184 | ```
185 | 
186 | **3. Start Using!**
187 | ```
188 | "Use zen to analyze this code for security issues with gemini pro"
189 | "Debug this error with o3 and then get flash to suggest optimizations"
190 | "Plan the migration strategy with zen, get consensus from multiple models"
191 | "clink with cli_name=\"gemini\" role=\"planner\" to draft a phased rollout plan"
192 | ```
193 | 
194 | 👉 **[Complete Setup Guide](docs/getting-started.md)** with detailed installation, configuration for Gemini / Codex / Qwen, and troubleshooting
195 | 👉 **[Cursor & VS Code Setup](docs/getting-started.md#ide-clients)** for IDE integration instructions
196 | 📺 **[Watch tools in action](#-watch-tools-in-action)** to see real-world examples
197 | 
198 | ## Provider Configuration
199 | 
200 | Zen activates any provider that has credentials in your `.env`. See `.env.example` for deeper customization.
201 | 
202 | ## Core Tools
203 | 
204 | > **Note:** Each tool comes with its own multi-step workflow, parameters, and descriptions that consume valuable context window space even when not in use. To optimize performance, some tools are disabled by default. See [Tool Configuration](#tool-configuration) below to enable them.
205 | 
206 | **Collaboration & Planning** *(Enabled by default)*
207 | - **[`clink`](docs/tools/clink.md)** - Bridge requests to external AI CLIs (Gemini planner, codereviewer, etc.)
208 | - **[`chat`](docs/tools/chat.md)** - Brainstorm ideas, get second opinions, validate approaches. With capable models (GPT-5 Pro, Gemini 2.5 Pro), generates complete code / implementation
209 | - **[`thinkdeep`](docs/tools/thinkdeep.md)** - Extended reasoning, edge case analysis, alternative perspectives
210 | - **[`planner`](docs/tools/planner.md)** - Break down complex projects into structured, actionable plans
211 | - **[`consensus`](docs/tools/consensus.md)** - Get expert opinions from multiple AI models with stance steering
212 | 
213 | **Code Analysis & Quality**
214 | - **[`debug`](docs/tools/debug.md)** - Systematic investigation and root cause analysis
215 | - **[`precommit`](docs/tools/precommit.md)** - Validate changes before committing, prevent regressions
216 | - **[`codereview`](docs/tools/codereview.md)** - Professional reviews with severity levels and actionable feedback
217 | - **[`analyze`](docs/tools/analyze.md)** *(disabled by default - [enable](#tool-configuration))* - Understand architecture, patterns, dependencies across entire codebases
218 | 
219 | **Development Tools** *(Disabled by default - [enable](#tool-configuration))*
220 | - **[`refactor`](docs/tools/refactor.md)** - Intelligent code refactoring with decomposition focus
221 | - **[`testgen`](docs/tools/testgen.md)** - Comprehensive test generation with edge cases
222 | - **[`secaudit`](docs/tools/secaudit.md)** - Security audits with OWASP Top 10 analysis
223 | - **[`docgen`](docs/tools/docgen.md)** - Generate documentation with complexity analysis
224 | 
225 | **Utilities**
226 | - **[`apilookup`](docs/tools/apilookup.md)** - Forces current-year API/SDK documentation lookups in a sub-process (saves tokens within the current context window) and prevents responses based on outdated training data
227 | - **[`challenge`](docs/tools/challenge.md)** - Prevent "You're absolutely right!" responses with critical analysis
228 | - **[`tracer`](docs/tools/tracer.md)** *(disabled by default - [enable](#tool-configuration))* - Static analysis prompts for call-flow mapping
229 | 
230 | <details>
231 | <summary><b id="tool-configuration">👉 Tool Configuration</b></summary>
232 | 
233 | ### Default Configuration
234 | 
235 | To optimize context window usage, only essential tools are enabled by default:
236 | 
237 | **Enabled by default:**
238 | - `chat`, `thinkdeep`, `planner`, `consensus` - Core collaboration tools
239 | - `codereview`, `precommit`, `debug` - Essential code quality tools
240 | - `apilookup` - Rapid API/SDK information lookup
241 | - `challenge` - Critical thinking utility
242 | 
243 | **Disabled by default:**
244 | - `analyze`, `refactor`, `testgen`, `secaudit`, `docgen`, `tracer`
245 | 
246 | ### Enabling Additional Tools
247 | 
248 | To enable additional tools, remove them from the `DISABLED_TOOLS` list:
249 | 
250 | **Option 1: Edit your .env file**
251 | ```bash
252 | # Default configuration (from .env.example)
253 | DISABLED_TOOLS=analyze,refactor,testgen,secaudit,docgen,tracer
254 | 
255 | # To enable specific tools, remove them from the list
256 | # Example: Enable analyze tool
257 | DISABLED_TOOLS=refactor,testgen,secaudit,docgen,tracer
258 | 
259 | # To enable ALL tools
260 | DISABLED_TOOLS=
261 | ```
262 | 
263 | **Option 2: Configure in MCP settings**
264 | ```json
265 | // In ~/.claude/settings.json or .mcp.json
266 | {
267 |   "mcpServers": {
268 |     "zen": {
269 |       "env": {
270 |         // Tool configuration
271 |         "DISABLED_TOOLS": "refactor,testgen,secaudit,docgen,tracer",
272 |         "DEFAULT_MODEL": "pro",
273 |         "DEFAULT_THINKING_MODE_THINKDEEP": "high",
274 |         
275 |         // API configuration
276 |         "GEMINI_API_KEY": "your-gemini-key",
277 |         "OPENAI_API_KEY": "your-openai-key",
278 |         "OPENROUTER_API_KEY": "your-openrouter-key",
279 |         
280 |         // Logging and performance
281 |         "LOG_LEVEL": "INFO",
282 |         "CONVERSATION_TIMEOUT_HOURS": "6",
283 |         "MAX_CONVERSATION_TURNS": "50"
284 |       }
285 |     }
286 |   }
287 | }
288 | ```
289 | 
290 | **Option 3: Enable all tools**
291 | ```json
292 | // Remove or empty the DISABLED_TOOLS to enable everything
293 | {
294 |   "mcpServers": {
295 |     "zen": {
296 |       "env": {
297 |         "DISABLED_TOOLS": ""
298 |       }
299 |     }
300 |   }
301 | }
302 | ```
303 | 
304 | **Note:**
305 | - Essential tools (`version`, `listmodels`) cannot be disabled
306 | - After changing tool configuration, restart your Claude session for changes to take effect
307 | - Each tool adds to context window usage, so only enable what you need
308 | 
309 | </details>
310 | 
311 | ## 📺 Watch Tools In Action
312 | 
313 | <details>
314 | <summary><b>Chat Tool</b> - Collaborative decision making and multi-turn conversations</summary>
315 | 
316 | **Picking Redis vs Memcached:**
317 | 
318 | [Chat Redis or Memcached_web.webm](https://github.com/user-attachments/assets/41076cfe-dd49-4dfc-82f5-d7461b34705d)
319 | 
320 | **Multi-turn conversation with continuation:**
321 | 
322 | [Chat With Gemini_web.webm](https://github.com/user-attachments/assets/37bd57ca-e8a6-42f7-b5fb-11de271e95db)
323 | 
324 | </details>
325 | 
326 | <details>
327 | <summary><b>Consensus Tool</b> - Multi-model debate and decision making</summary>
328 | 
329 | **Multi-model consensus debate:**
330 | 
331 | [Zen Consensus Debate](https://github.com/user-attachments/assets/76a23dd5-887a-4382-9cf0-642f5cf6219e)
332 | 
333 | </details>
334 | 
335 | <details>
336 | <summary><b>PreCommit Tool</b> - Comprehensive change validation</summary>
337 | 
338 | **Pre-commit validation workflow:**
339 | 
340 | <div align="center">
341 |   <img src="https://github.com/user-attachments/assets/584adfa6-d252-49b4-b5b0-0cd6e97fb2c6" width="950">
342 | </div>
343 | 
344 | </details>
345 | 
346 | <details>
347 | <summary><b>API Lookup Tool</b> - Current vs outdated API documentation</summary>
348 | 
349 | **Without Zen - outdated APIs:**
350 | 
351 | [API without Zen](https://github.com/user-attachments/assets/01a79dc9-ad16-4264-9ce1-76a56c3580ee)
352 | 
353 | **With Zen - current APIs:**
354 | 
355 | [API with Zen](https://github.com/user-attachments/assets/5c847326-4b66-41f7-8f30-f380453dce22)
356 | 
357 | </details>
358 | 
359 | <details>
360 | <summary><b>Challenge Tool</b> - Critical thinking vs reflexive agreement</summary>
361 | 
362 | **Without Zen:**
363 | 
364 | 
365 | 
366 | **With Zen:**
367 | 
368 | 
369 | 
370 | </details>
371 | 
372 | ## Key Features
373 | 
374 | **AI Orchestration**
375 | - **Auto model selection** - Claude picks the right AI for each task
376 | - **Multi-model workflows** - Chain different models in single conversations
377 | - **Conversation continuity** - Context preserved across tools and models
378 | - **[Context revival](docs/context-revival.md)** - Continue conversations even after context resets
379 | 
380 | **Model Support**
381 | - **Multiple providers** - Gemini, OpenAI, Azure, X.AI, OpenRouter, DIAL, Ollama
382 | - **Latest models** - GPT-5, Gemini 2.5 Pro, O3, Grok-4, local Llama
383 | - **[Thinking modes](docs/advanced-usage.md#thinking-modes)** - Control reasoning depth vs cost
384 | - **Vision support** - Analyze images, diagrams, screenshots
385 | 
386 | **Developer Experience**
387 | - **Guided workflows** - Systematic investigation prevents rushed analysis
388 | - **Smart file handling** - Auto-expand directories, manage token limits
389 | - **Web search integration** - Access current documentation and best practices
390 | - **[Large prompt support](docs/advanced-usage.md#working-with-large-prompts)** - Bypass MCP's 25K token limit
391 | 
392 | ## Example Workflows
393 | 
394 | **Multi-model Code Review:**
395 | ```
396 | "Perform a codereview using gemini pro and o3, then use planner to create a fix strategy"
397 | ```
398 | → Claude reviews code systematically → Consults Gemini Pro → Gets O3's perspective → Creates unified action plan
399 | 
400 | **Collaborative Debugging:**
401 | ```
402 | "Debug this race condition with max thinking mode, then validate the fix with precommit"
403 | ```
404 | → Deep investigation → Expert analysis → Solution implementation → Pre-commit validation
405 | 
406 | **Architecture Planning:**
407 | ```
408 | "Plan our microservices migration, get consensus from pro and o3 on the approach"
409 | ```
410 | → Structured planning → Multiple expert opinions → Consensus building → Implementation roadmap
411 | 
412 | 👉 **[Advanced Usage Guide](docs/advanced-usage.md)** for complex workflows, model configuration, and power-user features
413 | 
414 | ## Quick Links
415 | 
416 | **📖 Documentation**
417 | - [Docs Overview](docs/index.md) - High-level map of major guides
418 | - [Getting Started](docs/getting-started.md) - Complete setup guide
419 | - [Tools Reference](docs/tools/) - All tools with examples
420 | - [Advanced Usage](docs/advanced-usage.md) - Power user features
421 | - [Configuration](docs/configuration.md) - Environment variables, restrictions
422 | - [Adding Providers](docs/adding_providers.md) - Provider-specific setup (OpenAI, Azure, custom gateways)
423 | - [Model Ranking Guide](docs/model_ranking.md) - How intelligence scores drive auto-mode suggestions
424 | 
425 | **🔧 Setup & Support**
426 | - [WSL Setup](docs/wsl-setup.md) - Windows users
427 | - [Troubleshooting](docs/troubleshooting.md) - Common issues
428 | - [Contributing](docs/contributions.md) - Code standards, PR process
429 | 
430 | ## License
431 | 
432 | Apache 2.0 License - see [LICENSE](LICENSE) file for details.
433 | 
434 | ## Acknowledgments
435 | 
436 | Built with the power of **Multi-Model AI** collaboration 🤝
437 | - **A**ctual **I**ntelligence by real Humans
438 | - [MCP (Model Context Protocol)](https://modelcontextprotocol.com)
439 | - [Codex CLI](https://developers.openai.com/codex/cli)
440 | - [Claude Code](https://claude.ai/code)
441 | - [Gemini](https://ai.google.dev/)
442 | - [OpenAI](https://openai.com/)
443 | - [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/)
444 | 
445 | ### Star History
446 | 
447 | [](https://www.star-history.com/#BeehiveInnovations/zen-mcp-server&Date)
448 | 
```
--------------------------------------------------------------------------------
/AGENTS.md:
--------------------------------------------------------------------------------
```markdown
  1 | # Repository Guidelines
  2 | 
  3 | See `requirements.txt` and `requirements-dev.txt`
  4 | 
  5 | Also read CLAUDE.md and CLAUDE.local.md if available.
  6 | 
  7 | ## Project Structure & Module Organization
  8 | Zen MCP Server centers on `server.py`, which exposes MCP entrypoints and coordinates multi-model workflows. 
  9 | Feature-specific tools live in `tools/`, provider integrations in `providers/`, and shared helpers in `utils/`. 
 10 | Prompt and system context assets stay in `systemprompts/`, while configuration templates and automation scripts live under `conf/`, `scripts/`, and `docker/`. 
 11 | Unit tests sit in `tests/`; simulator-driven scenarios and log utilities are in `simulator_tests/` with the `communication_simulator_test.py` harness. 
 12 | Authoritative documentation and samples live in `docs/`, and runtime diagnostics are rotated in `logs/`.
 13 | 
 14 | ## Build, Test, and Development Commands
 15 | - `source .zen_venv/bin/activate` – activate the managed Python environment.
 16 | - `./run-server.sh` – install dependencies, refresh `.env`, and launch the MCP server locally.
 17 | - `./code_quality_checks.sh` – run Ruff autofix, Black, isort, and the default pytest suite.
 18 | - `python communication_simulator_test.py --quick` – smoke-test orchestration across tools and providers.
 19 | - `./run_integration_tests.sh [--with-simulator]` – exercise provider-dependent flows against remote or Ollama models.
 20 | 
 21 | Run code quality checks:
 22 | ```bash
 23 | source .zen_venv/bin/activate && ./code_quality_checks.sh
 24 | ```
 25 | 
 26 | For example, this is how we run an individual / all tests:
 27 | 
 28 | ```bash
 29 | source .zen_venv/bin/activate && pytest tests/test_auto_mode_model_listing.py -q
 30 | source .zen_venv/bin/activate && pytest -q
 31 | ```
 32 | 
 33 | ## Coding Style & Naming Conventions
 34 | Target Python 3.9+ with Black and isort using a 120-character line limit; Ruff enforces pycodestyle, pyflakes, bugbear, comprehension, and pyupgrade rules. Prefer explicit type hints, snake_case modules, and imperative commit-time docstrings. Extend workflows by defining hook or abstract methods instead of checking `hasattr()`/`getattr()`—inheritance-backed contracts keep behavior discoverable and testable.
 35 | 
 36 | ## Testing Guidelines
 37 | Mirror production modules inside `tests/` and name tests `test_<behavior>` or `Test<Feature>` classes. Run `python -m pytest tests/ -v -m "not integration"` before every commit, adding `--cov=. --cov-report=html` for coverage-sensitive changes. Use `python communication_simulator_test.py --verbose` or `--individual <case>` to validate cross-agent flows, and reserve `./run_integration_tests.sh` for provider or transport modifications. Capture relevant excerpts from `logs/mcp_server.log` or `logs/mcp_activity.log` when documenting failures.
 38 | 
 39 | ## Commit & Pull Request Guidelines
 40 | Follow Conventional Commits: `type(scope): summary`, where `type` is one of `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, or `chore`. Keep commits focused, referencing issues or simulator cases when helpful. Pull requests should outline intent, list validation commands executed, flag configuration or tool toggles, and attach screenshots or log snippets when user-visible behavior changes.
 41 | 
 42 | ## GitHub CLI Commands
 43 | The GitHub CLI (`gh`) streamlines issue and PR management directly from the terminal.
 44 | 
 45 | ### Viewing Issues
 46 | ```bash
 47 | # View issue details in current repository
 48 | gh issue view <issue-number>
 49 | 
 50 | # View issue from specific repository
 51 | gh issue view <issue-number> --repo owner/repo-name
 52 | 
 53 | # View issue with all comments
 54 | gh issue view <issue-number> --comments
 55 | 
 56 | # Get issue data as JSON for scripting
 57 | gh issue view <issue-number> --json title,body,author,state,labels,comments
 58 | 
 59 | # Open issue in web browser
 60 | gh issue view <issue-number> --web
 61 | ```
 62 | 
 63 | ### Managing Issues
 64 | ```bash
 65 | # List all open issues
 66 | gh issue list
 67 | 
 68 | # List issues with filters
 69 | gh issue list --label bug --state open
 70 | 
 71 | # Create a new issue
 72 | gh issue create --title "Issue title" --body "Description"
 73 | 
 74 | # Close an issue
 75 | gh issue close <issue-number>
 76 | 
 77 | # Reopen an issue
 78 | gh issue reopen <issue-number>
 79 | ```
 80 | 
 81 | ### Pull Request Operations
 82 | ```bash
 83 | # View PR details
 84 | gh pr view <pr-number>
 85 | 
 86 | # List pull requests
 87 | gh pr list
 88 | 
 89 | # Create a PR from current branch
 90 | gh pr create --title "PR title" --body "Description"
 91 | 
 92 | # Check out a PR locally
 93 | gh pr checkout <pr-number>
 94 | 
 95 | # Merge a PR
 96 | gh pr merge <pr-number>
 97 | ```
 98 | 
 99 | Install GitHub CLI: `brew install gh` (macOS) or visit https://cli.github.com for other platforms.
100 | 
101 | ## Security & Configuration Tips
102 | Store API keys and provider URLs in `.env` or your MCP client config; never commit secrets or generated log artifacts. Use `run-server.sh` to regenerate environments and verify connectivity after dependency changes. When adding providers or tools, sanitize prompts and responses, document required environment variables in `docs/`, and update `claude_config_example.json` if new capabilities ship by default.
103 | 
```
--------------------------------------------------------------------------------
/CLAUDE.md:
--------------------------------------------------------------------------------
```markdown
  1 | # Claude Development Guide for Zen MCP Server
  2 | 
  3 | This file contains essential commands and workflows for developing and maintaining the Zen MCP Server when working with Claude. Use these instructions to efficiently run quality checks, manage the server, check logs, and run tests.
  4 | 
  5 | ## Quick Reference Commands
  6 | 
  7 | ### Code Quality Checks
  8 | 
  9 | Before making any changes or submitting PRs, always run the comprehensive quality checks:
 10 | 
 11 | ```bash
 12 | # Activate virtual environment first
 13 | source .zen_venv/bin/activate
 14 | 
 15 | # Run all quality checks (linting, formatting, tests)
 16 | ./code_quality_checks.sh
 17 | ```
 18 | 
 19 | This script automatically runs:
 20 | - Ruff linting with auto-fix
 21 | - Black code formatting 
 22 | - Import sorting with isort
 23 | - Complete unit test suite (excluding integration tests)
 24 | - Verification that all checks pass 100%
 25 | 
 26 | **Run Integration Tests (requires API keys):**
 27 | ```bash
 28 | # Run integration tests that make real API calls
 29 | ./run_integration_tests.sh
 30 | 
 31 | # Run integration tests + simulator tests
 32 | ./run_integration_tests.sh --with-simulator
 33 | ```
 34 | 
 35 | ### Server Management
 36 | 
 37 | #### Setup/Update the Server
 38 | ```bash
 39 | # Run setup script (handles everything)
 40 | ./run-server.sh
 41 | ```
 42 | 
 43 | This script will:
 44 | - Set up Python virtual environment
 45 | - Install all dependencies
 46 | - Create/update .env file
 47 | - Configure MCP with Claude
 48 | - Verify API keys
 49 | 
 50 | #### View Logs
 51 | ```bash
 52 | # Follow logs in real-time
 53 | ./run-server.sh -f
 54 | 
 55 | # Or manually view logs
 56 | tail -f logs/mcp_server.log
 57 | ```
 58 | 
 59 | ### Log Management
 60 | 
 61 | #### View Server Logs
 62 | ```bash
 63 | # View last 500 lines of server logs
 64 | tail -n 500 logs/mcp_server.log
 65 | 
 66 | # Follow logs in real-time
 67 | tail -f logs/mcp_server.log
 68 | 
 69 | # View specific number of lines
 70 | tail -n 100 logs/mcp_server.log
 71 | 
 72 | # Search logs for specific patterns
 73 | grep "ERROR" logs/mcp_server.log
 74 | grep "tool_name" logs/mcp_activity.log
 75 | ```
 76 | 
 77 | #### Monitor Tool Executions Only
 78 | ```bash
 79 | # View tool activity log (focused on tool calls and completions)
 80 | tail -n 100 logs/mcp_activity.log
 81 | 
 82 | # Follow tool activity in real-time
 83 | tail -f logs/mcp_activity.log
 84 | 
 85 | # Use simple tail commands to monitor logs
 86 | tail -f logs/mcp_activity.log | grep -E "(TOOL_CALL|TOOL_COMPLETED|ERROR|WARNING)"
 87 | ```
 88 | 
 89 | #### Available Log Files
 90 | 
 91 | **Current log files (with proper rotation):**
 92 | ```bash
 93 | # Main server log (all activity including debug info) - 20MB max, 10 backups
 94 | tail -f logs/mcp_server.log
 95 | 
 96 | # Tool activity only (TOOL_CALL, TOOL_COMPLETED, etc.) - 20MB max, 5 backups  
 97 | tail -f logs/mcp_activity.log
 98 | ```
 99 | 
100 | **For programmatic log analysis (used by tests):**
101 | ```python
102 | # Import the LogUtils class from simulator tests
103 | from simulator_tests.log_utils import LogUtils
104 | 
105 | # Get recent logs
106 | recent_logs = LogUtils.get_recent_server_logs(lines=500)
107 | 
108 | # Check for errors
109 | errors = LogUtils.check_server_logs_for_errors()
110 | 
111 | # Search for specific patterns
112 | matches = LogUtils.search_logs_for_pattern("TOOL_CALL.*debug")
113 | ```
114 | 
115 | ### Testing
116 | 
117 | Simulation tests are available to test the MCP server in a 'live' scenario, using your configured
118 | API keys to ensure the models are working and the server is able to communicate back and forth. 
119 | 
120 | **IMPORTANT**: After any code changes, restart your Claude session for the changes to take effect.
121 | 
122 | #### Run All Simulator Tests
123 | ```bash
124 | # Run the complete test suite
125 | python communication_simulator_test.py
126 | 
127 | # Run tests with verbose output
128 | python communication_simulator_test.py --verbose
129 | ```
130 | 
131 | #### Quick Test Mode (Recommended for Time-Limited Testing)
132 | ```bash
133 | # Run quick test mode - 6 essential tests that provide maximum functionality coverage
134 | python communication_simulator_test.py --quick
135 | 
136 | # Run quick test mode with verbose output
137 | python communication_simulator_test.py --quick --verbose
138 | ```
139 | 
140 | **Quick mode runs these 6 essential tests:**
141 | - `cross_tool_continuation` - Cross-tool conversation memory testing (chat, thinkdeep, codereview, analyze, debug)
142 | - `conversation_chain_validation` - Core conversation threading and memory validation
143 | - `consensus_workflow_accurate` - Consensus tool with flash model and stance testing
144 | - `codereview_validation` - CodeReview tool with flash model and multi-step workflows
145 | - `planner_validation` - Planner tool with flash model and complex planning workflows
146 | - `token_allocation_validation` - Token allocation and conversation history buildup testing
147 | 
148 | **Why these 6 tests:** They cover the core functionality including conversation memory (`utils/conversation_memory.py`), chat tool functionality, file processing and deduplication, model selection (flash/flashlite/o3), and cross-tool conversation workflows. These tests validate the most critical parts of the system in minimal time.
149 | 
150 | **Note:** Some workflow tools (analyze, codereview, planner, consensus, etc.) require specific workflow parameters and may need individual testing rather than quick mode testing.
151 | 
152 | #### Run Individual Simulator Tests (For Detailed Testing)
153 | ```bash
154 | # List all available tests
155 | python communication_simulator_test.py --list-tests
156 | 
157 | # RECOMMENDED: Run tests individually for better isolation and debugging
158 | python communication_simulator_test.py --individual basic_conversation
159 | python communication_simulator_test.py --individual content_validation
160 | python communication_simulator_test.py --individual cross_tool_continuation
161 | python communication_simulator_test.py --individual memory_validation
162 | 
163 | # Run multiple specific tests
164 | python communication_simulator_test.py --tests basic_conversation content_validation
165 | 
166 | # Run individual test with verbose output for debugging
167 | python communication_simulator_test.py --individual memory_validation --verbose
168 | ```
169 | 
170 | Available simulator tests include:
171 | - `basic_conversation` - Basic conversation flow with chat tool
172 | - `content_validation` - Content validation and duplicate detection
173 | - `per_tool_deduplication` - File deduplication for individual tools
174 | - `cross_tool_continuation` - Cross-tool conversation continuation scenarios
175 | - `cross_tool_comprehensive` - Comprehensive cross-tool file deduplication and continuation
176 | - `line_number_validation` - Line number handling validation across tools
177 | - `memory_validation` - Conversation memory validation
178 | - `model_thinking_config` - Model-specific thinking configuration behavior
179 | - `o3_model_selection` - O3 model selection and usage validation
180 | - `ollama_custom_url` - Ollama custom URL endpoint functionality
181 | - `openrouter_fallback` - OpenRouter fallback behavior when it is the only configured provider
182 | - `openrouter_models` - OpenRouter model functionality and alias mapping
183 | - `token_allocation_validation` - Token allocation and conversation history validation
184 | - `testgen_validation` - TestGen tool validation with specific test function
185 | - `refactor_validation` - Refactor tool validation with codesmells
186 | - `conversation_chain_validation` - Conversation chain and threading validation
187 | - `consensus_stance` - Consensus tool validation with stance steering (for/against/neutral)
188 | 
189 | **Note**: All simulator tests should be run individually for optimal testing and better error isolation.
190 | 
191 | #### Run Unit Tests Only
192 | ```bash
193 | # Run all unit tests (excluding integration tests that require API keys)
194 | python -m pytest tests/ -v -m "not integration"
195 | 
196 | # Run specific test file
197 | python -m pytest tests/test_refactor.py -v
198 | 
199 | # Run specific test function
200 | python -m pytest tests/test_refactor.py::TestRefactorTool::test_format_response -v
201 | 
202 | # Run tests with coverage
203 | python -m pytest tests/ --cov=. --cov-report=html -m "not integration"
204 | ```
205 | 
206 | #### Run Integration Tests (Uses Free Local Models)
207 | 
208 | **Setup Requirements:**
209 | ```bash
210 | # 1. Install Ollama (if not already installed)
211 | # Visit https://ollama.ai or use brew install ollama
212 | 
213 | # 2. Start Ollama service
214 | ollama serve
215 | 
216 | # 3. Pull a model (e.g., llama3.2)
217 | ollama pull llama3.2
218 | 
219 | # 4. Set environment variable for custom provider
220 | export CUSTOM_API_URL="http://localhost:11434"
221 | ```
222 | 
223 | **Run Integration Tests:**
224 | ```bash
225 | # Run integration tests that make real API calls to local models
226 | python -m pytest tests/ -v -m "integration"
227 | 
228 | # Run specific integration test
229 | python -m pytest tests/test_prompt_regression.py::TestPromptIntegration::test_chat_normal_prompt -v
230 | 
231 | # Run all tests (unit + integration)
232 | python -m pytest tests/ -v
233 | ```
234 | 
235 | **Note**: Integration tests use the local-llama model via Ollama, which is completely FREE to run unlimited times. Requires `CUSTOM_API_URL` environment variable set to your local Ollama endpoint. They can be run safely in CI/CD but are excluded from code quality checks to keep them fast.
236 | 
237 | ### Development Workflow
238 | 
239 | #### Before Making Changes
240 | 1. Ensure virtual environment is activated: `source .zen_venv/bin/activate`
241 | 2. Run quality checks: `./code_quality_checks.sh`
242 | 3. Check logs to ensure server is healthy: `tail -n 50 logs/mcp_server.log`
243 | 
244 | #### After Making Changes
245 | 1. Run quality checks again: `./code_quality_checks.sh`
246 | 2. Run integration tests locally: `./run_integration_tests.sh`
247 | 3. Run quick test mode for fast validation: `python communication_simulator_test.py --quick`
248 | 4. Run relevant specific simulator tests if needed: `python communication_simulator_test.py --individual <test_name>`
249 | 5. Check logs for any issues: `tail -n 100 logs/mcp_server.log`
250 | 6. Restart Claude session to use updated code
251 | 
252 | #### Before Committing/PR
253 | 1. Final quality check: `./code_quality_checks.sh`
254 | 2. Run integration tests: `./run_integration_tests.sh`
255 | 3. Run quick test mode: `python communication_simulator_test.py --quick`
256 | 4. Run full simulator test suite (optional): `./run_integration_tests.sh --with-simulator`
257 | 5. Verify all tests pass 100%
258 | 
259 | ### Common Troubleshooting
260 | 
261 | #### Server Issues
262 | ```bash
263 | # Check if Python environment is set up correctly
264 | ./run-server.sh
265 | 
266 | # View recent errors
267 | grep "ERROR" logs/mcp_server.log | tail -20
268 | 
269 | # Check virtual environment
270 | which python
271 | # Should show: .../zen-mcp-server/.zen_venv/bin/python
272 | ```
273 | 
274 | #### Test Failures
275 | ```bash
276 | # First try quick test mode to see if it's a general issue
277 | python communication_simulator_test.py --quick --verbose
278 | 
279 | # Run individual failing test with verbose output
280 | python communication_simulator_test.py --individual <test_name> --verbose
281 | 
282 | # Check server logs during test execution
283 | tail -f logs/mcp_server.log
284 | 
285 | # Run tests with debug output
286 | LOG_LEVEL=DEBUG python communication_simulator_test.py --individual <test_name>
287 | ```
288 | 
289 | #### Linting Issues
290 | ```bash
291 | # Auto-fix most linting issues
292 | ruff check . --fix
293 | black .
294 | isort .
295 | 
296 | # Check what would be changed without applying
297 | ruff check .
298 | black --check .
299 | isort --check-only .
300 | ```
301 | 
302 | ### File Structure Context
303 | 
304 | - `./code_quality_checks.sh` - Comprehensive quality check script
305 | - `./run-server.sh` - Server setup and management
306 | - `communication_simulator_test.py` - End-to-end testing framework
307 | - `simulator_tests/` - Individual test modules
308 | - `tests/` - Unit test suite
309 | - `tools/` - MCP tool implementations
310 | - `providers/` - AI provider implementations
311 | - `systemprompts/` - System prompt definitions
312 | - `logs/` - Server log files
313 | 
314 | ### Environment Requirements
315 | 
316 | - Python 3.9+ with virtual environment
317 | - All dependencies from `requirements.txt` installed
318 | - Proper API keys configured in `.env` file
319 | 
320 | This guide provides everything needed to efficiently work with the Zen MCP Server codebase using Claude. Always run quality checks before and after making changes to ensure code integrity.
```
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
```python
1 | # Tests for Zen MCP Server
2 | 
```
--------------------------------------------------------------------------------
/conf/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """Configuration data for Zen MCP Server."""
2 | 
```
--------------------------------------------------------------------------------
/.claude/settings.json:
--------------------------------------------------------------------------------
```json
1 | {
2 |   "permissions": {
3 |     "allow": [
4 |     ],
5 |     "deny": []
6 |   }
7 | }
```
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
```yaml
1 | # These are supported funding model platforms
2 | 
3 | github: [guidedways]
4 | 
```
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
```
1 | pytest>=7.4.0
2 | pytest-asyncio>=0.21.0
3 | pytest-mock>=3.11.0
4 | black>=23.0.0
5 | ruff>=0.1.0
6 | isort>=5.12.0
7 | python-semantic-release>=10.3.0
8 | build>=1.0.0
9 | 
```
--------------------------------------------------------------------------------
/clink/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """Public helpers for clink components."""
2 | 
3 | from __future__ import annotations
4 | 
5 | from .registry import ClinkRegistry, get_registry
6 | 
7 | __all__ = ["ClinkRegistry", "get_registry"]
8 | 
```
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
```
 1 | [pytest]
 2 | testpaths = tests
 3 | python_files = test_*.py
 4 | python_classes = Test*
 5 | python_functions = test_*
 6 | asyncio_mode = auto
 7 | addopts = 
 8 |     -v
 9 |     --strict-markers
10 |     --tb=short
11 | markers =
12 |     integration: marks tests as integration tests that make real API calls with local-llama (free to run)
```
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
```
 1 | mcp>=1.0.0
 2 | google-genai>=1.19.0
 3 | openai>=1.55.2  # Minimum version for httpx 0.28.0 compatibility
 4 | pydantic>=2.0.0
 5 | python-dotenv>=1.0.0
 6 | importlib-resources>=5.0.0; python_version<"3.9"
 7 | 
 8 | # Development dependencies (install with pip install -r requirements-dev.txt)
 9 | # pytest>=7.4.0
10 | # pytest-asyncio>=0.21.0
11 | # pytest-mock>=3.11.0
```
--------------------------------------------------------------------------------
/providers/shared/provider_type.py:
--------------------------------------------------------------------------------
```python
 1 | """Enumeration describing which backend owns a given model."""
 2 | 
 3 | from enum import Enum
 4 | 
 5 | __all__ = ["ProviderType"]
 6 | 
 7 | 
 8 | class ProviderType(Enum):
 9 |     """Canonical identifiers for every supported provider backend."""
10 | 
11 |     GOOGLE = "google"  # Google Gemini (native API)
12 |     OPENAI = "openai"  # OpenAI API
13 |     AZURE = "azure"  # Azure OpenAI deployments
14 |     XAI = "xai"  # X.AI API
15 |     OPENROUTER = "openrouter"  # OpenRouter gateway (many models via one key)
16 |     CUSTOM = "custom"  # custom/local endpoints (e.g. Ollama via CUSTOM_API_URL)
17 |     DIAL = "dial"  # DIAL gateway
18 | 
```
--------------------------------------------------------------------------------
/examples/claude_config_macos.json:
--------------------------------------------------------------------------------
```json
 1 | {
 2 |   "comment": "macOS configuration using standalone server",
 3 |   "comment2": "Run './run-server.sh' to set up the environment and get exact paths",
 4 |   "comment3": "Use './run-server.sh -c' to display the correct configuration",
 5 |   "mcpServers": {
 6 |     "zen": {
 7 |       "command": "/path/to/zen-mcp-server/.zen_venv/bin/python",
 8 |       "args": ["/path/to/zen-mcp-server/server.py"]
 9 |     }
10 |   }
11 | }
```
--------------------------------------------------------------------------------
/claude_config_example.json:
--------------------------------------------------------------------------------
```json
 1 | {
 2 |   "comment": "Example Claude Desktop configuration for Zen MCP Server",
 3 |   "comment2": "Run './run-server.sh -c' to get the exact configuration for your system",
 4 |   "comment3": "For platform-specific examples, see the examples/ directory",
 5 |   "mcpServers": {
 6 |     "zen": {
 7 |       "command": "/path/to/zen-mcp-server/.zen_venv/bin/python",
 8 |       "args": ["/path/to/zen-mcp-server/server.py"]
 9 |     }
10 |   }
11 | }
```
--------------------------------------------------------------------------------
/examples/claude_config_wsl.json:
--------------------------------------------------------------------------------
```json
 1 | {
 2 |   "comment": "Windows configuration using WSL with standalone server",
 3 |   "comment2": "Run './run-server.sh' in WSL to set up the environment and get exact paths",
 4 |   "comment3": "Use './run-server.sh -c' to display the correct configuration",
 5 |   "mcpServers": {
 6 |     "zen": {
 7 |       "command": "wsl.exe",
 8 |       "args": [
 9 |         "/path/to/zen-mcp-server/.zen_venv/bin/python",
10 |         "/path/to/zen-mcp-server/server.py"
11 |       ]
12 |     }
13 |   }
14 | }
```
--------------------------------------------------------------------------------
/conf/cli_clients/gemini.json:
--------------------------------------------------------------------------------
```json
 1 | {
 2 |   "name": "gemini",
 3 |   "command": "gemini",
 4 |   "additional_args": [
 5 |     "--yolo"
 6 |   ],
 7 |   "env": {},
 8 |   "roles": {
 9 |     "default": {
10 |       "prompt_path": "systemprompts/clink/default.txt",
11 |       "role_args": []
12 |     },
13 |     "planner": {
14 |       "prompt_path": "systemprompts/clink/default_planner.txt",
15 |       "role_args": []
16 |     },
17 |     "codereviewer": {
18 |       "prompt_path": "systemprompts/clink/default_codereviewer.txt",
19 |       "role_args": []
20 |     }
21 |   }
22 | }
23 | 
```
--------------------------------------------------------------------------------
/.claude/commands/fix-github-issue.md:
--------------------------------------------------------------------------------
```markdown
 1 | Please analyze and fix the GitHub issue: $ARGUMENTS.
 2 | 
 3 | Follow these steps:
 4 | 
 5 | 1. Use `gh issue view` to get the issue details
 6 | 2. Understand the problem described in the issue
 7 | 3. Search the codebase for relevant files
 8 | 4. Implement the necessary changes to fix the issue
 9 | 5. Write and run tests to verify the fix
10 | 6. Ensure code passes linting and type checking
11 | 7. Create a descriptive commit message
12 | 8. Push and create a PR
13 | 
14 | Remember to use the GitHub CLI (`gh`) for all GitHub-related tasks.
15 | 
```
--------------------------------------------------------------------------------
/tools/simple/__init__.py:
--------------------------------------------------------------------------------
```python
 1 | """
 2 | Simple tools for Zen MCP.
 3 | 
 4 | Simple tools follow a basic request → AI model → response pattern.
 5 | They inherit from SimpleTool which provides streamlined functionality
 6 | for tools that don't need multi-step workflows.
 7 | 
 8 | Available simple tools:
 9 | - chat: General chat and collaborative thinking
10 | - consensus: Multi-perspective analysis
11 | - listmodels: Model listing and information
12 | - testgen: Test generation
13 | - tracer: Execution tracing
14 | """
15 | 
16 | from .base import SimpleTool
17 | 
18 | __all__ = ["SimpleTool"]
19 | 
```
--------------------------------------------------------------------------------
/conf/cli_clients/codex.json:
--------------------------------------------------------------------------------
```json
 1 | {
 2 |   "name": "codex",
 3 |   "command": "codex",
 4 |   "additional_args": [
 5 |     "--json",
 6 |     "--dangerously-bypass-approvals-and-sandbox"
 7 |   ],
 8 |   "env": {},
 9 |   "roles": {
10 |     "default": {
11 |       "prompt_path": "systemprompts/clink/default.txt",
12 |       "role_args": []
13 |     },
14 |     "planner": {
15 |       "prompt_path": "systemprompts/clink/default_planner.txt",
16 |       "role_args": []
17 |     },
18 |     "codereviewer": {
19 |       "prompt_path": "systemprompts/clink/codex_codereviewer.txt",
20 |       "role_args": []
21 |     }
22 |   }
23 | }
24 | 
```
--------------------------------------------------------------------------------
/conf/cli_clients/claude.json:
--------------------------------------------------------------------------------
```json
 1 | {
 2 |   "name": "claude",
 3 |   "command": "claude",
 4 |   "additional_args": [
 5 |     "--permission-mode",
 6 |     "acceptEdits",
 7 |     "--model",
 8 |     "sonnet"
 9 |   ],
10 |   "env": {},
11 |   "roles": {
12 |     "default": {
13 |       "prompt_path": "systemprompts/clink/default.txt",
14 |       "role_args": []
15 |     },
16 |     "planner": {
17 |       "prompt_path": "systemprompts/clink/default_planner.txt",
18 |       "role_args": []
19 |     },
20 |     "codereviewer": {
21 |       "prompt_path": "systemprompts/clink/default_codereviewer.txt",
22 |       "role_args": []
23 |     }
24 |   }
25 | }
26 | 
```
--------------------------------------------------------------------------------
/tools/shared/__init__.py:
--------------------------------------------------------------------------------
```python
 1 | """
 2 | Shared infrastructure for Zen MCP tools.
 3 | 
 4 | This module contains the core base classes and utilities that are shared
 5 | across all tool types. It provides the foundation for the tool architecture.
 6 | """
 7 | 
 8 | from .base_models import BaseWorkflowRequest, ConsolidatedFindings, ToolRequest, WorkflowRequest
 9 | from .base_tool import BaseTool
10 | from .schema_builders import SchemaBuilder
11 | 
12 | __all__ = [
13 |     "BaseTool",
14 |     "ToolRequest",
15 |     "BaseWorkflowRequest",
16 |     "WorkflowRequest",
17 |     "ConsolidatedFindings",
18 |     "SchemaBuilder",
19 | ]
20 | 
```
--------------------------------------------------------------------------------
/providers/registries/__init__.py:
--------------------------------------------------------------------------------
```python
 1 | """Registry implementations for provider capability manifests."""
 2 | 
 3 | from .azure import AzureModelRegistry
 4 | from .custom import CustomEndpointModelRegistry
 5 | from .dial import DialModelRegistry
 6 | from .gemini import GeminiModelRegistry
 7 | from .openai import OpenAIModelRegistry
 8 | from .openrouter import OpenRouterModelRegistry
 9 | from .xai import XAIModelRegistry
10 | 
11 | __all__ = [
12 |     "AzureModelRegistry",
13 |     "CustomEndpointModelRegistry",
14 |     "DialModelRegistry",
15 |     "GeminiModelRegistry",
16 |     "OpenAIModelRegistry",
17 |     "OpenRouterModelRegistry",
18 |     "XAIModelRegistry",
19 | ]
20 | 
```
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
```python
 1 | """
 2 | Utility functions for Zen MCP Server
 3 | """
 4 | 
 5 | from .file_types import CODE_EXTENSIONS, FILE_CATEGORIES, PROGRAMMING_EXTENSIONS, TEXT_EXTENSIONS
 6 | from .file_utils import expand_paths, read_file_content, read_files
 7 | from .security_config import EXCLUDED_DIRS
 8 | from .token_utils import check_token_limit, estimate_tokens
 9 | 
10 | __all__ = [
11 |     "read_files",
12 |     "read_file_content",
13 |     "expand_paths",
14 |     "CODE_EXTENSIONS",
15 |     "PROGRAMMING_EXTENSIONS",
16 |     "TEXT_EXTENSIONS",
17 |     "FILE_CATEGORIES",
18 |     "EXCLUDED_DIRS",
19 |     "estimate_tokens",
20 |     "check_token_limit",
21 | ]
22 | 
```
--------------------------------------------------------------------------------
/providers/shared/__init__.py:
--------------------------------------------------------------------------------
```python
 1 | """Shared data structures and helpers for model providers."""
 2 | 
 3 | from .model_capabilities import ModelCapabilities
 4 | from .model_response import ModelResponse
 5 | from .provider_type import ProviderType
 6 | from .temperature import (
 7 |     DiscreteTemperatureConstraint,
 8 |     FixedTemperatureConstraint,
 9 |     RangeTemperatureConstraint,
10 |     TemperatureConstraint,
11 | )
12 | 
13 | __all__ = [
14 |     "ModelCapabilities",
15 |     "ModelResponse",
16 |     "ProviderType",
17 |     "TemperatureConstraint",
18 |     "FixedTemperatureConstraint",
19 |     "RangeTemperatureConstraint",
20 |     "DiscreteTemperatureConstraint",
21 | ]
22 | 
```
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
```yaml
 1 | blank_issues_enabled: false
 2 | contact_links:
 3 |   - name: 💬 General Discussion
 4 |     url: https://github.com/BeehiveInnovations/zen-mcp-server/discussions
 5 |     about: Ask questions, share ideas, or discuss usage patterns with the community
 6 |   - name: 📚 Documentation
 7 |     url: https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/README.md
 8 |     about: Check the README for setup instructions and usage examples
 9 |   - name: 🤝 Contributing Guide
10 |     url: https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/CONTRIBUTING.md
11 |     about: Learn how to contribute to the project
12 | 
13 | 
```
--------------------------------------------------------------------------------
/providers/registries/xai.py:
--------------------------------------------------------------------------------
```python
 1 | """Registry loader for X.AI model capabilities."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from ..shared import ProviderType
 6 | from .base import CapabilityModelRegistry
 7 | 
 8 | 
class XAIModelRegistry(CapabilityModelRegistry):
    """Capability registry backed by ``conf/xai_models.json``."""

    def __init__(self, config_path: str | None = None) -> None:
        """Initialise the registry from the X.AI capability manifest.

        Args:
            config_path: Optional explicit path to the manifest; when omitted
                the base class resolves it via ``XAI_MODELS_CONFIG_PATH`` or
                the bundled ``xai_models.json``.
        """
        super().__init__(
            env_var_name="XAI_MODELS_CONFIG_PATH",
            default_filename="xai_models.json",
            provider=ProviderType.XAI,
            friendly_prefix="X.AI ({model})",
            config_path=config_path,
        )
20 | 
```
--------------------------------------------------------------------------------
/providers/registries/dial.py:
--------------------------------------------------------------------------------
```python
 1 | """Registry loader for DIAL provider capabilities."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from ..shared import ProviderType
 6 | from .base import CapabilityModelRegistry
 7 | 
 8 | 
class DialModelRegistry(CapabilityModelRegistry):
    """Capability registry backed by ``conf/dial_models.json``."""

    def __init__(self, config_path: str | None = None) -> None:
        """Initialise the registry from the DIAL capability manifest.

        Args:
            config_path: Optional explicit path to the manifest; when omitted
                the base class resolves it via ``DIAL_MODELS_CONFIG_PATH`` or
                the bundled ``dial_models.json``.
        """
        super().__init__(
            env_var_name="DIAL_MODELS_CONFIG_PATH",
            default_filename="dial_models.json",
            provider=ProviderType.DIAL,
            friendly_prefix="DIAL ({model})",
            config_path=config_path,
        )
20 | 
```
--------------------------------------------------------------------------------
/clink/parsers/base.py:
--------------------------------------------------------------------------------
```python
 1 | """Parser interfaces for clink runner outputs."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from dataclasses import dataclass
 6 | from typing import Any
 7 | 
 8 | 
@dataclass
class ParsedCLIResponse:
    """Result of parsing CLI stdout/stderr."""

    # Human-readable message extracted from the CLI output.
    content: str
    # Structured details preserved alongside the content (e.g. token usage).
    metadata: dict[str, Any]
15 | 
16 | 
class ParserError(RuntimeError):
    """Raised when CLI output cannot be parsed into a structured response.

    Agents catch this to decide whether a failed CLI run can be recovered
    (see ``CodexAgent._recover_from_error``).
    """
19 | 
20 | 
class BaseParser:
    """Base interface for CLI output parsers."""

    # Registry key used by ``clink.parsers.get_parser`` to look up this parser.
    name: str = "base"

    def parse(self, stdout: str, stderr: str) -> ParsedCLIResponse:
        """Convert raw CLI output into a ParsedCLIResponse; subclasses must override."""
        raise NotImplementedError("Parsers must implement parse()")
28 | 
```
--------------------------------------------------------------------------------
/providers/registries/gemini.py:
--------------------------------------------------------------------------------
```python
 1 | """Registry loader for Gemini model capabilities."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from ..shared import ProviderType
 6 | from .base import CapabilityModelRegistry
 7 | 
 8 | 
class GeminiModelRegistry(CapabilityModelRegistry):
    """Capability registry backed by ``conf/gemini_models.json``."""

    def __init__(self, config_path: str | None = None) -> None:
        """Initialise the registry from the Gemini capability manifest.

        Args:
            config_path: Optional explicit path to the manifest; when omitted
                the base class resolves it via ``GEMINI_MODELS_CONFIG_PATH``
                or the bundled ``gemini_models.json``.
        """
        super().__init__(
            env_var_name="GEMINI_MODELS_CONFIG_PATH",
            default_filename="gemini_models.json",
            provider=ProviderType.GOOGLE,
            friendly_prefix="Gemini ({model})",
            config_path=config_path,
        )
20 | 
```
--------------------------------------------------------------------------------
/providers/registries/openai.py:
--------------------------------------------------------------------------------
```python
 1 | """Registry loader for OpenAI model capabilities."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from ..shared import ProviderType
 6 | from .base import CapabilityModelRegistry
 7 | 
 8 | 
class OpenAIModelRegistry(CapabilityModelRegistry):
    """Capability registry backed by ``conf/openai_models.json``."""

    def __init__(self, config_path: str | None = None) -> None:
        """Initialise the registry from the OpenAI capability manifest.

        Args:
            config_path: Optional explicit path to the manifest; when omitted
                the base class resolves it via ``OPENAI_MODELS_CONFIG_PATH``
                or the bundled ``openai_models.json``.
        """
        super().__init__(
            env_var_name="OPENAI_MODELS_CONFIG_PATH",
            default_filename="openai_models.json",
            provider=ProviderType.OPENAI,
            friendly_prefix="OpenAI ({model})",
            config_path=config_path,
        )
20 | 
```
--------------------------------------------------------------------------------
/providers/__init__.py:
--------------------------------------------------------------------------------
```python
 1 | """Model provider abstractions for supporting multiple AI providers."""
 2 | 
 3 | from .azure_openai import AzureOpenAIProvider
 4 | from .base import ModelProvider
 5 | from .gemini import GeminiModelProvider
 6 | from .openai import OpenAIModelProvider
 7 | from .openai_compatible import OpenAICompatibleProvider
 8 | from .openrouter import OpenRouterProvider
 9 | from .registry import ModelProviderRegistry
10 | from .shared import ModelCapabilities, ModelResponse
11 | 
12 | __all__ = [
13 |     "ModelProvider",
14 |     "ModelResponse",
15 |     "ModelCapabilities",
16 |     "ModelProviderRegistry",
17 |     "AzureOpenAIProvider",
18 |     "GeminiModelProvider",
19 |     "OpenAIModelProvider",
20 |     "OpenAICompatibleProvider",
21 |     "OpenRouterProvider",
22 | ]
23 | 
```
--------------------------------------------------------------------------------
/providers/shared/model_response.py:
--------------------------------------------------------------------------------
```python
 1 | """Dataclass used to normalise provider SDK responses."""
 2 | 
 3 | from dataclasses import dataclass, field
 4 | from typing import Any
 5 | 
 6 | from .provider_type import ProviderType
 7 | 
 8 | __all__ = ["ModelResponse"]
 9 | 
10 | 
@dataclass
class ModelResponse:
    """Portable representation of a provider completion."""

    # Generated text returned by the provider.
    content: str
    # Token accounting as reported by the provider SDK; may be empty.
    usage: dict[str, int] = field(default_factory=dict)
    # Canonical model identifier that produced this response.
    model_name: str = ""
    # Display-friendly label (e.g. "Gemini ({model})" from the registry prefix).
    friendly_name: str = ""
    # Provider that produced the response; defaults to Google.
    provider: ProviderType = ProviderType.GOOGLE
    # Provider-specific extras that don't fit the fields above.
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def total_tokens(self) -> int:
        """Return the total token count if the provider reported usage data."""

        return self.usage.get("total_tokens", 0)
27 | 
```
--------------------------------------------------------------------------------
/clink/agents/__init__.py:
--------------------------------------------------------------------------------
```python
 1 | """Agent factory for clink CLI integrations."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from clink.models import ResolvedCLIClient
 6 | 
 7 | from .base import AgentOutput, BaseCLIAgent, CLIAgentError
 8 | from .claude import ClaudeAgent
 9 | from .codex import CodexAgent
10 | from .gemini import GeminiAgent
11 | 
# Maps runner keys to specialised agent classes; unknown runners fall back to
# BaseCLIAgent in create_agent().
_AGENTS: dict[str, type[BaseCLIAgent]] = {
    "gemini": GeminiAgent,
    "codex": CodexAgent,
    "claude": ClaudeAgent,
}


def create_agent(client: ResolvedCLIClient) -> BaseCLIAgent:
    """Instantiate the CLI agent registered for *client*.

    The lookup key is the client's explicit ``runner`` when set, otherwise its
    ``name``, compared case-insensitively. Unrecognised keys yield a plain
    ``BaseCLIAgent`` rather than raising.
    """
    agent_key = (client.runner or client.name).lower()
    agent_cls = _AGENTS.get(agent_key, BaseCLIAgent)
    return agent_cls(client)
23 | 
24 | 
25 | __all__ = [
26 |     "AgentOutput",
27 |     "BaseCLIAgent",
28 |     "CLIAgentError",
29 |     "create_agent",
30 | ]
31 | 
```
--------------------------------------------------------------------------------
/tests/test_clink_parsers.py:
--------------------------------------------------------------------------------
```python
 1 | import pytest
 2 | 
 3 | from clink.parsers.base import ParserError
 4 | from clink.parsers.codex import CodexJSONLParser
 5 | 
 6 | 
def test_codex_parser_success():
    """Parser extracts the agent message text and usage metadata from JSONL."""
    parser = CodexJSONLParser()
    stdout = """
{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"Hello"}}
{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}
"""
    parsed = parser.parse(stdout=stdout, stderr="")
    assert parsed.content == "Hello"
    assert parsed.metadata["usage"]["output_tokens"] == 5
15 |     assert parsed.metadata["usage"]["output_tokens"] == 5
16 | 
17 | 
def test_codex_parser_requires_agent_message():
    """A transcript with no agent_message item cannot be parsed into a response."""
    parser = CodexJSONLParser()
    stdout = '{"type":"turn.completed"}'
    with pytest.raises(ParserError):
        parser.parse(stdout=stdout, stderr="")
23 | 
```
--------------------------------------------------------------------------------
/tools/shared/exceptions.py:
--------------------------------------------------------------------------------
```python
 1 | """
 2 | Custom exceptions for Zen MCP tools.
 3 | 
 4 | These exceptions allow tools to signal protocol-level errors that should be surfaced
 5 | to MCP clients using the `isError` flag on `CallToolResult`. Raising one of these
 6 | exceptions ensures the low-level server adapter marks the result as an error while
 7 | preserving the structured payload we pass through the exception message.
 8 | """
 9 | 
10 | 
class ToolExecutionError(RuntimeError):
    """Tool-level failure that must be surfaced with `isError=True`.

    The payload doubles as the exception message, so the low-level server
    adapter can forward it verbatim to MCP clients.
    """

    def __init__(self, payload: str):
        """Record *payload* (typically serialized JSON) and use it as the message.

        Args:
            payload: Serialized error payload (typically JSON) to return to the client.
        """
        self.payload = payload
        super().__init__(payload)
21 | 
```
--------------------------------------------------------------------------------
/systemprompts/clink/default.txt:
--------------------------------------------------------------------------------
```
1 | You are an external CLI agent operating inside the Zen MCP server with full repository access.
2 | 
3 | - Use terminal tools to inspect files and gather context before responding; cite exact paths, symbols, or commands when they matter.
4 | - Provide concise, actionable responses in Markdown tailored to engineers working from the CLI.
5 | - Keep output tight—prefer summaries and short bullet lists, and avoid quoting large sections of source unless essential.
6 | - Surface assumptions, missing inputs, or follow-up checks that would improve confidence in the result.
7 | - If a request is unsafe or unsupported, explain the limitation and suggest a safer alternative.
8 | - Always conclude with `<SUMMARY>...</SUMMARY>` containing a terse (≤500 words) recap of key findings and immediate next steps.
9 | 
```
--------------------------------------------------------------------------------
/clink/parsers/__init__.py:
--------------------------------------------------------------------------------
```python
 1 | """Parser registry for clink."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from .base import BaseParser, ParsedCLIResponse, ParserError
 6 | from .claude import ClaudeJSONParser
 7 | from .codex import CodexJSONLParser
 8 | from .gemini import GeminiJSONParser
 9 | 
_PARSER_CLASSES: dict[str, type[BaseParser]] = {
    CodexJSONLParser.name: CodexJSONLParser,
    GeminiJSONParser.name: GeminiJSONParser,
    ClaudeJSONParser.name: ClaudeJSONParser,
}


def get_parser(name: str) -> BaseParser:
    """Return a fresh parser instance registered under *name* (case-insensitive).

    Raises:
        ParserError: If no parser class is registered for the given name.
    """
    key = (name or "").lower()
    parser_cls = _PARSER_CLASSES.get(key)
    if parser_cls is None:
        raise ParserError(f"No parser registered for '{name}'")
    return parser_cls()
23 | 
24 | 
25 | __all__ = [
26 |     "BaseParser",
27 |     "ParsedCLIResponse",
28 |     "ParserError",
29 |     "get_parser",
30 | ]
31 | 
```
--------------------------------------------------------------------------------
/tools/workflow/__init__.py:
--------------------------------------------------------------------------------
```python
 1 | """
 2 | Workflow tools for Zen MCP.
 3 | 
 4 | Workflow tools follow a multi-step pattern with forced pauses between steps
 5 | to encourage thorough investigation and analysis. They inherit from WorkflowTool
 6 | which combines BaseTool with BaseWorkflowMixin.
 7 | 
 8 | Available workflow tools:
 9 | - debug: Systematic investigation and root cause analysis
10 | - planner: Sequential planning (special case - no AI calls)
11 | - analyze: Code analysis workflow
12 | - codereview: Code review workflow
13 | - precommit: Pre-commit validation workflow
14 | - refactor: Refactoring analysis workflow
15 | - thinkdeep: Deep thinking workflow
16 | """
17 | 
18 | from .base import WorkflowTool
19 | from .schema_builders import WorkflowSchemaBuilder
20 | from .workflow_mixin import BaseWorkflowMixin
21 | 
22 | __all__ = ["WorkflowTool", "WorkflowSchemaBuilder", "BaseWorkflowMixin"]
23 | 
```
--------------------------------------------------------------------------------
/systemprompts/clink/default_planner.txt:
--------------------------------------------------------------------------------
```
1 | You are the planning agent operating through the Zen MCP server.
2 | 
3 | - Respond with JSON only using the planning schema fields (status, step_number, total_steps, metadata, plan_summary, etc.); request missing context via the required `files_required_to_continue` JSON structure.
4 | - Inspect any relevant files, scripts, or docs before outlining the plan; leverage your full CLI access for research.
5 | - Break work into numbered phases with dependencies, validation gates, alternatives, and explicit next actions; highlight risks with mitigations.
6 | - Keep each step concise—avoid repeating source excerpts and limit descriptions to the essentials another engineer needs to execute.
7 | - Ensure the `plan_summary` (when planning is complete) is compact (≤500 words) and captures phases, risks, and immediate next actions.
8 | 
```
--------------------------------------------------------------------------------
/tests/test_consensus_schema.py:
--------------------------------------------------------------------------------
```python
 1 | """Schema-related tests for ConsensusTool."""
 2 | 
 3 | from types import MethodType
 4 | 
 5 | from tools.consensus import ConsensusTool
 6 | 
 7 | 
def test_consensus_models_field_includes_available_models(monkeypatch):
    """Consensus schema should surface available model guidance like single-model tools."""

    tool = ConsensusTool()

    # Stub the ranking helpers so the assertion doesn't depend on which
    # providers/API keys happen to be configured in the test environment.
    monkeypatch.setattr(
        tool,
        "_get_ranked_model_summaries",
        MethodType(lambda self, limit=5: (["gemini-2.5-pro (score 100, 1.0M ctx, thinking)"], 1, False), tool),
    )
    monkeypatch.setattr(tool, "_get_restriction_note", MethodType(lambda self: None, tool))

    schema = tool.get_input_schema()
    models_field_description = schema["properties"]["models"]["description"]

    # The description should point users at the listmodels tool and list top models.
    assert "listmodels" in models_field_description
    assert "Top models" in models_field_description
```
--------------------------------------------------------------------------------
/systemprompts/clink/default_codereviewer.txt:
--------------------------------------------------------------------------------
```
1 | You are an external CLI code reviewer operating inside the Zen MCP server with full repository access.
2 | 
3 | - Inspect any relevant files directly—run linters or tests as needed—and mention important commands you rely on.
4 | - Report findings in severity order (Critical, High, Medium, Low) across security, correctness, performance, and maintainability while staying within the provided scope.
5 | - Keep feedback succinct—prioritise the highest-impact issues, avoid large code dumps, and summarise recommendations clearly.
6 | - For each issue cite precise references (file:line plus a short excerpt or symbol name), describe the impact, and recommend a concrete fix or mitigation.
7 | - Recognise positive practices worth keeping so peers understand what to preserve.
8 | - Always conclude with `<SUMMARY>...</SUMMARY>` highlighting the top risks, recommended fixes, and key positives in ≤500 words.
9 | 
```
--------------------------------------------------------------------------------
/systemprompts/clink/codex_codereviewer.txt:
--------------------------------------------------------------------------------
```
1 | /review You are the Codex CLI code reviewer operating inside the Zen MCP server with full repository access.
2 | 
3 | - Inspect any relevant files directly—use your full repository access, run linters or tests as needed, and mention key commands when they inform your findings.
4 | - Report issues in severity order (Critical, High, Medium, Low) spanning security, correctness, performance, and maintainability while staying within scope.
5 | - Keep the review succinct—prioritise the highest-impact findings, avoid extensive code dumps, and summarise recommendations clearly.
6 | - For each issue cite precise references (file:line plus a short excerpt or symbol name), describe the impact, and recommend a concrete fix or mitigation.
7 | - Recognise positive practices worth keeping so peers understand what to preserve.
8 | - Always conclude with `<SUMMARY>...</SUMMARY>` capturing the top issues, fixes, and positives in ≤500 words.
9 | 
```
--------------------------------------------------------------------------------
/scripts/sync_version.py:
--------------------------------------------------------------------------------
```python
 1 | #!/usr/bin/env python3
 2 | """
 3 | Sync version from pyproject.toml to config.py
 4 | This script is called by GitHub Actions after semantic-release updates the version
 5 | """
 6 | 
 7 | import re
 8 | from datetime import datetime
 9 | 
10 | import toml
11 | 
12 | 
def update_config_version():
    """Copy the version from pyproject.toml into config.py and stamp today's date."""
    # Pull the canonical version string from pyproject.toml.
    with open("pyproject.toml") as fh:
        version = toml.load(fh)["project"]["version"]

    with open("config.py") as fh:
        source = fh.read()

    # Rewrite the __version__ and __updated__ assignments in place.
    source = re.sub(r'__version__ = "[^"]*"', f'__version__ = "{version}"', source)
    today = datetime.now().strftime("%Y-%m-%d")
    source = re.sub(r'__updated__ = "[^"]*"', f'__updated__ = "{today}"', source)

    with open("config.py", "w") as fh:
        fh.write(source)

    print(f"Updated config.py to version {version}")
35 | 
36 | 
37 | if __name__ == "__main__":
38 |     update_config_version()
39 | 
```
--------------------------------------------------------------------------------
/providers/registries/custom.py:
--------------------------------------------------------------------------------
```python
 1 | """Registry loader for custom OpenAI-compatible endpoints."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from ..shared import ModelCapabilities, ProviderType
 6 | from .base import CAPABILITY_FIELD_NAMES, CapabilityModelRegistry
 7 | 
 8 | 
class CustomEndpointModelRegistry(CapabilityModelRegistry):
    """Capability registry backed by ``conf/custom_models.json``."""

    def __init__(self, config_path: str | None = None) -> None:
        """Initialise the registry for custom OpenAI-compatible endpoints.

        Args:
            config_path: Optional explicit path to the manifest; when omitted
                the base class resolves it via ``CUSTOM_MODELS_CONFIG_PATH``
                or the bundled ``custom_models.json``.
        """
        super().__init__(
            env_var_name="CUSTOM_MODELS_CONFIG_PATH",
            default_filename="custom_models.json",
            provider=ProviderType.CUSTOM,
            friendly_prefix="Custom ({model})",
            config_path=config_path,
        )

    def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]:
        """Build a ModelCapabilities from a raw manifest entry.

        Keys outside CAPABILITY_FIELD_NAMES are dropped, and the provider
        defaults to CUSTOM when the entry doesn't specify one. The second
        tuple element (extra metadata) is always empty for custom endpoints.
        """
        filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES}
        filtered.setdefault("provider", ProviderType.CUSTOM)
        capability = ModelCapabilities(**filtered)
        return capability, {}
26 | 
```
--------------------------------------------------------------------------------
/systemprompts/__init__.py:
--------------------------------------------------------------------------------
```python
 1 | """
 2 | System prompts for Zen MCP tools
 3 | """
 4 | 
 5 | from .analyze_prompt import ANALYZE_PROMPT
 6 | from .chat_prompt import CHAT_PROMPT
 7 | from .codereview_prompt import CODEREVIEW_PROMPT
 8 | from .consensus_prompt import CONSENSUS_PROMPT
 9 | from .debug_prompt import DEBUG_ISSUE_PROMPT
10 | from .docgen_prompt import DOCGEN_PROMPT
11 | from .generate_code_prompt import GENERATE_CODE_PROMPT
12 | from .planner_prompt import PLANNER_PROMPT
13 | from .precommit_prompt import PRECOMMIT_PROMPT
14 | from .refactor_prompt import REFACTOR_PROMPT
15 | from .secaudit_prompt import SECAUDIT_PROMPT
16 | from .testgen_prompt import TESTGEN_PROMPT
17 | from .thinkdeep_prompt import THINKDEEP_PROMPT
18 | from .tracer_prompt import TRACER_PROMPT
19 | 
20 | __all__ = [
21 |     "THINKDEEP_PROMPT",
22 |     "CODEREVIEW_PROMPT",
23 |     "DEBUG_ISSUE_PROMPT",
24 |     "DOCGEN_PROMPT",
25 |     "GENERATE_CODE_PROMPT",
26 |     "ANALYZE_PROMPT",
27 |     "CHAT_PROMPT",
28 |     "CONSENSUS_PROMPT",
29 |     "PLANNER_PROMPT",
30 |     "PRECOMMIT_PROMPT",
31 |     "REFACTOR_PROMPT",
32 |     "SECAUDIT_PROMPT",
33 |     "TESTGEN_PROMPT",
34 |     "TRACER_PROMPT",
35 | ]
36 | 
```
--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
```python
 1 | """
 2 | Tool implementations for Zen MCP Server
 3 | """
 4 | 
 5 | from .analyze import AnalyzeTool
 6 | from .apilookup import LookupTool
 7 | from .challenge import ChallengeTool
 8 | from .chat import ChatTool
 9 | from .clink import CLinkTool
10 | from .codereview import CodeReviewTool
11 | from .consensus import ConsensusTool
12 | from .debug import DebugIssueTool
13 | from .docgen import DocgenTool
14 | from .listmodels import ListModelsTool
15 | from .planner import PlannerTool
16 | from .precommit import PrecommitTool
17 | from .refactor import RefactorTool
18 | from .secaudit import SecauditTool
19 | from .testgen import TestGenTool
20 | from .thinkdeep import ThinkDeepTool
21 | from .tracer import TracerTool
22 | from .version import VersionTool
23 | 
24 | __all__ = [
25 |     "ThinkDeepTool",
26 |     "CodeReviewTool",
27 |     "DebugIssueTool",
28 |     "DocgenTool",
29 |     "AnalyzeTool",
30 |     "LookupTool",
31 |     "ChatTool",
32 |     "CLinkTool",
33 |     "ConsensusTool",
34 |     "ListModelsTool",
35 |     "PlannerTool",
36 |     "PrecommitTool",
37 |     "ChallengeTool",
38 |     "RefactorTool",
39 |     "SecauditTool",
40 |     "TestGenTool",
41 |     "TracerTool",
42 |     "VersionTool",
43 | ]
44 | 
```
--------------------------------------------------------------------------------
/docs/gemini-setup.md:
--------------------------------------------------------------------------------
```markdown
 1 | # Gemini CLI Setup
 2 | 
 3 | > **Note**: While Zen MCP Server connects successfully to Gemini CLI, tool invocation is not working
 4 | > correctly yet. We'll update this guide once the integration is fully functional.
 5 | 
 6 | This guide explains how to configure Zen MCP Server to work with [Gemini CLI](https://github.com/google-gemini/gemini-cli).
 7 | 
 8 | ## Prerequisites
 9 | 
10 | - Zen MCP Server installed and configured
11 | - Gemini CLI installed
12 | - At least one API key configured in your `.env` file
13 | 
14 | ## Configuration
15 | 
16 | 1. Edit `~/.gemini/settings.json` and add:
17 | 
18 | ```json
19 | {
20 |   "mcpServers": {
21 |     "zen": {
22 |       "command": "/path/to/zen-mcp-server/zen-mcp-server"
23 |     }
24 |   }
25 | }
26 | ```
27 | 
28 | 2. Replace `/path/to/zen-mcp-server` with your actual Zen installation path.
29 | 
30 | 3. If the `zen-mcp-server` wrapper script doesn't exist, create it:
31 | 
32 | ```bash
33 | #!/bin/bash
34 | DIR="$(cd "$(dirname "$0")" && pwd)"
35 | cd "$DIR"
36 | exec .zen_venv/bin/python server.py "$@"
37 | ```
38 | 
39 | Then make it executable: `chmod +x zen-mcp-server`
40 | 
41 | 4. Restart Gemini CLI.
42 | 
43 | All Zen tools are now available in your Gemini CLI session.
```
--------------------------------------------------------------------------------
/tests/test_config.py:
--------------------------------------------------------------------------------
```python
 1 | """
 2 | Tests for configuration
 3 | """
 4 | 
 5 | from config import (
 6 |     DEFAULT_MODEL,
 7 |     TEMPERATURE_ANALYTICAL,
 8 |     TEMPERATURE_BALANCED,
 9 |     TEMPERATURE_CREATIVE,
10 |     __author__,
11 |     __updated__,
12 |     __version__,
13 | )
14 | 
15 | 
class TestConfig:
    """Test configuration values"""

    def test_version_info(self) -> None:
        """Test version information exists and has correct format"""
        # Check version format (e.g., "2.4.1")
        assert isinstance(__version__, str)
        assert len(__version__.split(".")) == 3  # Major.Minor.Patch

        # Check author
        assert __author__ == "Fahad Gilani"

        # Check updated date exists (don't assert on specific format/value)
        assert isinstance(__updated__, str)

    def test_model_config(self) -> None:
        """Test model configuration"""
        # DEFAULT_MODEL is set in conftest.py for tests
        assert DEFAULT_MODEL == "gemini-2.5-flash"

    def test_temperature_defaults(self) -> None:
        """Test temperature constants"""
        # Constants are ordered: analytical < balanced < creative.
        assert TEMPERATURE_ANALYTICAL == 0.2
        assert TEMPERATURE_BALANCED == 0.5
        assert TEMPERATURE_CREATIVE == 0.7
41 | 
```
--------------------------------------------------------------------------------
/clink/agents/codex.py:
--------------------------------------------------------------------------------
```python
 1 | """Codex-specific CLI agent hooks."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from clink.models import ResolvedCLIClient
 6 | from clink.parsers.base import ParserError
 7 | 
 8 | from .base import AgentOutput, BaseCLIAgent
 9 | 
10 | 
class CodexAgent(BaseCLIAgent):
    """Codex CLI agent with JSONL recovery support.

    Note: the original class defined an ``__init__`` that only delegated to
    ``super().__init__``; it was redundant and has been removed — construction
    behaviour is unchanged.
    """

    def _recover_from_error(
        self,
        *,
        returncode: int,
        stdout: str,
        stderr: str,
        sanitized_command: list[str],
        duration_seconds: float,
        output_file_content: str | None,
    ) -> AgentOutput | None:
        """Attempt to salvage a structured response from a failed Codex run.

        If the JSONL parser can still extract a response from *stdout*, wrap
        it in a normal AgentOutput (preserving the original return code and
        streams) so the caller can treat the run as recovered.

        Returns:
            AgentOutput when stdout parses cleanly, otherwise None to signal
            that the original failure should be surfaced.
        """
        try:
            parsed = self._parser.parse(stdout, stderr)
        except ParserError:
            # Output wasn't parseable JSONL — nothing to recover.
            return None

        return AgentOutput(
            parsed=parsed,
            sanitized_command=sanitized_command,
            returncode=returncode,
            stdout=stdout,
            stderr=stderr,
            duration_seconds=duration_seconds,
            parser_name=self._parser.name,
            output_file_content=output_file_content,
        )
42 | 
```
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
```markdown
 1 | # Zen MCP Server Documentation
 2 | 
 3 | | Document | Description |
 4 | |----------|-------------|
 5 | | [Getting Started](getting-started.md) | Installation paths, prerequisite setup, and first-run guidance. |
 6 | | [Adding Providers](adding_providers.md) | How to register new AI providers and advertise capabilities. |
 7 | | [Azure OpenAI](azure_openai.md) | Configure Azure deployments, capability overrides, and env mappings. |
 8 | | [Model Ranking](model_ranking.md) | How intelligence scores translate into auto-mode ordering. |
 9 | | [Custom Models](custom_models.md) | Configure OpenRouter/custom models and aliases. |
10 | | [Adding Tools](adding_tools.md) | Create new tools using the shared base classes. |
11 | | [Advanced Usage](advanced-usage.md) | Auto-mode tricks, workflow tools, and collaboration tips. |
12 | | [Configuration](configuration.md) | .env options, restriction policies, logging levels. |
13 | | [Testing](testing.md) | Test strategy, command cheats, and coverage notes. |
14 | | [Troubleshooting](troubleshooting.md) | Common issues and resolutions. |
15 | 
16 | Additional docs live in this directory; start with the table above to orient yourself.
17 | 
```
--------------------------------------------------------------------------------
/docker/scripts/build.sh:
--------------------------------------------------------------------------------
```bash
#!/bin/bash
# Build the Zen MCP Server Docker image.
# Seeds .env from .env.example when missing, rebuilds the image from scratch
# (--no-cache), and verifies it appears in the local image list.
set -euo pipefail

# Colors for output
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'

echo -e "${GREEN}=== Building Zen MCP Server Docker Image ===${NC}"

# Check if .env file exists
if [[ ! -f .env ]]; then
    echo -e "${YELLOW}Warning: .env file not found. Copying from .env.example${NC}"
    if [[ -f .env.example ]]; then
        cp .env.example .env
        echo -e "${YELLOW}Please edit .env file with your API keys before running the server${NC}"
    else
        # Without a template we cannot produce a usable .env — abort.
        echo -e "${RED}Error: .env.example not found${NC}"
        exit 1
    fi
fi

# Build the Docker image
echo -e "${GREEN}Building Docker image...${NC}"
docker-compose build --no-cache

# Verify the build
if docker images | grep -q "zen-mcp-server"; then
    echo -e "${GREEN}✓ Docker image built successfully${NC}"
    echo -e "${GREEN}Image details:${NC}"
    docker images | grep zen-mcp-server
else
    echo -e "${RED}✗ Failed to build Docker image${NC}"
    exit 1
fi

echo -e "${GREEN}=== Build Complete ===${NC}"
echo -e "${YELLOW}Next steps:${NC}"
echo -e "  1. Edit .env file with your API keys"
echo -e "  2. Run: ${GREEN}docker-compose up -d${NC}"
```
--------------------------------------------------------------------------------
/tests/test_conversation_continuation_integration.py:
--------------------------------------------------------------------------------
```python
 1 | """Integration test for conversation continuation persistence."""
 2 | 
 3 | from tools.chat import ChatRequest, ChatTool
 4 | from utils.conversation_memory import get_thread
 5 | from utils.storage_backend import get_storage_backend
 6 | 
 7 | 
 8 | def test_first_response_persisted_in_conversation_history(tmp_path):
 9 |     """Ensure the assistant's initial reply is stored for newly created threads."""
10 | 
11 |     # Clear in-memory storage to avoid cross-test contamination
12 |     storage = get_storage_backend()
13 |     storage._store.clear()  # type: ignore[attr-defined]
14 | 
15 |     tool = ChatTool()
16 |     request = ChatRequest(
17 |         prompt="First question?",
18 |         model="local-llama",
19 |         working_directory_absolute_path=str(tmp_path),
20 |     )
21 |     response_text = "Here is the initial answer."
22 | 
23 |     # Mimic the first tool invocation (no continuation_id supplied)
24 |     continuation_data = tool._create_continuation_offer(request, model_info={"model_name": "local-llama"})
25 |     tool._create_continuation_offer_response(
26 |         response_text,
27 |         continuation_data,
28 |         request,
29 |         {"model_name": "local-llama", "provider": "custom"},
30 |     )
31 | 
32 |     thread_id = continuation_data["continuation_id"]
33 |     thread = get_thread(thread_id)
34 | 
35 |     assert thread is not None
36 |     assert [turn.role for turn in thread.turns] == ["user", "assistant"]
37 |     assert thread.turns[-1].content == response_text
38 | 
39 |     # Cleanup storage for subsequent tests
40 |     storage._store.clear()  # type: ignore[attr-defined]
41 | 
```
--------------------------------------------------------------------------------
/tests/mock_helpers.py:
--------------------------------------------------------------------------------
```python
 1 | """Helper functions for test mocking."""
 2 | 
 3 | from unittest.mock import Mock
 4 | 
 5 | from providers.shared import ModelCapabilities, ProviderType, RangeTemperatureConstraint
 6 | 
 7 | 
 8 | def create_mock_provider(model_name="gemini-2.5-flash", context_window=1_048_576):
 9 |     """Create a properly configured mock provider."""
10 |     mock_provider = Mock()
11 | 
12 |     # Set up capabilities
13 |     mock_capabilities = ModelCapabilities(
14 |         provider=ProviderType.GOOGLE,
15 |         model_name=model_name,
16 |         friendly_name="Gemini",
17 |         context_window=context_window,
18 |         max_output_tokens=8192,
19 |         supports_extended_thinking=False,
20 |         supports_system_prompts=True,
21 |         supports_streaming=True,
22 |         supports_function_calling=True,
23 |         temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7),
24 |     )
25 | 
26 |     mock_provider.get_capabilities.return_value = mock_capabilities
27 |     mock_provider.get_provider_type.return_value = ProviderType.GOOGLE
28 |     mock_provider.validate_model_name.return_value = True
29 | 
30 |     # Set up generate_content response
31 |     mock_response = Mock()
32 |     mock_response.content = "Test response"
33 |     mock_response.usage = {"input_tokens": 10, "output_tokens": 20}
34 |     mock_response.model_name = model_name
35 |     mock_response.friendly_name = "Gemini"
36 |     mock_response.provider = ProviderType.GOOGLE
37 |     mock_response.metadata = {"finish_reason": "STOP"}
38 | 
39 |     mock_provider.generate_content.return_value = mock_response
40 | 
41 |     return mock_provider
42 | 
```
--------------------------------------------------------------------------------
/.github/workflows/semantic-pr.yml:
--------------------------------------------------------------------------------
```yaml
 1 | ---
 2 | name: Semantic PR
 3 | 
 4 | on:
 5 |   pull_request:
 6 |     types: [opened, edited, synchronize]
 7 | 
 8 | concurrency:
 9 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
10 |   cancel-in-progress: true
11 | 
12 | permissions:
13 |   contents: read
14 |   pull-requests: write
15 | 
16 | jobs:
17 |   semantic-pr:
18 |     name: Validate PR
19 |     runs-on: ubuntu-latest
20 |     timeout-minutes: 5
21 |     steps:
22 |       - name: Check PR Title
23 |         id: lint-pr-title
24 |         uses: amannn/action-semantic-pull-request@0723387faaf9b38adef4775cd42cfd5155ed6017 # v5.5.3
25 |         env:
26 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
27 | 
28 |       - name: Add PR error comment
29 |         uses: marocchino/sticky-pull-request-comment@d2ad0de260ae8b0235ce059e63f2949ba9e05943 # v2.9.3
30 |         if: always() && (steps.lint-pr-title.outputs.error_message != null)
31 |         with:
32 |           header: pr-title-lint-error
33 |           message: |
34 |             We require pull request titles to follow the [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0/) and it looks like your proposed title needs to be adjusted.
35 | 
36 |             Details:
37 | 
38 |             ```
39 |             ${{ steps.lint-pr-title.outputs.error_message }}
40 |             ```
41 | 
42 |       - name: Delete PR error comment
43 |         uses: marocchino/sticky-pull-request-comment@d2ad0de260ae8b0235ce059e63f2949ba9e05943 # v2.9.3
44 |         if: ${{ steps.lint-pr-title.outputs.error_message == null }}
45 |         with:
46 |           header: pr-title-lint-error
47 |           delete: true
```
--------------------------------------------------------------------------------
/clink/agents/claude.py:
--------------------------------------------------------------------------------
```python
 1 | """Claude-specific CLI agent hooks."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from clink.models import ResolvedCLIRole
 6 | from clink.parsers.base import ParserError
 7 | 
 8 | from .base import AgentOutput, BaseCLIAgent
 9 | 
10 | 
11 | class ClaudeAgent(BaseCLIAgent):
12 |     """Claude CLI agent with system-prompt injection support."""
13 | 
14 |     def _build_command(self, *, role: ResolvedCLIRole, system_prompt: str | None) -> list[str]:
15 |         command = list(self.client.executable)
16 |         command.extend(self.client.internal_args)
17 |         command.extend(self.client.config_args)
18 | 
19 |         if system_prompt and "--append-system-prompt" not in self.client.config_args:
20 |             command.extend(["--append-system-prompt", system_prompt])
21 | 
22 |         command.extend(role.role_args)
23 |         return command
24 | 
25 |     def _recover_from_error(
26 |         self,
27 |         *,
28 |         returncode: int,
29 |         stdout: str,
30 |         stderr: str,
31 |         sanitized_command: list[str],
32 |         duration_seconds: float,
33 |         output_file_content: str | None,
34 |     ) -> AgentOutput | None:
35 |         try:
36 |             parsed = self._parser.parse(stdout, stderr)
37 |         except ParserError:
38 |             return None
39 | 
40 |         return AgentOutput(
41 |             parsed=parsed,
42 |             sanitized_command=sanitized_command,
43 |             returncode=returncode,
44 |             stdout=stdout,
45 |             stderr=stderr,
46 |             duration_seconds=duration_seconds,
47 |             parser_name=self._parser.name,
48 |             output_file_content=output_file_content,
49 |         )
50 | 
```
--------------------------------------------------------------------------------
/tests/test_clink_gemini_parser.py:
--------------------------------------------------------------------------------
```python
 1 | """Tests for the Gemini CLI JSON parser."""
 2 | 
 3 | import pytest
 4 | 
 5 | from clink.parsers.gemini import GeminiJSONParser, ParserError
 6 | 
 7 | 
 8 | def _build_rate_limit_stdout() -> str:
 9 |     return (
10 |         "{\n"
11 |         '  "response": "",\n'
12 |         '  "stats": {\n'
13 |         '    "models": {\n'
14 |         '      "gemini-2.5-pro": {\n'
15 |         '        "api": {\n'
16 |         '          "totalRequests": 5,\n'
17 |         '          "totalErrors": 5,\n'
18 |         '          "totalLatencyMs": 13319\n'
19 |         "        },\n"
20 |         '        "tokens": {"prompt": 0, "candidates": 0, "total": 0, "cached": 0, "thoughts": 0, "tool": 0}\n'
21 |         "      }\n"
22 |         "    },\n"
23 |         '    "tools": {"totalCalls": 0},\n'
24 |         '    "files": {"totalLinesAdded": 0, "totalLinesRemoved": 0}\n'
25 |         "  }\n"
26 |         "}"
27 |     )
28 | 
29 | 
30 | def test_gemini_parser_handles_rate_limit_empty_response():
31 |     parser = GeminiJSONParser()
32 |     stdout = _build_rate_limit_stdout()
33 |     stderr = "Attempt 1 failed with status 429. Retrying with backoff... ApiError: quota exceeded"
34 | 
35 |     parsed = parser.parse(stdout, stderr)
36 | 
37 |     assert "429" in parsed.content
38 |     assert parsed.metadata.get("rate_limit_status") == 429
39 |     assert parsed.metadata.get("empty_response") is True
40 |     assert "Attempt 1 failed" in parsed.metadata.get("stderr", "")
41 | 
42 | 
43 | def test_gemini_parser_still_errors_when_no_fallback_available():
44 |     parser = GeminiJSONParser()
45 |     stdout = '{"response": "", "stats": {}}'
46 | 
47 |     with pytest.raises(ParserError):
48 |         parser.parse(stdout, stderr="")
49 | 
```
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
```yaml
 1 | name: Tests
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     branches: [main]
 6 | 
 7 | jobs:
 8 |   test:
 9 |     runs-on: ubuntu-latest
10 |     strategy:
11 |       matrix:
12 |         python-version: ["3.10", "3.11", "3.12"]
13 | 
14 |     steps:
15 |       - uses: actions/checkout@v4
16 | 
17 |       - name: Set up Python ${{ matrix.python-version }}
18 |         uses: actions/setup-python@v4
19 |         with:
20 |           python-version: ${{ matrix.python-version }}
21 | 
22 |       - name: Install dependencies
23 |         run: |
24 |           python -m pip install --upgrade pip
25 |           pip install -r requirements.txt
26 |           pip install -r requirements-dev.txt
27 | 
28 |       - name: Run unit tests
29 |         run: |
30 |           # Run only unit tests (exclude simulation tests and integration tests)
31 |           # Integration tests require local-llama which isn't available in CI
32 |           python -m pytest tests/ -v --ignore=simulator_tests/ -m "not integration"
33 |         env:
34 |           # Ensure no API key is accidentally used in CI
35 |           GEMINI_API_KEY: ""
36 |           OPENAI_API_KEY: ""
37 | 
38 |   lint:
39 |     runs-on: ubuntu-latest
40 |     steps:
41 |       - uses: actions/checkout@v4
42 | 
43 |       - name: Set up Python
44 |         uses: actions/setup-python@v4
45 |         with:
46 |           python-version: "3.11"
47 | 
48 |       - name: Install dependencies
49 |         run: |
50 |           python -m pip install --upgrade pip
51 |           pip install -r requirements-dev.txt
52 | 
53 |       - name: Run black formatter check
54 |         run: black --check . --exclude="test_simulation_files/"
55 | 
56 |       - name: Run ruff linter
57 |         run: ruff check . --exclude test_simulation_files
58 | 
```
--------------------------------------------------------------------------------
/clink/constants.py:
--------------------------------------------------------------------------------
```python
 1 | """Internal defaults and constants for clink."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from dataclasses import dataclass, field
 6 | from pathlib import Path
 7 | 
 8 | DEFAULT_TIMEOUT_SECONDS = 1800
 9 | DEFAULT_STREAM_LIMIT = 10 * 1024 * 1024  # 10MB per stream
10 | 
11 | PROJECT_ROOT = Path(__file__).resolve().parent.parent
12 | BUILTIN_PROMPTS_DIR = PROJECT_ROOT / "systemprompts" / "clink"
13 | CONFIG_DIR = PROJECT_ROOT / "conf" / "cli_clients"
14 | USER_CONFIG_DIR = Path.home() / ".zen" / "cli_clients"
15 | 
16 | 
17 | @dataclass(frozen=True)
18 | class CLIInternalDefaults:
19 |     """Internal defaults applied to a CLI client during registry load."""
20 | 
21 |     parser: str
22 |     additional_args: list[str] = field(default_factory=list)
23 |     env: dict[str, str] = field(default_factory=dict)
24 |     default_role_prompt: str | None = None
25 |     timeout_seconds: int = DEFAULT_TIMEOUT_SECONDS
26 |     runner: str | None = None
27 | 
28 | 
29 | INTERNAL_DEFAULTS: dict[str, CLIInternalDefaults] = {
30 |     "gemini": CLIInternalDefaults(
31 |         parser="gemini_json",
32 |         additional_args=["-o", "json"],
33 |         default_role_prompt="systemprompts/clink/default.txt",
34 |         runner="gemini",
35 |     ),
36 |     "codex": CLIInternalDefaults(
37 |         parser="codex_jsonl",
38 |         additional_args=["exec"],
39 |         default_role_prompt="systemprompts/clink/default.txt",
40 |         runner="codex",
41 |     ),
42 |     "claude": CLIInternalDefaults(
43 |         parser="claude_json",
44 |         additional_args=["--print", "--output-format", "json"],
45 |         default_role_prompt="systemprompts/clink/default.txt",
46 |         runner="claude",
47 |     ),
48 | }
49 | 
```
--------------------------------------------------------------------------------
/docs/tools/challenge.md:
--------------------------------------------------------------------------------
```markdown
 1 | # challenge - Challenge an approach or validate ideas with confidence
 2 | 
 3 | The `challenge` tool encourages thoughtful critical thinking instead of automatic agreement with the dreaded **You're absolutely right!** responses - especially 
 4 | when you're not. This tool wraps your comment with instructions that prompt critical thinking and honest analysis instead of blind agreement.
 5 | 
 6 | ## Quick Example
 7 | 
 8 | ```
 9 | challenge but do we even need all this extra caching because it'll just slow the app down?
10 | ```
11 | 
12 | ```
13 | challenge I don't think this approach solves my original complaint
14 | ```
15 | 
16 | Normally, your favorite coding agent will enthusiastically reply with **“You’re absolutely right!”**—then proceed to 
17 | reverse the _correct_ strategy entirely, without stopping to consider that you might actually be wrong, missing the 
18 | bigger picture, or ignoring architectural constraints.
19 | 
20 | `challenge` fixes this. Claude can even _detect_ when you're challenging something and automatically invokes this tool
21 | to ensure thoughtful analysis instead of reflexive agreement.
22 | 
23 | **Without Zen:**
24 | 
25 | 
26 | **With Zen:**
27 | 
28 | 
29 | ## Why Use Challenge?
30 | 
31 | AI assistants often agree too readily. The challenge tool helps you:
32 | - Get genuine critical evaluation of your ideas
33 | - Challenge assumptions constructively
34 | - Receive honest feedback on proposals
35 | - Validate approaches with thoughtful analysis
```
--------------------------------------------------------------------------------
/providers/registries/openrouter.py:
--------------------------------------------------------------------------------
```python
 1 | """OpenRouter model registry for managing model configurations and aliases."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from ..shared import ModelCapabilities, ProviderType
 6 | from .base import CAPABILITY_FIELD_NAMES, CapabilityModelRegistry
 7 | 
 8 | 
 9 | class OpenRouterModelRegistry(CapabilityModelRegistry):
10 |     """Capability registry backed by ``conf/openrouter_models.json``."""
11 | 
12 |     def __init__(self, config_path: str | None = None) -> None:
13 |         super().__init__(
14 |             env_var_name="OPENROUTER_MODELS_CONFIG_PATH",
15 |             default_filename="openrouter_models.json",
16 |             provider=ProviderType.OPENROUTER,
17 |             friendly_prefix="OpenRouter ({model})",
18 |             config_path=config_path,
19 |         )
20 | 
21 |     def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]:
22 |         provider_override = entry.get("provider")
23 |         if isinstance(provider_override, str):
24 |             entry_provider = ProviderType(provider_override.lower())
25 |         elif isinstance(provider_override, ProviderType):
26 |             entry_provider = provider_override
27 |         else:
28 |             entry_provider = ProviderType.OPENROUTER
29 | 
30 |         if entry_provider == ProviderType.CUSTOM:
31 |             entry.setdefault("friendly_name", f"Custom ({entry['model_name']})")
32 |         else:
33 |             entry.setdefault("friendly_name", f"OpenRouter ({entry['model_name']})")
34 | 
35 |         filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES}
36 |         filtered.setdefault("provider", entry_provider)
37 |         capability = ModelCapabilities(**filtered)
38 |         return capability, {}
39 | 
```
--------------------------------------------------------------------------------
/docs/logging.md:
--------------------------------------------------------------------------------
```markdown
 1 | # Logging
 2 | 
 3 | ## Quick Start - Follow Logs
 4 | 
 5 | The easiest way to monitor logs is to use the `-f` flag when starting the server:
 6 | 
 7 | ```bash
 8 | # Start server and automatically follow MCP logs
 9 | ./run-server.sh -f
10 | ```
11 | 
12 | This will start the server and immediately begin tailing the MCP server logs.
13 | 
14 | ## Log Files
15 | 
16 | Logs are stored in the `logs/` directory within your project folder:
17 | 
18 | - **`mcp_server.log`** - Main server operations, API calls, and errors
19 | - **`mcp_activity.log`** - Tool calls and conversation tracking
20 | 
21 | Log files rotate automatically when they reach 20MB, keeping up to 10 rotated files.
22 | 
23 | ## Viewing Logs
24 | 
25 | To monitor MCP server activity:
26 | 
27 | ```bash
28 | # Follow logs in real-time
29 | tail -f logs/mcp_server.log
30 | 
31 | # View last 100 lines
32 | tail -n 100 logs/mcp_server.log
33 | 
34 | # View activity logs (tool calls only)
35 | tail -f logs/mcp_activity.log
36 | 
37 | # Search for specific patterns
38 | grep "ERROR" logs/mcp_server.log
39 | grep "tool_name" logs/mcp_activity.log
40 | ```
41 | 
42 | ## Log Level
43 | 
44 | Set verbosity with `LOG_LEVEL` in your `.env` file:
45 | 
46 | ```env
47 | # Options: DEBUG, INFO, WARNING, ERROR
48 | LOG_LEVEL=INFO
49 | ```
50 | 
51 | - **DEBUG**: Detailed information for debugging
52 | - **INFO**: General operational messages (default)
53 | - **WARNING**: Warning messages
54 | - **ERROR**: Only error messages
55 | 
56 | ## Log Format
57 | 
58 | Logs use a standardized format with timestamps:
59 | 
60 | ```
61 | 2024-06-14 10:30:45,123 - module.name - INFO - Message here
62 | ```
63 | 
64 | ## Tips
65 | 
66 | - Use `./run-server.sh -f` for the easiest log monitoring experience
67 | - Activity logs show only tool-related events for cleaner output
68 | - Main server logs include all operational details
69 | - Logs persist across server restarts
```
--------------------------------------------------------------------------------
/providers/registries/azure.py:
--------------------------------------------------------------------------------
```python
 1 | """Registry loader for Azure OpenAI model configurations."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import logging
 6 | 
 7 | from ..shared import ModelCapabilities, ProviderType, TemperatureConstraint
 8 | from .base import CAPABILITY_FIELD_NAMES, CustomModelRegistryBase
 9 | 
10 | logger = logging.getLogger(__name__)
11 | 
12 | 
13 | class AzureModelRegistry(CustomModelRegistryBase):
14 |     """Load Azure-specific model metadata from configuration files."""
15 | 
16 |     def __init__(self, config_path: str | None = None) -> None:
17 |         super().__init__(
18 |             env_var_name="AZURE_MODELS_CONFIG_PATH",
19 |             default_filename="azure_models.json",
20 |             config_path=config_path,
21 |         )
22 |         self.reload()
23 | 
24 |     def _extra_keys(self) -> set[str]:
25 |         return {"deployment", "deployment_name"}
26 | 
27 |     def _provider_default(self) -> ProviderType:
28 |         return ProviderType.AZURE
29 | 
30 |     def _default_friendly_name(self, model_name: str) -> str:
31 |         return f"Azure OpenAI ({model_name})"
32 | 
33 |     def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]:
34 |         deployment = entry.pop("deployment", None) or entry.pop("deployment_name", None)
35 |         if not deployment:
36 |             raise ValueError(f"Azure model '{entry.get('model_name')}' is missing required 'deployment' field")
37 | 
38 |         temp_hint = entry.get("temperature_constraint")
39 |         if isinstance(temp_hint, str):
40 |             entry["temperature_constraint"] = TemperatureConstraint.create(temp_hint)
41 | 
42 |         filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES}
43 |         filtered.setdefault("provider", ProviderType.AZURE)
44 |         capability = ModelCapabilities(**filtered)
45 |         return capability, {"deployment": deployment}
46 | 
```
--------------------------------------------------------------------------------
/tests/test_line_numbers_integration.py:
--------------------------------------------------------------------------------
```python
 1 | """
 2 | Integration test demonstrating that all tools get line numbers by default.
 3 | """
 4 | 
 5 | from tools.analyze import AnalyzeTool
 6 | from tools.chat import ChatTool
 7 | from tools.codereview import CodeReviewTool
 8 | from tools.debug import DebugIssueTool
 9 | from tools.precommit import PrecommitTool
10 | from tools.refactor import RefactorTool
11 | from tools.testgen import TestGenTool
12 | 
13 | 
14 | class TestLineNumbersIntegration:
15 |     """Test that all tools inherit line number behavior correctly."""
16 | 
17 |     def test_all_tools_want_line_numbers(self):
18 |         """Verify that all tools want line numbers by default."""
19 |         tools = [
20 |             ChatTool(),
21 |             AnalyzeTool(),
22 |             CodeReviewTool(),
23 |             DebugIssueTool(),
24 |             RefactorTool(),
25 |             TestGenTool(),
26 |             PrecommitTool(),
27 |         ]
28 | 
29 |         for tool in tools:
30 |             assert tool.wants_line_numbers_by_default(), f"{tool.get_name()} should want line numbers by default"
31 | 
32 |     def test_no_tools_override_line_numbers(self):
33 |         """Verify that no tools override the base class line number behavior."""
34 |         # Check that tools don't have their own wants_line_numbers_by_default method
35 |         tools_classes = [
36 |             ChatTool,
37 |             AnalyzeTool,
38 |             CodeReviewTool,
39 |             DebugIssueTool,
40 |             RefactorTool,
41 |             TestGenTool,
42 |             PrecommitTool,
43 |         ]
44 | 
45 |         for tool_class in tools_classes:
46 |             # Check if the method is defined in the tool class itself
47 |             # (not inherited from base)
48 |             has_override = "wants_line_numbers_by_default" in tool_class.__dict__
49 |             assert not has_override, f"{tool_class.__name__} should not override wants_line_numbers_by_default"
50 | 
```
--------------------------------------------------------------------------------
/tests/transport_helpers.py:
--------------------------------------------------------------------------------
```python
 1 | """Helper functions for HTTP transport injection in tests."""
 2 | 
 3 | from tests.http_transport_recorder import TransportFactory
 4 | 
 5 | 
 6 | def inject_transport(monkeypatch, cassette_path: str):
 7 |     """Inject HTTP transport into OpenAICompatibleProvider for testing.
 8 | 
 9 |     This helper simplifies the monkey patching pattern used across tests
10 |     to inject custom HTTP transports for recording/replaying API calls.
11 | 
12 |     Also ensures OpenAI provider is properly registered for tests that need it.
13 | 
14 |     Args:
15 |         monkeypatch: pytest monkeypatch fixture
16 |         cassette_path: Path to cassette file for recording/replay
17 | 
18 |     Returns:
19 |         The created transport instance
20 | 
21 |     Example:
22 |         transport = inject_transport(monkeypatch, "path/to/cassette.json")
23 |     """
24 |     # Ensure OpenAI provider is registered - always needed for transport injection
25 |     from providers.openai import OpenAIModelProvider
26 |     from providers.registry import ModelProviderRegistry
27 |     from providers.shared import ProviderType
28 | 
29 |     # Always register OpenAI provider for transport tests (API key might be dummy)
30 |     ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
31 | 
32 |     # Create transport
33 |     transport = TransportFactory.create_transport(str(cassette_path))
34 | 
35 |     # Inject transport using the established pattern
36 |     from providers.openai_compatible import OpenAICompatibleProvider
37 | 
38 |     original_client_property = OpenAICompatibleProvider.client
39 | 
40 |     def patched_client_getter(self):
41 |         if self._client is None:
42 |             self._test_transport = transport
43 |         return original_client_property.fget(self)
44 | 
45 |     monkeypatch.setattr(OpenAICompatibleProvider, "client", property(patched_client_getter))
46 | 
47 |     return transport
48 | 
```
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentation.yml:
--------------------------------------------------------------------------------
```yaml
 1 | name: 📖 Documentation Improvement
 2 | description: Report an issue or suggest an improvement for the documentation
 3 | labels: ["documentation", "good first issue"]
 4 | body:
 5 |   - type: input
 6 |     id: location
 7 |     attributes:
 8 |       label: Documentation Location
 9 |       description: "Which file or page has the issue? (e.g., README.md, CONTRIBUTING.md, CLAUDE.md)"
10 |       placeholder: "e.g., README.md"
11 |     validations:
12 |       required: true
13 | 
14 |   - type: dropdown
15 |     id: issue-type
16 |     attributes:
17 |       label: Type of Documentation Issue
18 |       description: What kind of documentation improvement is this?
19 |       options:
20 |         - Typo or grammar error
21 |         - Unclear or confusing explanation
22 |         - Outdated information
23 |         - Missing information
24 |         - Code example doesn't work
25 |         - Installation/setup instructions unclear
26 |         - Tool usage examples need improvement
27 |         - Other
28 |     validations:
29 |       required: true
30 | 
31 |   - type: textarea
32 |     id: problem
33 |     attributes:
34 |       label: What is wrong with the documentation?
35 |       description: "Please describe the problem. Be specific about what is unclear, incorrect, or missing."
36 |       placeholder: "README is missing some details"
37 |     validations:
38 |       required: true
39 | 
40 |   - type: textarea
41 |     id: suggestion
42 |     attributes:
43 |       label: Suggested Improvement
44 |       description: "How can we make it better? If you can, please provide the exact text or changes you'd like to see."
45 |       placeholder: "Please improve..."
46 | 
47 | 
48 |   - type: dropdown
49 |     id: audience
50 |     attributes:
51 |       label: Target Audience
52 |       description: Which audience would benefit most from this improvement?
53 |       options:
54 |         - New users (first-time setup)
55 |         - Developers (contributing to the project)
56 |         - Advanced users (complex workflows)
57 |         - All users
58 |     validations:
59 |       required: true
60 | 
61 | 
```
--------------------------------------------------------------------------------
/utils/token_utils.py:
--------------------------------------------------------------------------------
```python
 1 | """
 2 | Token counting utilities for managing API context limits
 3 | 
 4 | This module provides functions for estimating token counts to ensure
 5 | requests stay within the Gemini API's context window limits.
 6 | 
 7 | Note: The estimation uses a simple character-to-token ratio which is
 8 | approximate. For production systems requiring precise token counts,
 9 | consider using the actual tokenizer for the specific model.
10 | """
11 | 
12 | # Default fallback for token limit (conservative estimate)
13 | DEFAULT_CONTEXT_WINDOW = 200_000  # Conservative fallback for unknown models
14 | 
15 | 
16 | def estimate_tokens(text: str) -> int:
17 |     """
18 |     Estimate token count using a character-based approximation.
19 | 
20 |     This uses a rough heuristic where 1 token ≈ 4 characters, which is
21 |     a reasonable approximation for English text. The actual token count
22 |     may vary based on:
23 |     - Language (non-English text may have different ratios)
24 |     - Code vs prose (code often has more tokens per character)
25 |     - Special characters and formatting
26 | 
27 |     Args:
28 |         text: The text to estimate tokens for
29 | 
30 |     Returns:
31 |         int: Estimated number of tokens
32 |     """
33 |     return len(text) // 4
34 | 
35 | 
36 | def check_token_limit(text: str, context_window: int = DEFAULT_CONTEXT_WINDOW) -> tuple[bool, int]:
37 |     """
38 |     Check if text exceeds the specified token limit.
39 | 
40 |     This function is used to validate that prepared prompts will fit
41 |     within the model's context window, preventing API errors and ensuring
42 |     reliable operation.
43 | 
44 |     Args:
45 |         text: The text to check
46 |         context_window: The model's context window size (defaults to conservative fallback)
47 | 
48 |     Returns:
49 |         Tuple[bool, int]: (is_within_limit, estimated_tokens)
50 |         - is_within_limit: True if the text fits within context_window
51 |         - estimated_tokens: The estimated token count
52 |     """
53 |     estimated = estimate_tokens(text)
54 |     return estimated <= context_window, estimated
55 | 
```
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
```yaml
 1 | name: 🐞 Bug Report
 2 | description: Create a report to help us improve
 3 | labels: ["bug", "needs-triage"]
 4 | body:
 5 |   - type: markdown
 6 |     attributes:
 7 |       value: |
 8 |         Thank you for taking the time to file a bug report! Please provide as much detail as possible to help us reproduce and fix the issue.
 9 | 
10 |   - type: input
11 |     id: version
12 |     attributes:
13 |       label: Project Version
14 |       description: "Which version are you using? (To see version: ./run-server.sh -v)"
15 |       placeholder: "e.g., 5.1.0"
16 |     validations:
17 |       required: true
18 | 
19 |   - type: textarea
20 |     id: description
21 |     attributes:
22 |       label: Bug Description
23 |       description: A clear and concise description of what the bug is.
24 |       placeholder: "When I run the `codereview` nothing happens"
25 |     validations:
26 |       required: true
27 | 
28 |   - type: textarea
29 |     id: logs
30 |     attributes:
31 |       label: Relevant Log Output
32 |       description: "Please copy and paste any relevant log output. Logs are stored under the `logs` folder in the zen folder. You can also use `./run-server.sh -f` to see logs"
33 |       render: shell
34 | 
35 |   - type: dropdown
36 |     id: environment
37 |     attributes:
38 |       label: Operating System
39 |       description: What operating system are you running the Docker client on?
40 |       options:
41 |         - macOS
42 |         - Windows
43 |         - Linux
44 |     validations:
45 |       required: true
46 | 
47 |   - type: checkboxes
48 |     id: no-duplicate-issues
49 |     attributes:
50 |       label: Sanity Checks
51 |       description: "Before submitting, please confirm the following:"
52 |       options:
53 |         - label: I have searched the existing issues and this is not a duplicate.
54 |           required: true
55 |         - label: I am using `GEMINI_API_KEY`
56 |           required: false
57 |         - label: I am using `OPENAI_API_KEY`
58 |           required: false
59 |         - label: I am using `OPENROUTER_API_KEY`
60 |           required: false
61 |         - label: I am using `CUSTOM_API_URL`
62 |           required: false
63 | 
```
--------------------------------------------------------------------------------
/utils/security_config.py:
--------------------------------------------------------------------------------
```python
  1 | """
  2 | Security configuration and path validation constants
  3 | 
  4 | This module contains security-related constants and configurations
  5 | for file access control.
  6 | """
  7 | 
  8 | from pathlib import Path
  9 | 
 10 | # Dangerous paths that should never be scanned
 11 | # These would give overly broad access and pose security risks
 12 | DANGEROUS_PATHS = {
 13 |     "/",
 14 |     "/etc",
 15 |     "/usr",
 16 |     "/bin",
 17 |     "/var",
 18 |     "/root",
 19 |     "/home",
 20 |     "C:\\",
 21 |     "C:\\Windows",
 22 |     "C:\\Program Files",
 23 |     "C:\\Users",
 24 | }
 25 | 
 26 | # Directories to exclude from recursive file search
 27 | # These typically contain generated code, dependencies, or build artifacts
 28 | EXCLUDED_DIRS = {
 29 |     # Python
 30 |     "__pycache__",
 31 |     ".venv",
 32 |     "venv",
 33 |     "env",
 34 |     ".env",
 35 |     "*.egg-info",
 36 |     ".eggs",
 37 |     "wheels",
 38 |     ".Python",
 39 |     ".mypy_cache",
 40 |     ".pytest_cache",
 41 |     ".tox",
 42 |     "htmlcov",
 43 |     ".coverage",
 44 |     "coverage",
 45 |     # Node.js / JavaScript
 46 |     "node_modules",
 47 |     ".next",
 48 |     ".nuxt",
 49 |     "bower_components",
 50 |     ".sass-cache",
 51 |     # Version Control
 52 |     ".git",
 53 |     ".svn",
 54 |     ".hg",
 55 |     # Build Output
 56 |     "build",
 57 |     "dist",
 58 |     "target",
 59 |     "out",
 60 |     # IDEs
 61 |     ".idea",
 62 |     ".vscode",
 63 |     ".sublime",
 64 |     ".atom",
 65 |     ".brackets",
 66 |     # Temporary / Cache
 67 |     ".cache",
 68 |     ".temp",
 69 |     ".tmp",
 70 |     "*.swp",
 71 |     "*.swo",
 72 |     "*~",
 73 |     # OS-specific
 74 |     ".DS_Store",
 75 |     "Thumbs.db",
 76 |     # Java / JVM
 77 |     ".gradle",
 78 |     ".m2",
 79 |     # Documentation build
 80 |     "_build",
 81 |     "site",
 82 |     # Mobile development
 83 |     ".expo",
 84 |     ".flutter",
 85 |     # Package managers
 86 |     "vendor",
 87 | }
 88 | 
 89 | 
 90 | def is_dangerous_path(path: Path) -> bool:
 91 |     """
 92 |     Check if a path is in the dangerous paths list.
 93 | 
 94 |     Args:
 95 |         path: Path to check
 96 | 
 97 |     Returns:
 98 |         True if the path is dangerous and should not be accessed
 99 |     """
100 |     try:
101 |         resolved = path.resolve()
102 |         return str(resolved) in DANGEROUS_PATHS or resolved.parent == resolved
103 |     except Exception:
104 |         return True  # If we can't resolve, consider it dangerous
105 | 
```
--------------------------------------------------------------------------------
/.github/workflows/semantic-release.yml:
--------------------------------------------------------------------------------
```yaml
 1 | name: Semantic Release
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 | 
 8 | permissions:
 9 |   contents: write
10 |   issues: write
11 |   pull-requests: write
12 | 
13 | jobs:
14 |   release:
15 |     runs-on: ubuntu-latest
16 |     concurrency: release
17 | 
18 |     steps:
19 |       - name: Checkout
20 |         uses: actions/checkout@v4
21 |         with:
22 |           fetch-depth: 0
23 |           token: ${{ secrets.GITHUB_TOKEN }}
24 |           persist-credentials: true
25 | 
26 |       - name: Setup Python
27 |         uses: actions/setup-python@v4
28 |         with:
29 |           python-version: "3.11"
30 | 
31 |       - name: Install dependencies
32 |         run: |
33 |           python -m pip install --upgrade pip
34 |           pip install python-semantic-release
35 | 
36 |       - name: Verify tests pass
37 |         run: |
38 |           pip install -r requirements.txt
39 |           pip install -r requirements-dev.txt
40 |           python -m pytest tests/ -v --ignore=simulator_tests/ -m "not integration"
41 | 
42 |       - name: Run semantic release
43 |         env:
44 |           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
45 |         run: |
46 |           git config --global user.name "github-actions[bot]"
47 |           git config --global user.email "github-actions[bot]@users.noreply.github.com"
48 |           semantic-release version
49 |           semantic-release publish
50 |           
51 |       - name: Sync version to config.py
52 |         run: |
53 |           pip install toml
54 |           python scripts/sync_version.py
55 |           if git diff --quiet config.py; then
56 |             echo "No version changes in config.py"
57 |           else
58 |             git add config.py
59 |             git commit -m "chore: sync version to config.py [skip ci]"
60 |             git push
61 |           fi
62 | 
63 |       - name: Upload build artifacts to release
64 |         if: hashFiles('dist/*') != ''
65 |         run: |
66 |           # Get the latest release tag
67 |           LATEST_TAG=$(gh release list --limit 1 --json tagName --jq '.[0].tagName')
68 |           if [ ! -z "$LATEST_TAG" ]; then
69 |             echo "Uploading artifacts to release $LATEST_TAG"
70 |             gh release upload "$LATEST_TAG" dist/* --clobber
71 |           fi
72 |         env:
73 |           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
74 | 
```
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
```yaml
 1 | name: ✨ Feature Request
 2 | description: Suggest an idea for this project
 3 | labels: ["enhancement", "needs-triage"]
 4 | body:
 5 |   - type: textarea
 6 |     id: problem-description
 7 |     attributes:
 8 |       label: What problem is this feature trying to solve?
 9 |       description: "A clear and concise description of the problem or user need. Why is this change needed?"
10 |       placeholder: "Currently, I can only use one Gemini tool at a time. I want to be able to chain multiple tools together (e.g., analyze -> codereview -> thinkdeep) in a single workflow."
11 |     validations:
12 |       required: true
13 | 
14 |   - type: textarea
15 |     id: proposed-solution
16 |     attributes:
17 |       label: Describe the solution you'd like
18 |       description: A clear and concise description of what you want to happen. How would it work from a user's perspective?
19 |       placeholder: "I'd like to be able to specify a workflow like 'analyze src/ then codereview the findings then use thinkdeep to suggest improvements' in a single command or configuration."
20 |     validations:
21 |       required: true
22 | 
23 |   - type: textarea
24 |     id: alternatives
25 |     attributes:
26 |       label: Describe alternatives you've considered
27 |       description: A clear and concise description of any alternative solutions or features you've considered.
28 |       placeholder: "I considered manually running each tool sequentially, but automatic workflow chaining would be more efficient and ensure context is preserved between steps."
29 | 
30 |   - type: dropdown
31 |     id: feature-type
32 |     attributes:
33 |       label: Feature Category
34 |       description: What type of enhancement is this?
35 |       options:
36 |         - New tool (chat, codereview, debug, etc.)
37 |         - Workflow improvement
38 |         - Integration enhancement
39 |         - Performance optimization
40 |         - User experience improvement
41 |         - Documentation enhancement
42 |         - Other
43 |     validations:
44 |       required: true
45 | 
46 |   - type: checkboxes
47 |     id: contribution
48 |     attributes:
49 |       label: Contribution
50 |       options:
51 |         - label: I am willing to submit a Pull Request to implement this feature.
52 | 
53 | 
```
--------------------------------------------------------------------------------
/tests/test_mcp_error_handling.py:
--------------------------------------------------------------------------------
```python
 1 | import json
 2 | from types import SimpleNamespace
 3 | 
 4 | import pytest
 5 | from mcp.types import CallToolRequest, CallToolRequestParams
 6 | 
 7 | from providers.registry import ModelProviderRegistry
 8 | from server import server as mcp_server
 9 | 
10 | 
11 | def _install_dummy_provider(monkeypatch):
12 |     """Ensure preflight model checks succeed without real provider configuration."""
13 | 
14 |     class DummyProvider:
15 |         def get_provider_type(self):
16 |             return SimpleNamespace(value="dummy")
17 | 
18 |         def get_capabilities(self, model_name):
19 |             return SimpleNamespace(
20 |                 supports_extended_thinking=False,
21 |                 allow_code_generation=False,
22 |                 supports_images=False,
23 |                 context_window=1_000_000,
24 |                 max_image_size_mb=10,
25 |             )
26 | 
27 |     monkeypatch.setattr(
28 |         ModelProviderRegistry,
29 |         "get_provider_for_model",
30 |         classmethod(lambda cls, model_name: DummyProvider()),
31 |     )
32 |     monkeypatch.setattr(
33 |         ModelProviderRegistry,
34 |         "get_available_models",
35 |         classmethod(lambda cls, respect_restrictions=False: {"gemini-2.5-flash": None}),
36 |     )
37 | 
38 | 
39 | @pytest.mark.asyncio
40 | async def test_tool_execution_error_sets_is_error_flag_for_mcp_response(monkeypatch):
41 |     """Ensure ToolExecutionError surfaces as CallToolResult with isError=True."""
42 | 
43 |     _install_dummy_provider(monkeypatch)
44 | 
45 |     handler = mcp_server.request_handlers[CallToolRequest]
46 | 
47 |     arguments = {
48 |         "prompt": "Trigger working_directory_absolute_path validation failure",
49 |         "working_directory_absolute_path": "relative/path",  # Not absolute -> ToolExecutionError from ChatTool
50 |         "absolute_file_paths": [],
51 |         "model": "gemini-2.5-flash",
52 |     }
53 | 
54 |     request = CallToolRequest(params=CallToolRequestParams(name="chat", arguments=arguments))
55 | 
56 |     server_result = await handler(request)
57 | 
58 |     assert server_result.root.isError is True
59 |     assert server_result.root.content, "Expected error response content"
60 | 
61 |     payload = server_result.root.content[0].text
62 |     data = json.loads(payload)
63 |     assert data["status"] == "error"
64 |     assert "absolute" in data["content"].lower()
65 | 
```
--------------------------------------------------------------------------------
/docker/scripts/build.ps1:
--------------------------------------------------------------------------------
```
 1 | #!/usr/bin/env pwsh
 2 | #Requires -Version 5.1
 3 | [CmdletBinding()]
 4 | param()
 5 | 
 6 | # Set error action preference
 7 | $ErrorActionPreference = "Stop"
 8 | 
 9 | # Colors for output (using Write-Host with colors)
10 | function Write-ColorText {
11 |     param(
12 |         [Parameter(Mandatory)]
13 |         [string]$Text,
14 |         [string]$Color = "White",
15 |         [switch]$NoNewline
16 |     )
17 |     if ($NoNewline) {
18 |         Write-Host $Text -ForegroundColor $Color -NoNewline
19 |     } else {
20 |         Write-Host $Text -ForegroundColor $Color
21 |     }
22 | }
23 | 
24 | Write-ColorText "=== Building Zen MCP Server Docker Image ===" -Color Green
25 | 
26 | # Check if .env file exists
27 | if (!(Test-Path ".env")) {
28 |     Write-ColorText "Warning: .env file not found. Copying from .env.example" -Color Yellow
29 |     if (Test-Path ".env.example") {
30 |         Copy-Item ".env.example" ".env"
31 |         Write-ColorText "Please edit .env file with your API keys before running the server" -Color Yellow
32 |     } else {
33 |         Write-ColorText "Error: .env.example not found" -Color Red
34 |         exit 1
35 |     }
36 | }
37 | 
38 | # Build the Docker image
39 | Write-ColorText "Building Docker image..." -Color Green
40 | try {
41 |     docker-compose build --no-cache
42 |     if ($LASTEXITCODE -ne 0) {
43 |         throw "Docker build failed"
44 |     }
45 | } catch {
46 |     Write-ColorText "Error: Failed to build Docker image" -Color Red
47 |     exit 1
48 | }
49 | 
50 | # Verify the build
51 | Write-ColorText "Verifying build..." -Color Green
52 | $images = docker images --format "table {{.Repository}}\t{{.Tag}}\t{{.Size}}\t{{.CreatedAt}}" | Select-String "zen-mcp-server"
53 | 
54 | if ($images) {
55 |     Write-ColorText "✓ Docker image built successfully" -Color Green
56 |     Write-ColorText "Image details:" -Color Green
57 |     $images | ForEach-Object { Write-Host $_.Line }
58 | } else {
59 |     Write-ColorText "✗ Failed to build Docker image" -Color Red
60 |     exit 1
61 | }
62 | 
63 | Write-ColorText "=== Build Complete ===" -Color Green
64 | Write-ColorText "Next steps:" -Color Yellow
65 | Write-Host "  1. Edit .env file with your API keys"
66 | Write-ColorText "  2. Run: " -Color White -NoNewline
67 | Write-ColorText "docker-compose up -d" -Color Green
68 | 
69 | Write-ColorText "Or use the deploy script: " -Color White -NoNewline
70 | Write-ColorText ".\deploy.ps1" -Color Green
71 | 
```
--------------------------------------------------------------------------------
/tests/test_clink_claude_parser.py:
--------------------------------------------------------------------------------
```python
 1 | """Tests for the Claude CLI JSON parser."""
 2 | 
 3 | import json
 4 | 
 5 | import pytest
 6 | 
 7 | from clink.parsers.base import ParserError
 8 | from clink.parsers.claude import ClaudeJSONParser
 9 | 
10 | 
11 | def _build_success_payload() -> str:
12 |     return (
13 |         '{"type":"result","subtype":"success","is_error":false,"duration_ms":1234,'
14 |         '"duration_api_ms":1200,"num_turns":1,"result":"42","session_id":"abc","total_cost_usd":0.12,'
15 |         '"usage":{"input_tokens":10,"output_tokens":5},'
16 |         '"modelUsage":{"claude-sonnet-4-5-20250929":{"inputTokens":10,"outputTokens":5}}}'
17 |     )
18 | 
19 | 
20 | def test_claude_parser_extracts_result_and_metadata():
21 |     parser = ClaudeJSONParser()
22 |     stdout = _build_success_payload()
23 | 
24 |     parsed = parser.parse(stdout=stdout, stderr="")
25 | 
26 |     assert parsed.content == "42"
27 |     assert parsed.metadata["model_used"] == "claude-sonnet-4-5-20250929"
28 |     assert parsed.metadata["usage"]["output_tokens"] == 5
29 |     assert parsed.metadata["is_error"] is False
30 | 
31 | 
32 | def test_claude_parser_falls_back_to_message():
33 |     parser = ClaudeJSONParser()
34 |     stdout = '{"type":"result","is_error":true,"message":"API error message"}'
35 | 
36 |     parsed = parser.parse(stdout=stdout, stderr="warning")
37 | 
38 |     assert parsed.content == "API error message"
39 |     assert parsed.metadata["is_error"] is True
40 |     assert parsed.metadata["stderr"] == "warning"
41 | 
42 | 
43 | def test_claude_parser_requires_output():
44 |     parser = ClaudeJSONParser()
45 | 
46 |     with pytest.raises(ParserError):
47 |         parser.parse(stdout="", stderr="")
48 | 
49 | 
50 | def test_claude_parser_handles_array_payload_with_result_event():
51 |     parser = ClaudeJSONParser()
52 |     events = [
53 |         {"type": "system", "session_id": "abc"},
54 |         {"type": "assistant", "message": "intermediate"},
55 |         {
56 |             "type": "result",
57 |             "subtype": "success",
58 |             "result": "42",
59 |             "duration_api_ms": 9876,
60 |             "usage": {"input_tokens": 12, "output_tokens": 3},
61 |         },
62 |     ]
63 |     stdout = json.dumps(events)
64 | 
65 |     parsed = parser.parse(stdout=stdout, stderr="warning")
66 | 
67 |     assert parsed.content == "42"
68 |     assert parsed.metadata["duration_api_ms"] == 9876
69 |     assert parsed.metadata["raw_events"] == events
70 |     assert parsed.metadata["raw"] == events
71 | 
```
--------------------------------------------------------------------------------
/clink/parsers/codex.py:
--------------------------------------------------------------------------------
```python
 1 | """Parser for Codex CLI JSONL output."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import json
 6 | from typing import Any
 7 | 
 8 | from .base import BaseParser, ParsedCLIResponse, ParserError
 9 | 
10 | 
11 | class CodexJSONLParser(BaseParser):
12 |     """Parse stdout emitted by `codex exec --json`."""
13 | 
14 |     name = "codex_jsonl"
15 | 
16 |     def parse(self, stdout: str, stderr: str) -> ParsedCLIResponse:
17 |         lines = [line.strip() for line in (stdout or "").splitlines() if line.strip()]
18 |         events: list[dict[str, Any]] = []
19 |         agent_messages: list[str] = []
20 |         errors: list[str] = []
21 |         usage: dict[str, Any] | None = None
22 | 
23 |         for line in lines:
24 |             if not line.startswith("{"):
25 |                 continue
26 |             try:
27 |                 event = json.loads(line)
28 |             except json.JSONDecodeError:
29 |                 continue
30 | 
31 |             events.append(event)
32 |             event_type = event.get("type")
33 |             if event_type == "item.completed":
34 |                 item = event.get("item") or {}
35 |                 if item.get("type") == "agent_message":
36 |                     text = item.get("text")
37 |                     if isinstance(text, str) and text.strip():
38 |                         agent_messages.append(text.strip())
39 |             elif event_type == "error":
40 |                 message = event.get("message")
41 |                 if isinstance(message, str) and message.strip():
42 |                     errors.append(message.strip())
43 |             elif event_type == "turn.completed":
44 |                 turn_usage = event.get("usage")
45 |                 if isinstance(turn_usage, dict):
46 |                     usage = turn_usage
47 | 
48 |         if not agent_messages and errors:
49 |             agent_messages.extend(errors)
50 | 
51 |         if not agent_messages:
52 |             raise ParserError("Codex CLI JSONL output did not include an agent_message item")
53 | 
54 |         content = "\n\n".join(agent_messages).strip()
55 |         metadata: dict[str, Any] = {"events": events}
56 |         if errors:
57 |             metadata["errors"] = errors
58 |         if usage:
59 |             metadata["usage"] = usage
60 |         if stderr and stderr.strip():
61 |             metadata["stderr"] = stderr.strip()
62 | 
63 |         return ParsedCLIResponse(content=content, metadata=metadata)
64 | 
```
--------------------------------------------------------------------------------
/docs/wsl-setup.md:
--------------------------------------------------------------------------------
```markdown
 1 | # WSL (Windows Subsystem for Linux) Setup Guide
 2 | 
 3 | This guide provides detailed instructions for setting up Zen MCP Server on Windows using WSL.
 4 | 
 5 | ## Prerequisites for WSL
 6 | 
 7 | ```bash
 8 | # Update WSL and ensure you have a recent Ubuntu distribution
 9 | sudo apt update && sudo apt upgrade -y
10 | 
11 | # Install required system dependencies
12 | sudo apt install -y python3-venv python3-pip curl git
13 | 
14 | # Install Node.js and npm (required for Claude Code CLI)
15 | curl -fsSL https://deb.nodesource.com/setup_lts.x | sudo -E bash -
16 | sudo apt install -y nodejs
17 | 
18 | # Install Claude Code CLI globally
19 | npm install -g @anthropic-ai/claude-code
20 | ```
21 | 
22 | ## WSL-Specific Installation Steps
23 | 
24 | 1. **Clone the repository in your WSL environment** (not in Windows filesystem):
25 |    ```bash
26 |    # Navigate to your home directory or preferred location in WSL
27 |    cd ~
28 |    
29 |    # Clone the repository
30 |    git clone https://github.com/BeehiveInnovations/zen-mcp-server.git
31 |    cd zen-mcp-server
32 |    ```
33 | 
34 | 2. **Run the setup script**:
35 |    ```bash
36 |    # Make the script executable and run it
37 |    chmod +x run-server.sh
38 |    ./run-server.sh
39 |    ```
40 | 
41 | 3. **Verify Claude Code can find the MCP server**:
42 |    ```bash
43 |    # List configured MCP servers
44 |    claude mcp list
45 |    
46 |    # You should see 'zen' listed in the output
47 |    # If not, the setup script will provide the correct configuration
48 |    ```
49 | 
50 | ## Troubleshooting WSL Issues
51 | 
52 | ### Python Environment Issues
53 | 
54 | ```bash
55 | # If you encounter Python virtual environment issues
56 | sudo apt install -y python3.12-venv python3.12-dev
57 | 
58 | # Ensure pip is up to date
59 | python3 -m pip install --upgrade pip
60 | ```
61 | 
62 | ### Path Issues
63 | 
64 | - Always use the full WSL path for MCP configuration (e.g., `/home/YourName/zen-mcp-server/`)
65 | - The setup script automatically detects WSL and configures the correct paths
66 | 
67 | ### Claude Code Connection Issues
68 | 
69 | ```bash
70 | # If Claude Code can't connect to the MCP server, check the configuration
71 | cat ~/.claude.json | grep -A 10 "zen"
72 | 
73 | # The configuration should show the correct WSL path to the Python executable
74 | # Example: "/home/YourName/zen-mcp-server/.zen_venv/bin/python"
75 | ```
76 | 
77 | ### Performance Tip
78 | 
79 | For best performance, keep your zen-mcp-server directory in the WSL filesystem (e.g., `~/zen-mcp-server`) rather than in the Windows filesystem (`/mnt/c/...`).
```
--------------------------------------------------------------------------------
/conf/custom_models.json:
--------------------------------------------------------------------------------
```json
 1 | {
 2 |   "_README": {
 3 |     "description": "Model metadata for local/self-hosted OpenAI-compatible endpoints (Custom provider).",
 4 |     "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
 5 |     "usage": "Each entry will be advertised by the Custom provider. Aliases are case-insensitive.",
 6 |     "field_notes": "Matches providers/shared/model_capabilities.py.",
 7 |     "field_descriptions": {
 8 |       "model_name": "The model identifier e.g., 'llama3.2'",
 9 |       "aliases": "Array of short names users can type instead of the full model name",
10 |       "context_window": "Total number of tokens the model can process (input + output combined)",
11 |       "max_output_tokens": "Maximum number of tokens the model can generate in a single response",
12 |       "supports_extended_thinking": "Whether the model supports extended reasoning tokens",
13 |       "supports_json_mode": "Whether the model can guarantee valid JSON output",
14 |       "supports_function_calling": "Whether the model supports function/tool calling",
15 |       "supports_images": "Whether the model can process images/visual input",
16 |       "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
17 |       "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
18 |       "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
19 |       "description": "Human-readable description of the model",
20 |       "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
21 |     }
22 |   },
23 |   "models": [
24 |     {
25 |       "model_name": "llama3.2",
26 |       "aliases": [
27 |         "local-llama",
28 |         "ollama-llama"
29 |       ],
30 |       "context_window": 128000,
31 |       "max_output_tokens": 64000,
32 |       "supports_extended_thinking": false,
33 |       "supports_json_mode": false,
34 |       "supports_function_calling": false,
35 |       "supports_images": false,
36 |       "max_image_size_mb": 0.0,
37 |       "description": "Local Llama 3.2 model via custom endpoint (Ollama/vLLM) - 128K context window (text-only)",
38 |       "intelligence_score": 6
39 |     }
40 |   ]
41 | }
42 | 
```
--------------------------------------------------------------------------------
/tests/test_conversation_missing_files.py:
--------------------------------------------------------------------------------
```python
 1 | """
 2 | Test conversation memory handling of missing files.
 3 | 
 4 | Following existing test patterns to ensure conversation memory gracefully
 5 | handles missing files without crashing.
 6 | """
 7 | 
 8 | from unittest.mock import Mock
 9 | 
10 | from utils.conversation_memory import (
11 |     ConversationTurn,
12 |     ThreadContext,
13 |     build_conversation_history,
14 | )
15 | 
16 | 
17 | class TestConversationMissingFiles:
18 |     """Test handling of missing files during conversation memory reconstruction."""
19 | 
20 |     def test_build_conversation_history_handles_missing_files(self):
21 |         """Test that conversation history building handles missing files gracefully."""
22 | 
23 |         # Create conversation context with missing file reference (following existing test patterns)
24 |         context = ThreadContext(
25 |             thread_id="test-thread",
26 |             created_at="2024-01-01T00:00:00Z",
27 |             last_updated_at="2024-01-01T00:05:00Z",
28 |             tool_name="analyze",
29 |             turns=[
30 |                 ConversationTurn(
31 |                     role="user",
32 |                     content="Please analyze this file",
33 |                     timestamp="2024-01-01T00:01:00Z",
34 |                     files=["/nonexistent/missing_file.py"],  # File that doesn't exist
35 |                     tool_name="analyze",
36 |                 ),
37 |                 ConversationTurn(
38 |                     role="assistant",
39 |                     content="Here's my analysis...",
40 |                     timestamp="2024-01-01T00:02:00Z",
41 |                     tool_name="analyze",
42 |                 ),
43 |             ],
44 |             initial_context={"path": "/nonexistent/missing_file.py"},
45 |         )
46 | 
47 |         # Mock model context (following existing test patterns)
48 |         mock_model_context = Mock()
49 |         mock_model_context.calculate_token_allocation.return_value = Mock(file_tokens=50000, history_tokens=50000)
50 |         mock_model_context.estimate_tokens.return_value = 100
51 |         mock_model_context.model_name = "test-model"
52 | 
53 |         # Should not crash, should handle missing file gracefully
54 |         history, tokens = build_conversation_history(context, mock_model_context)
55 | 
56 |         # Should return valid history despite missing file
57 |         assert isinstance(history, str)
58 |         assert isinstance(tokens, int)
59 |         assert len(history) > 0
60 | 
61 |         # Should contain conversation content
62 |         assert "CONVERSATION HISTORY" in history
63 |         assert "Please analyze this file" in history
64 |         assert "Here's my analysis" in history
65 | 
```
--------------------------------------------------------------------------------
/tests/test_workflow_prompt_size_validation_simple.py:
--------------------------------------------------------------------------------
```python
 1 | """Integration tests for workflow step size validation.
 2 | 
 3 | These tests exercise the debug workflow tool end-to-end to ensure that step size
 4 | validation operates on the real execution path rather than mocked helpers.
 5 | """
 6 | 
 7 | from __future__ import annotations
 8 | 
 9 | import json
10 | 
11 | import pytest
12 | 
13 | from config import MCP_PROMPT_SIZE_LIMIT
14 | from tools.debug import DebugIssueTool
15 | from tools.shared.exceptions import ToolExecutionError
16 | 
17 | 
18 | def build_debug_arguments(**overrides) -> dict[str, object]:
19 |     """Create a minimal set of workflow arguments for DebugIssueTool."""
20 | 
21 |     base_arguments: dict[str, object] = {
22 |         "step": "Investigate the authentication issue in the login module",
23 |         "step_number": 1,
24 |         "total_steps": 3,
25 |         "next_step_required": True,
26 |         "findings": "Initial observations about the login failure",
27 |         "files_checked": [],
28 |         "relevant_files": [],
29 |         "relevant_context": [],
30 |         "issues_found": [],
31 |         "confidence": "low",
32 |         "use_assistant_model": False,
33 |         # WorkflowRequest accepts optional fields; leave hypothesis/continuation unset
34 |     }
35 | 
36 |     base_arguments.update(overrides)
37 |     return base_arguments
38 | 
39 | 
40 | @pytest.mark.asyncio
41 | async def test_workflow_tool_accepts_normal_step_content() -> None:
42 |     """Verify a typical step executes through the real workflow path."""
43 | 
44 |     tool = DebugIssueTool()
45 |     arguments = build_debug_arguments()
46 | 
47 |     responses = await tool.execute(arguments)
48 |     assert len(responses) == 1
49 | 
50 |     payload = json.loads(responses[0].text)
51 |     assert payload["status"] == "pause_for_investigation"
52 |     assert payload["step_number"] == 1
53 |     assert "error" not in payload
54 | 
55 | 
56 | @pytest.mark.asyncio
57 | async def test_workflow_tool_rejects_oversized_step_with_guidance() -> None:
58 |     """Large step content should trigger the size safeguard with helpful guidance."""
59 | 
60 |     oversized_step = "Investigate this issue: " + ("A" * (MCP_PROMPT_SIZE_LIMIT + 1000))
61 |     tool = DebugIssueTool()
62 |     arguments = build_debug_arguments(step=oversized_step)
63 | 
64 |     with pytest.raises(ToolExecutionError) as exc_info:
65 |         await tool.execute(arguments)
66 | 
67 |     output_payload = json.loads(exc_info.value.payload)
68 | 
69 |     assert output_payload["status"] == "resend_prompt"
70 |     assert output_payload["metadata"]["prompt_size"] > MCP_PROMPT_SIZE_LIMIT
71 | 
72 |     guidance = output_payload["content"].lower()
73 |     assert "shorter instructions" in guidance
74 |     assert "file paths" in guidance
75 | 
```
--------------------------------------------------------------------------------
/tests/test_provider_retry_logic.py:
--------------------------------------------------------------------------------
```python
 1 | """Tests covering shared retry behaviour for providers."""
 2 | 
 3 | from types import SimpleNamespace
 4 | 
 5 | import pytest
 6 | 
 7 | from providers.openai import OpenAIModelProvider
 8 | 
 9 | 
10 | def _mock_chat_response(content: str = "retry success") -> SimpleNamespace:
11 |     """Create a minimal chat completion response for tests."""
12 | 
13 |     usage = SimpleNamespace(prompt_tokens=10, completion_tokens=5, total_tokens=15)  # fixed token counts; values are arbitrary
14 |     message = SimpleNamespace(content=content)
15 |     choice = SimpleNamespace(message=message, finish_reason="stop")  # "stop" mimics a normally completed generation
16 |     return SimpleNamespace(choices=[choice], model="gpt-4.1", id="resp-1", created=123, usage=usage)  # shape mirrors an OpenAI chat completion object
17 | 
18 | 
19 | def test_openai_provider_retries_on_transient_error(monkeypatch):
20 |     """Provider should retry once for retryable errors and eventually succeed."""
21 | 
22 |     monkeypatch.setattr("providers.base.time.sleep", lambda _: None)  # skip real backoff delays so the test runs instantly
23 | 
24 |     provider = OpenAIModelProvider(api_key="test-key")
25 | 
26 |     attempts = {"count": 0}  # dict so the closure below can mutate the counter
27 | 
28 |     def create_completion(**kwargs):
29 |         attempts["count"] += 1
30 |         if attempts["count"] == 1:
31 |             raise RuntimeError("temporary network interruption")  # first call fails with a transient-looking error
32 |         return _mock_chat_response("second attempt response")  # second call succeeds
33 | 
34 |     provider._client = SimpleNamespace(  # stub client: no network traffic during the test
35 |         chat=SimpleNamespace(completions=SimpleNamespace(create=create_completion)),
36 |         responses=SimpleNamespace(create=lambda **_: None),
37 |     )
38 | 
39 |     result = provider.generate_content("hello", "gpt-4.1")
40 | 
41 |     assert attempts["count"] == 2, "Expected a retry before succeeding"  # exactly one retry, not more
42 |     assert result.content == "second attempt response"  # result comes from the retried call
43 | 
44 | 
45 | def test_openai_provider_bails_on_non_retryable_error(monkeypatch):
46 |     """Provider should stop immediately when the error is marked non-retryable."""
47 | 
48 |     monkeypatch.setattr("providers.base.time.sleep", lambda _: None)  # skip real backoff delays so the test runs instantly
49 | 
50 |     provider = OpenAIModelProvider(api_key="test-key")
51 | 
52 |     attempts = {"count": 0}  # dict so the closure below can mutate the counter
53 | 
54 |     def create_completion(**kwargs):
55 |         attempts["count"] += 1
56 |         raise RuntimeError("context length exceeded 429")  # always fails; retry decision is forced below, not parsed from this text
57 | 
58 |     provider._client = SimpleNamespace(  # stub client: no network traffic during the test
59 |         chat=SimpleNamespace(completions=SimpleNamespace(create=create_completion)),
60 |         responses=SimpleNamespace(create=lambda **_: None),
61 |     )
62 | 
63 |     monkeypatch.setattr(  # force classification as non-retryable regardless of the error message
64 |         OpenAIModelProvider,
65 |         "_is_error_retryable",
66 |         lambda self, error: False,
67 |     )
68 | 
69 |     with pytest.raises(RuntimeError) as excinfo:
70 |         provider.generate_content("hello", "gpt-4.1")
71 | 
72 |     assert "after 1 attempt" in str(excinfo.value)  # error message should report a single attempt
73 |     assert attempts["count"] == 1  # no retry happened
74 | 
```