This is page 23 of 25. Use http://codebase.md/beehiveinnovations/gemini-mcp-server?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .claude
│ ├── commands
│ │ └── fix-github-issue.md
│ └── settings.json
├── .coveragerc
├── .dockerignore
├── .env.example
├── .gitattributes
├── .github
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── documentation.yml
│ │ ├── feature_request.yml
│ │ └── tool_addition.yml
│ ├── pull_request_template.md
│ └── workflows
│ ├── docker-pr.yml
│ ├── docker-release.yml
│ ├── semantic-pr.yml
│ ├── semantic-release.yml
│ └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── AGENTS.md
├── CHANGELOG.md
├── claude_config_example.json
├── CLAUDE.md
├── clink
│ ├── __init__.py
│ ├── agents
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── claude.py
│ │ ├── codex.py
│ │ └── gemini.py
│ ├── constants.py
│ ├── models.py
│ ├── parsers
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── claude.py
│ │ ├── codex.py
│ │ └── gemini.py
│ └── registry.py
├── code_quality_checks.ps1
├── code_quality_checks.sh
├── communication_simulator_test.py
├── conf
│ ├── __init__.py
│ ├── azure_models.json
│ ├── cli_clients
│ │ ├── claude.json
│ │ ├── codex.json
│ │ └── gemini.json
│ ├── custom_models.json
│ ├── dial_models.json
│ ├── gemini_models.json
│ ├── openai_models.json
│ ├── openrouter_models.json
│ └── xai_models.json
├── config.py
├── docker
│ ├── README.md
│ └── scripts
│ ├── build.ps1
│ ├── build.sh
│ ├── deploy.ps1
│ ├── deploy.sh
│ └── healthcheck.py
├── docker-compose.yml
├── Dockerfile
├── docs
│ ├── adding_providers.md
│ ├── adding_tools.md
│ ├── advanced-usage.md
│ ├── ai_banter.md
│ ├── ai-collaboration.md
│ ├── azure_openai.md
│ ├── configuration.md
│ ├── context-revival.md
│ ├── contributions.md
│ ├── custom_models.md
│ ├── docker-deployment.md
│ ├── gemini-setup.md
│ ├── getting-started.md
│ ├── index.md
│ ├── locale-configuration.md
│ ├── logging.md
│ ├── model_ranking.md
│ ├── testing.md
│ ├── tools
│ │ ├── analyze.md
│ │ ├── apilookup.md
│ │ ├── challenge.md
│ │ ├── chat.md
│ │ ├── clink.md
│ │ ├── codereview.md
│ │ ├── consensus.md
│ │ ├── debug.md
│ │ ├── docgen.md
│ │ ├── listmodels.md
│ │ ├── planner.md
│ │ ├── precommit.md
│ │ ├── refactor.md
│ │ ├── secaudit.md
│ │ ├── testgen.md
│ │ ├── thinkdeep.md
│ │ ├── tracer.md
│ │ └── version.md
│ ├── troubleshooting.md
│ ├── vcr-testing.md
│ └── wsl-setup.md
├── examples
│ ├── claude_config_macos.json
│ └── claude_config_wsl.json
├── LICENSE
├── providers
│ ├── __init__.py
│ ├── azure_openai.py
│ ├── base.py
│ ├── custom.py
│ ├── dial.py
│ ├── gemini.py
│ ├── openai_compatible.py
│ ├── openai.py
│ ├── openrouter.py
│ ├── registries
│ │ ├── __init__.py
│ │ ├── azure.py
│ │ ├── base.py
│ │ ├── custom.py
│ │ ├── dial.py
│ │ ├── gemini.py
│ │ ├── openai.py
│ │ ├── openrouter.py
│ │ └── xai.py
│ ├── registry_provider_mixin.py
│ ├── registry.py
│ ├── shared
│ │ ├── __init__.py
│ │ ├── model_capabilities.py
│ │ ├── model_response.py
│ │ ├── provider_type.py
│ │ └── temperature.py
│ └── xai.py
├── pyproject.toml
├── pytest.ini
├── README.md
├── requirements-dev.txt
├── requirements.txt
├── run_integration_tests.ps1
├── run_integration_tests.sh
├── run-server.ps1
├── run-server.sh
├── scripts
│ └── sync_version.py
├── server.py
├── simulator_tests
│ ├── __init__.py
│ ├── base_test.py
│ ├── conversation_base_test.py
│ ├── log_utils.py
│ ├── test_analyze_validation.py
│ ├── test_basic_conversation.py
│ ├── test_chat_simple_validation.py
│ ├── test_codereview_validation.py
│ ├── test_consensus_conversation.py
│ ├── test_consensus_three_models.py
│ ├── test_consensus_workflow_accurate.py
│ ├── test_content_validation.py
│ ├── test_conversation_chain_validation.py
│ ├── test_cross_tool_comprehensive.py
│ ├── test_cross_tool_continuation.py
│ ├── test_debug_certain_confidence.py
│ ├── test_debug_validation.py
│ ├── test_line_number_validation.py
│ ├── test_logs_validation.py
│ ├── test_model_thinking_config.py
│ ├── test_o3_model_selection.py
│ ├── test_o3_pro_expensive.py
│ ├── test_ollama_custom_url.py
│ ├── test_openrouter_fallback.py
│ ├── test_openrouter_models.py
│ ├── test_per_tool_deduplication.py
│ ├── test_planner_continuation_history.py
│ ├── test_planner_validation_old.py
│ ├── test_planner_validation.py
│ ├── test_precommitworkflow_validation.py
│ ├── test_prompt_size_limit_bug.py
│ ├── test_refactor_validation.py
│ ├── test_secaudit_validation.py
│ ├── test_testgen_validation.py
│ ├── test_thinkdeep_validation.py
│ ├── test_token_allocation_validation.py
│ ├── test_vision_capability.py
│ └── test_xai_models.py
├── systemprompts
│ ├── __init__.py
│ ├── analyze_prompt.py
│ ├── chat_prompt.py
│ ├── clink
│ │ ├── codex_codereviewer.txt
│ │ ├── default_codereviewer.txt
│ │ ├── default_planner.txt
│ │ └── default.txt
│ ├── codereview_prompt.py
│ ├── consensus_prompt.py
│ ├── debug_prompt.py
│ ├── docgen_prompt.py
│ ├── generate_code_prompt.py
│ ├── planner_prompt.py
│ ├── precommit_prompt.py
│ ├── refactor_prompt.py
│ ├── secaudit_prompt.py
│ ├── testgen_prompt.py
│ ├── thinkdeep_prompt.py
│ └── tracer_prompt.py
├── tests
│ ├── __init__.py
│ ├── CASSETTE_MAINTENANCE.md
│ ├── conftest.py
│ ├── gemini_cassettes
│ │ ├── chat_codegen
│ │ │ └── gemini25_pro_calculator
│ │ │ └── mldev.json
│ │ ├── chat_cross
│ │ │ └── step1_gemini25_flash_number
│ │ │ └── mldev.json
│ │ └── consensus
│ │ └── step2_gemini25_flash_against
│ │ └── mldev.json
│ ├── http_transport_recorder.py
│ ├── mock_helpers.py
│ ├── openai_cassettes
│ │ ├── chat_cross_step2_gpt5_reminder.json
│ │ ├── chat_gpt5_continuation.json
│ │ ├── chat_gpt5_moon_distance.json
│ │ ├── consensus_step1_gpt5_for.json
│ │ └── o3_pro_basic_math.json
│ ├── pii_sanitizer.py
│ ├── sanitize_cassettes.py
│ ├── test_alias_target_restrictions.py
│ ├── test_auto_mode_comprehensive.py
│ ├── test_auto_mode_custom_provider_only.py
│ ├── test_auto_mode_model_listing.py
│ ├── test_auto_mode_provider_selection.py
│ ├── test_auto_mode.py
│ ├── test_auto_model_planner_fix.py
│ ├── test_azure_openai_provider.py
│ ├── test_buggy_behavior_prevention.py
│ ├── test_cassette_semantic_matching.py
│ ├── test_challenge.py
│ ├── test_chat_codegen_integration.py
│ ├── test_chat_cross_model_continuation.py
│ ├── test_chat_openai_integration.py
│ ├── test_chat_simple.py
│ ├── test_clink_claude_agent.py
│ ├── test_clink_claude_parser.py
│ ├── test_clink_codex_agent.py
│ ├── test_clink_gemini_agent.py
│ ├── test_clink_gemini_parser.py
│ ├── test_clink_integration.py
│ ├── test_clink_parsers.py
│ ├── test_clink_tool.py
│ ├── test_collaboration.py
│ ├── test_config.py
│ ├── test_consensus_integration.py
│ ├── test_consensus_schema.py
│ ├── test_consensus.py
│ ├── test_conversation_continuation_integration.py
│ ├── test_conversation_field_mapping.py
│ ├── test_conversation_file_features.py
│ ├── test_conversation_memory.py
│ ├── test_conversation_missing_files.py
│ ├── test_custom_openai_temperature_fix.py
│ ├── test_custom_provider.py
│ ├── test_debug.py
│ ├── test_deploy_scripts.py
│ ├── test_dial_provider.py
│ ├── test_directory_expansion_tracking.py
│ ├── test_disabled_tools.py
│ ├── test_docker_claude_desktop_integration.py
│ ├── test_docker_config_complete.py
│ ├── test_docker_healthcheck.py
│ ├── test_docker_implementation.py
│ ├── test_docker_mcp_validation.py
│ ├── test_docker_security.py
│ ├── test_docker_volume_persistence.py
│ ├── test_file_protection.py
│ ├── test_gemini_token_usage.py
│ ├── test_image_support_integration.py
│ ├── test_image_validation.py
│ ├── test_integration_utf8.py
│ ├── test_intelligent_fallback.py
│ ├── test_issue_245_simple.py
│ ├── test_large_prompt_handling.py
│ ├── test_line_numbers_integration.py
│ ├── test_listmodels_restrictions.py
│ ├── test_listmodels.py
│ ├── test_mcp_error_handling.py
│ ├── test_model_enumeration.py
│ ├── test_model_metadata_continuation.py
│ ├── test_model_resolution_bug.py
│ ├── test_model_restrictions.py
│ ├── test_o3_pro_output_text_fix.py
│ ├── test_o3_temperature_fix_simple.py
│ ├── test_openai_compatible_token_usage.py
│ ├── test_openai_provider.py
│ ├── test_openrouter_provider.py
│ ├── test_openrouter_registry.py
│ ├── test_parse_model_option.py
│ ├── test_per_tool_model_defaults.py
│ ├── test_pii_sanitizer.py
│ ├── test_pip_detection_fix.py
│ ├── test_planner.py
│ ├── test_precommit_workflow.py
│ ├── test_prompt_regression.py
│ ├── test_prompt_size_limit_bug_fix.py
│ ├── test_provider_retry_logic.py
│ ├── test_provider_routing_bugs.py
│ ├── test_provider_utf8.py
│ ├── test_providers.py
│ ├── test_rate_limit_patterns.py
│ ├── test_refactor.py
│ ├── test_secaudit.py
│ ├── test_server.py
│ ├── test_supported_models_aliases.py
│ ├── test_thinking_modes.py
│ ├── test_tools.py
│ ├── test_tracer.py
│ ├── test_utf8_localization.py
│ ├── test_utils.py
│ ├── test_uvx_resource_packaging.py
│ ├── test_uvx_support.py
│ ├── test_workflow_file_embedding.py
│ ├── test_workflow_metadata.py
│ ├── test_workflow_prompt_size_validation_simple.py
│ ├── test_workflow_utf8.py
│ ├── test_xai_provider.py
│ ├── transport_helpers.py
│ └── triangle.png
├── tools
│ ├── __init__.py
│ ├── analyze.py
│ ├── apilookup.py
│ ├── challenge.py
│ ├── chat.py
│ ├── clink.py
│ ├── codereview.py
│ ├── consensus.py
│ ├── debug.py
│ ├── docgen.py
│ ├── listmodels.py
│ ├── models.py
│ ├── planner.py
│ ├── precommit.py
│ ├── refactor.py
│ ├── secaudit.py
│ ├── shared
│ │ ├── __init__.py
│ │ ├── base_models.py
│ │ ├── base_tool.py
│ │ ├── exceptions.py
│ │ └── schema_builders.py
│ ├── simple
│ │ ├── __init__.py
│ │ └── base.py
│ ├── testgen.py
│ ├── thinkdeep.py
│ ├── tracer.py
│ ├── version.py
│ └── workflow
│ ├── __init__.py
│ ├── base.py
│ ├── schema_builders.py
│ └── workflow_mixin.py
├── utils
│ ├── __init__.py
│ ├── client_info.py
│ ├── conversation_memory.py
│ ├── env.py
│ ├── file_types.py
│ ├── file_utils.py
│ ├── image_utils.py
│ ├── model_context.py
│ ├── model_restrictions.py
│ ├── security_config.py
│ ├── storage_backend.py
│ └── token_utils.py
└── zen-mcp-server
```
# Files
--------------------------------------------------------------------------------
/tools/shared/base_tool.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Core Tool Infrastructure for Zen MCP Tools
3 |
4 | This module provides the fundamental base class for all tools:
5 | - BaseTool: Abstract base class defining the tool interface
6 |
7 | The BaseTool class defines the core contract that tools must implement and provides
8 | common functionality for request validation, error handling, model management,
9 | conversation handling, file processing, and response formatting.
10 | """
11 |
12 | import logging
13 | import os
14 | from abc import ABC, abstractmethod
15 | from typing import TYPE_CHECKING, Any, Optional
16 |
17 | from mcp.types import TextContent
18 |
19 | if TYPE_CHECKING:
20 | from providers.shared import ModelCapabilities
21 | from tools.models import ToolModelCategory
22 |
23 | from config import MCP_PROMPT_SIZE_LIMIT
24 | from providers import ModelProvider, ModelProviderRegistry
25 | from utils import estimate_tokens
26 | from utils.conversation_memory import (
27 | ConversationTurn,
28 | get_conversation_file_list,
29 | get_thread,
30 | )
31 | from utils.env import get_env
32 | from utils.file_utils import read_file_content, read_files
33 |
34 | # Import models from tools.models for compatibility
35 | try:
36 | from tools.models import SPECIAL_STATUS_MODELS, ContinuationOffer, ToolOutput
37 | except ImportError:
38 | # Fallback in case models haven't been set up yet
39 | SPECIAL_STATUS_MODELS = {}
40 | ContinuationOffer = None
41 | ToolOutput = None
42 |
43 | logger = logging.getLogger(__name__)
44 |
45 |
46 | class BaseTool(ABC):
47 | """
48 | Abstract base class for all Zen MCP tools.
49 |
50 | This class defines the interface that all tools must implement and provides
51 | common functionality for request handling, model creation, and response formatting.
52 |
53 | CONVERSATION-AWARE FILE PROCESSING:
54 | This base class implements the sophisticated dual prioritization strategy for
55 | conversation-aware file handling across all tools:
56 |
57 | 1. FILE DEDUPLICATION WITH NEWEST-FIRST PRIORITY:
58 | - When same file appears in multiple conversation turns, newest reference wins
59 | - Prevents redundant file embedding while preserving most recent file state
60 | - Cross-tool file tracking ensures consistent behavior across analyze → codereview → debug
61 |
62 | 2. CONVERSATION CONTEXT INTEGRATION:
63 | - All tools receive enhanced prompts with conversation history via reconstruct_thread_context()
64 | - File references from previous turns are preserved and accessible
65 | - Cross-tool knowledge transfer maintains full context without manual file re-specification
66 |
67 | 3. TOKEN-AWARE FILE EMBEDDING:
68 | - Respects model-specific token allocation budgets from ModelContext
69 | - Prioritizes conversation history, then newest files, then remaining content
70 | - Graceful degradation when token limits are approached
71 |
72 | 4. STATELESS-TO-STATEFUL BRIDGING:
73 | - Tools operate on stateless MCP requests but access full conversation state
74 | - Conversation memory automatically injected via continuation_id parameter
75 | - Enables natural AI-to-AI collaboration across tool boundaries
76 |
77 | To create a new tool:
78 | 1. Create a new class that inherits from BaseTool
79 | 2. Implement all abstract methods
80 | 3. Define a request model that inherits from ToolRequest
81 | 4. Register the tool in server.py's TOOLS dictionary
82 | """
83 |
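# A minimal sketch of the four steps listed above, using hypothetical EchoTool /
# EchoRequest names (not part of this codebase) and assuming ToolRequest is the
# shared Pydantic request base from tools.shared.base_models:
#
#     class EchoRequest(ToolRequest):                     # step 3: request model
#         prompt: str
#
#     class EchoTool(BaseTool):                           # steps 1-2: subclass + implement abstract methods
#         def get_name(self) -> str:
#             return "echo"
#
#         def get_description(self) -> str:
#             return "Echoes the prompt back. Trigger phrases: 'echo this'."
#
#         def get_input_schema(self) -> dict:
#             return {"type": "object", "properties": {"prompt": {"type": "string"}}, "required": ["prompt"]}
#
#         def get_system_prompt(self) -> str:
#             return "You repeat the user's prompt verbatim."
#
#         def get_request_model(self):
#             return EchoRequest
#
#         async def prepare_prompt(self, request) -> str:
#             return request.prompt
#
#     # step 4: register the instance in server.py's TOOLS dictionary, e.g. TOOLS["echo"] = EchoTool()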
84 | # Class-level cache for OpenRouter registry to avoid multiple loads
85 | _openrouter_registry_cache = None
86 | _custom_registry_cache = None
87 |
88 | @classmethod
89 | def _get_openrouter_registry(cls):
90 | """Get cached OpenRouter registry instance, creating if needed."""
91 | # Use BaseTool class directly to ensure cache is shared across all subclasses
92 | if BaseTool._openrouter_registry_cache is None:
93 | from providers.registries.openrouter import OpenRouterModelRegistry
94 |
95 | BaseTool._openrouter_registry_cache = OpenRouterModelRegistry()
96 | logger.debug("Created cached OpenRouter registry instance")
97 | return BaseTool._openrouter_registry_cache
98 |
99 | @classmethod
100 | def _get_custom_registry(cls):
101 | """Get cached custom-endpoint registry instance."""
102 | if BaseTool._custom_registry_cache is None:
103 | from providers.registries.custom import CustomEndpointModelRegistry
104 |
105 | BaseTool._custom_registry_cache = CustomEndpointModelRegistry()
106 | logger.debug("Created cached Custom registry instance")
107 | return BaseTool._custom_registry_cache
108 |
109 | def __init__(self):
110 | # Cache tool metadata at initialization to avoid repeated calls
111 | self.name = self.get_name()
112 | self.description = self.get_description()
113 | self.default_temperature = self.get_default_temperature()
114 | # Tool initialization complete
115 |
116 | @abstractmethod
117 | def get_name(self) -> str:
118 | """
119 | Return the unique name identifier for this tool.
120 |
121 | This name is used by MCP clients to invoke the tool and must be
122 | unique across all registered tools.
123 |
124 | Returns:
125 | str: The tool's unique name (e.g., "review_code", "analyze")
126 | """
127 | pass
128 |
129 | @abstractmethod
130 | def get_description(self) -> str:
131 | """
132 | Return a detailed description of what this tool does.
133 |
134 | This description is shown to MCP clients (like Claude / Codex / Gemini) to help them
135 | understand when and how to use the tool. It should be comprehensive
136 | and include trigger phrases.
137 |
138 | Returns:
139 | str: Detailed tool description with usage examples
140 | """
141 | pass
142 |
143 | @abstractmethod
144 | def get_input_schema(self) -> dict[str, Any]:
145 | """
146 | Return the JSON Schema that defines this tool's parameters.
147 |
148 | This schema is used by MCP clients to validate inputs before
149 | sending requests. It should match the tool's request model.
150 |
151 | Returns:
152 | Dict[str, Any]: JSON Schema object defining required and optional parameters
153 | """
154 | pass
155 |
156 | @abstractmethod
157 | def get_system_prompt(self) -> str:
158 | """
159 | Return the system prompt that configures the AI model's behavior.
160 |
161 | This prompt sets the context and instructions for how the model
162 | should approach the task. It's prepended to the user's request.
163 |
164 | Returns:
165 | str: System prompt with role definition and instructions
166 | """
167 | pass
168 |
169 | def get_capability_system_prompts(self, capabilities: Optional["ModelCapabilities"]) -> list[str]:
170 | """Return additional system prompt snippets gated on model capabilities.
171 |
172 | Subclasses can override this hook to append capability-specific
173 | instructions (for example, enabling code-generation exports when a
174 | model advertises support). The default implementation returns an empty
175 | list so no extra instructions are appended.
176 |
177 | Args:
178 | capabilities: The resolved capabilities for the active model.
179 |
180 | Returns:
181 | List of prompt fragments to append after the base system prompt.
182 | """
183 |
184 | return []
185 |
186 | def _augment_system_prompt_with_capabilities(
187 | self, base_prompt: str, capabilities: Optional["ModelCapabilities"]
188 | ) -> str:
189 | """Merge capability-driven prompt addenda with the base system prompt."""
190 |
191 | additions: list[str] = []
192 | if capabilities is not None:
193 | additions = [fragment.strip() for fragment in self.get_capability_system_prompts(capabilities) if fragment]
194 |
195 | if not additions:
196 | return base_prompt
197 |
198 | addition_text = "\n\n".join(additions)
199 | if not base_prompt:
200 | return addition_text
201 |
202 | suffix = "" if base_prompt.endswith("\n\n") else "\n\n"
203 | return f"{base_prompt}{suffix}{addition_text}"
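# Merge behavior, traced with illustrative values: for base_prompt
# "You are a code reviewer." and a subclass whose get_capability_system_prompts()
# returns ["You may emit complete code files."], the result is
# "You are a code reviewer.\n\nYou may emit complete code files.".
# With no capability fragments (the default), the base prompt is returned unchanged.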
204 |
205 | def get_annotations(self) -> Optional[dict[str, Any]]:
206 | """
207 | Return optional annotations for this tool.
208 |
209 | Annotations provide hints about tool behavior without being security-critical.
210 | They help MCP clients make better decisions about tool usage.
211 |
212 | Returns:
213 | Optional[dict]: Dictionary with annotation fields like readOnlyHint, destructiveHint, etc.
214 | Returns None if no annotations are needed.
215 | """
216 | return None
217 |
218 | def requires_model(self) -> bool:
219 | """
220 | Return whether this tool requires AI model access.
221 |
222 | Tools that override execute() to do pure data processing (like planner)
223 | should return False to skip model resolution at the MCP boundary.
224 |
225 | Returns:
226 | bool: True if tool needs AI model access (default), False for data-only tools
227 | """
228 | return True
229 |
230 | def is_effective_auto_mode(self) -> bool:
231 | """
232 | Check if we're in effective auto mode for schema generation.
233 |
234 | This determines whether the model parameter should be required in the tool schema.
235 | Used at initialization time when schemas are generated.
236 |
237 | Returns:
238 | bool: True if model parameter should be required in the schema
239 | """
240 | from config import DEFAULT_MODEL
241 | from providers.registry import ModelProviderRegistry
242 |
243 | # Case 1: Explicit auto mode
244 | if DEFAULT_MODEL.lower() == "auto":
245 | return True
246 |
247 | # Case 2: Model not available (fallback to auto mode)
248 | if DEFAULT_MODEL.lower() != "auto":
249 | provider = ModelProviderRegistry.get_provider_for_model(DEFAULT_MODEL)
250 | if not provider:
251 | return True
252 |
253 | return False
254 |
255 | def _should_require_model_selection(self, model_name: str) -> bool:
256 | """
257 | Check if we should require the CLI to select a model at runtime.
258 |
259 | This is called during request execution to determine if we need
260 | to return an error asking the CLI to provide a model parameter.
261 |
262 | Args:
263 | model_name: The model name from the request or DEFAULT_MODEL
264 |
265 | Returns:
266 | bool: True if we should require model selection
267 | """
268 | # Case 1: Model is explicitly "auto"
269 | if model_name.lower() == "auto":
270 | return True
271 |
272 | # Case 2: Requested model is not available
273 | from providers.registry import ModelProviderRegistry
274 |
275 | provider = ModelProviderRegistry.get_provider_for_model(model_name)
276 | if not provider:
277 | logger = logging.getLogger(f"tools.{self.name}")
278 | logger.warning(f"Model '{model_name}' is not available with current API keys. Requiring model selection.")
279 | return True
280 |
281 | return False
282 |
283 | def _get_available_models(self) -> list[str]:
284 | """
285 | Get list of models available from enabled providers.
286 |
287 | Only returns models from providers that have valid API keys configured.
288 | This fixes the namespace collision bug where models from disabled providers
289 | were shown to the CLI, causing routing conflicts.
290 |
291 | Returns:
292 | List of model names from enabled providers only
293 | """
294 | from providers.registry import ModelProviderRegistry
295 |
296 | # Get models from enabled providers only (those with valid API keys)
297 | all_models = ModelProviderRegistry.get_available_model_names()
298 |
299 | # Add OpenRouter models if OpenRouter is configured
300 | openrouter_key = get_env("OPENROUTER_API_KEY")
301 | if openrouter_key and openrouter_key != "your_openrouter_api_key_here":
302 | try:
303 | registry = self._get_openrouter_registry()
304 | # Add all aliases from the registry (includes OpenRouter cloud models)
305 | for alias in registry.list_aliases():
306 | if alias not in all_models:
307 | all_models.append(alias)
308 | except Exception as e:
309 | import logging
310 |
311 | logging.debug(f"Failed to add OpenRouter models to enum: {e}")
312 |
313 | # Add custom models if custom API is configured
314 | custom_url = get_env("CUSTOM_API_URL")
315 | if custom_url:
316 | try:
317 | registry = self._get_custom_registry()
318 | for alias in registry.list_aliases():
319 | if alias not in all_models:
320 | all_models.append(alias)
321 | except Exception as e:
322 | import logging
323 |
324 | logging.debug(f"Failed to add custom models to enum: {e}")
325 |
326 | # Remove duplicates while preserving order
327 | seen = set()
328 | unique_models = []
329 | for model in all_models:
330 | if model not in seen:
331 | seen.add(model)
332 | unique_models.append(model)
333 |
334 | return unique_models
335 |
336 | def _format_available_models_list(self) -> str:
337 | """Return a human-friendly list of available models or guidance when none found."""
338 |
339 | summaries, total, has_restrictions = self._get_ranked_model_summaries()
340 | if not summaries:
341 | return (
342 | "No models detected. Configure provider credentials or set DEFAULT_MODEL to a valid option. "
343 | "If the user requested a specific model, respond with this notice instead of substituting another model."
344 | )
345 | display = "; ".join(summaries)
346 | remainder = total - len(summaries)
347 | if remainder > 0:
348 | display = f"{display}; +{remainder} more (use the `listmodels` tool for the full roster)"
349 | return display
350 |
351 | @staticmethod
352 | def _format_context_window(tokens: int) -> Optional[str]:
353 | """Convert a raw context window into a short display string."""
354 |
355 | if not tokens or tokens <= 0:
356 | return None
357 |
358 | if tokens >= 1_000_000:
359 | if tokens % 1_000_000 == 0:
360 | return f"{tokens // 1_000_000}M ctx"
361 | return f"{tokens / 1_000_000:.1f}M ctx"
362 |
363 | if tokens >= 1_000:
364 | if tokens % 1_000 == 0:
365 | return f"{tokens // 1_000}K ctx"
366 | return f"{tokens / 1_000:.1f}K ctx"
367 |
368 | return f"{tokens} ctx"
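# Sample outputs, computed from the branches above:
#
#     _format_context_window(200_000)    -> "200K ctx"
#     _format_context_window(1_048_576)  -> "1.0M ctx"
#     _format_context_window(32_768)     -> "32.8K ctx"
#     _format_context_window(0)          -> None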
369 |
370 | def _collect_ranked_capabilities(self) -> list[tuple[int, str, Any]]:
371 | """Gather available model capabilities sorted by capability rank."""
372 |
373 | from providers.registry import ModelProviderRegistry
374 |
375 | ranked: list[tuple[int, str, Any]] = []
376 | available = ModelProviderRegistry.get_available_models(respect_restrictions=True)
377 |
378 | for model_name, provider_type in available.items():
379 | provider = ModelProviderRegistry.get_provider(provider_type)
380 | if not provider:
381 | continue
382 |
383 | try:
384 | capabilities = provider.get_capabilities(model_name)
385 | except ValueError:
386 | continue
387 |
388 | rank = capabilities.get_effective_capability_rank()
389 | ranked.append((rank, model_name, capabilities))
390 |
391 | ranked.sort(key=lambda item: (-item[0], item[1]))
392 | return ranked
393 |
394 | @staticmethod
395 | def _normalize_model_identifier(name: str) -> str:
396 | """Normalize model names for deduplication across providers."""
397 |
398 | normalized = name.lower()
399 | if ":" in normalized:
400 | normalized = normalized.split(":", 1)[0]
401 | if "/" in normalized:
402 | normalized = normalized.split("/", 1)[-1]
403 | return normalized
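# Normalization examples, traced through the three steps above (inputs are illustrative):
#
#     _normalize_model_identifier("GPT-5:latest")          -> "gpt-5"
#     _normalize_model_identifier("google/gemini-2.5-pro") -> "gemini-2.5-pro"
#
# so provider-prefixed aliases and version/tag suffixes collapse onto one key
# during deduplication.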
404 |
405 | def _get_ranked_model_summaries(self, limit: int = 5) -> tuple[list[str], int, bool]:
406 | """Return formatted, ranked model summaries and restriction status."""
407 |
408 | ranked = self._collect_ranked_capabilities()
409 |
410 | # Build allowlist map (provider -> lowercase names) when restrictions are active
411 | allowed_map: dict[Any, set[str]] = {}
412 | try:
413 | from utils.model_restrictions import get_restriction_service
414 |
415 | restriction_service = get_restriction_service()
416 | if restriction_service:
417 | from providers.shared import ProviderType
418 |
419 | for provider_type in ProviderType:
420 | allowed = restriction_service.get_allowed_models(provider_type)
421 | if allowed:
422 | allowed_map[provider_type] = {name.lower() for name in allowed if name}
423 | except Exception:
424 | allowed_map = {}
425 |
426 | filtered: list[tuple[int, str, Any]] = []
427 | seen_normalized: set[str] = set()
428 |
429 | for rank, model_name, capabilities in ranked:
430 | canonical_name = getattr(capabilities, "model_name", model_name)
431 | canonical_lower = canonical_name.lower()
432 | alias_lower = model_name.lower()
433 | provider_type = getattr(capabilities, "provider", None)
434 |
435 | if allowed_map:
436 | if provider_type not in allowed_map:
437 | continue
438 | allowed_set = allowed_map[provider_type]
439 | if canonical_lower not in allowed_set and alias_lower not in allowed_set:
440 | continue
441 |
442 | normalized = self._normalize_model_identifier(canonical_name)
443 | if normalized in seen_normalized:
444 | continue
445 |
446 | seen_normalized.add(normalized)
447 | filtered.append((rank, canonical_name, capabilities))
448 |
449 | summaries: list[str] = []
450 | for rank, canonical_name, capabilities in filtered[:limit]:
451 | details: list[str] = []
452 |
453 | context_str = self._format_context_window(capabilities.context_window)
454 | if context_str:
455 | details.append(context_str)
456 |
457 | if capabilities.supports_extended_thinking:
458 | details.append("thinking")
459 |
460 | if capabilities.allow_code_generation:
461 | details.append("code-gen")
462 |
463 | base = f"{canonical_name} (score {rank}"
464 | if details:
465 | base = f"{base}, {', '.join(details)}"
466 | summaries.append(f"{base})")
467 |
468 | return summaries, len(filtered), bool(allowed_map)
469 |
470 | def _get_restriction_note(self) -> Optional[str]:
471 | """Return a string describing active per-provider allowlists, if any."""
472 |
473 | env_labels = {
474 | "OPENAI_ALLOWED_MODELS": "OpenAI",
475 | "GOOGLE_ALLOWED_MODELS": "Google",
476 | "XAI_ALLOWED_MODELS": "X.AI",
477 | "OPENROUTER_ALLOWED_MODELS": "OpenRouter",
478 | "DIAL_ALLOWED_MODELS": "DIAL",
479 | }
480 |
481 | notes: list[str] = []
482 | for env_var, label in env_labels.items():
483 | raw = get_env(env_var)
484 | if not raw:
485 | continue
486 |
487 | models = sorted({token.strip() for token in raw.split(",") if token.strip()})
488 | if not models:
489 | continue
490 |
491 | notes.append(f"{label}: {', '.join(models)}")
492 |
493 | if not notes:
494 | return None
495 |
496 | return "Policy allows only → " + "; ".join(notes)
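# Example with hypothetical environment values: OPENAI_ALLOWED_MODELS="o4-mini, gpt-5"
# set and the other variables unset yields
#
#     "Policy allows only → OpenAI: gpt-5, o4-mini"
#
# (tokens are trimmed, de-duplicated and sorted per provider; empty or unset variables are skipped).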
497 |
498 | def _build_model_unavailable_message(self, model_name: str) -> str:
499 | """Compose a consistent error message for unavailable model scenarios."""
500 |
501 | tool_category = self.get_model_category()
502 | suggested_model = ModelProviderRegistry.get_preferred_fallback_model(tool_category)
503 | available_models_text = self._format_available_models_list()
504 |
505 | return (
506 | f"Model '{model_name}' is not available with current API keys. "
507 | f"Available models: {available_models_text}. "
508 | f"Suggested model for {self.get_name()}: '{suggested_model}' "
509 | f"(category: {tool_category.value}). If the user explicitly requested a model, you MUST use that exact name or report this error back—do not substitute another model."
510 | )
511 |
512 | def _build_auto_mode_required_message(self) -> str:
513 | """Compose the auto-mode prompt when an explicit model selection is required."""
514 |
515 | tool_category = self.get_model_category()
516 | suggested_model = ModelProviderRegistry.get_preferred_fallback_model(tool_category)
517 | available_models_text = self._format_available_models_list()
518 |
519 | return (
520 | "Model parameter is required in auto mode. "
521 | f"Available models: {available_models_text}. "
522 | f"Suggested model for {self.get_name()}: '{suggested_model}' "
523 | f"(category: {tool_category.value}). When the user names a model, relay that exact name—never swap in another option."
524 | )
525 |
526 | def get_model_field_schema(self) -> dict[str, Any]:
527 | """
528 | Generate the model field schema based on auto mode configuration.
529 |
530 | When auto mode is enabled, the model parameter becomes required
531 | and includes detailed descriptions of each model's capabilities.
532 |
533 | Returns:
534 | Dict containing the model field JSON schema
535 | """
536 |
537 | from config import DEFAULT_MODEL
538 |
539 | # Use the centralized effective auto mode check
540 | if self.is_effective_auto_mode():
541 | description = (
542 | "Currently in auto model selection mode. CRITICAL: When the user names a model, you MUST use that exact name unless the server rejects it. "
543 | "If no model is provided, you may use the `listmodels` tool to review options and select an appropriate match."
544 | )
545 | summaries, total, restricted = self._get_ranked_model_summaries()
546 | remainder = max(0, total - len(summaries))
547 | if summaries:
548 | top_line = "; ".join(summaries)
549 | if remainder > 0:
550 | label = "Allowed models" if restricted else "Top models"
551 | top_line = f"{label}: {top_line}; +{remainder} more via `listmodels`."
552 | else:
553 | label = "Allowed models" if restricted else "Top models"
554 | top_line = f"{label}: {top_line}."
555 | description = f"{description} {top_line}"
556 |
557 | restriction_note = self._get_restriction_note()
558 | if restriction_note and (remainder > 0 or not summaries):
559 | description = f"{description} {restriction_note}."
560 | return {
561 | "type": "string",
562 | "description": description,
563 | }
564 |
565 | description = (
566 | f"The default model is '{DEFAULT_MODEL}'. Override only when the user explicitly requests a different model, and use that exact name. "
567 | "If the requested model fails validation, surface the server error instead of substituting another model. When unsure, use the `listmodels` tool for details."
568 | )
569 | summaries, total, restricted = self._get_ranked_model_summaries()
570 | remainder = max(0, total - len(summaries))
571 | if summaries:
572 | top_line = "; ".join(summaries)
573 | if remainder > 0:
574 | label = "Allowed models" if restricted else "Preferred alternatives"
575 | top_line = f"{label}: {top_line}; +{remainder} more via `listmodels`."
576 | else:
577 | label = "Allowed models" if restricted else "Preferred alternatives"
578 | top_line = f"{label}: {top_line}."
579 | description = f"{description} {top_line}"
580 |
581 | restriction_note = self._get_restriction_note()
582 | if restriction_note and (remainder > 0 or not summaries):
583 | description = f"{description} {restriction_note}."
584 |
585 | return {
586 | "type": "string",
587 | "description": description,
588 | }
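# The returned fragment becomes the "model" property of the tool's input schema.
# A representative shape in auto mode (description abbreviated, model summaries
# hypothetical and dependent on configured providers and restrictions):
#
#     {
#         "type": "string",
#         "description": "Currently in auto model selection mode. CRITICAL: ... "
#                        "Top models: gpt-5 (score 100, 400K ctx, thinking); ..."
#     }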
589 |
590 | def get_default_temperature(self) -> float:
591 | """
592 | Return the default temperature setting for this tool.
593 |
594 | Override this method to set tool-specific temperature defaults.
595 | Lower values (0.0-0.3) for analytical tasks, higher (0.7-1.0) for creative tasks.
596 |
597 | Returns:
598 | float: Default temperature between 0.0 and 1.0
599 | """
600 | return 0.5
601 |
602 | def wants_line_numbers_by_default(self) -> bool:
603 | """
604 | Return whether this tool wants line numbers added to code files by default.
605 |
606 | By default, ALL tools get line numbers for precise code references.
607 | Line numbers are essential for accurate communication about code locations.
608 |
609 | Returns:
610 | bool: True if line numbers should be added by default for this tool
611 | """
612 | return True # All tools get line numbers by default for consistency
613 |
614 | def get_default_thinking_mode(self) -> str:
615 | """
616 | Return the default thinking mode for this tool.
617 |
618 | Thinking mode controls computational budget for reasoning.
619 | Override for tools that need more or less reasoning depth.
620 |
621 | Returns:
622 | str: One of "minimal", "low", "medium", "high", "max"
623 | """
624 | return "medium" # Default to medium thinking for better reasoning
625 |
626 | def get_model_category(self) -> "ToolModelCategory":
627 | """
628 | Return the model category for this tool.
629 |
630 | Model category influences which model is selected in auto mode.
631 | Override to specify whether your tool needs extended reasoning,
632 | fast response, or balanced capabilities.
633 |
634 | Returns:
635 | ToolModelCategory: Category that influences model selection
636 | """
637 | from tools.models import ToolModelCategory
638 |
639 | return ToolModelCategory.BALANCED
640 |
641 | @abstractmethod
642 | def get_request_model(self):
643 | """
644 | Return the Pydantic model class used for validating requests.
645 |
646 | This model should inherit from ToolRequest and define all
647 | parameters specific to this tool.
648 |
649 | Returns:
650 | Type[ToolRequest]: The request model class
651 | """
652 | pass
653 |
654 | def validate_file_paths(self, request) -> Optional[str]:
655 | """
656 | Validate that all file paths in the request are absolute.
657 |
658 | This is a critical security function that prevents path traversal attacks
659 | and ensures all file access is properly controlled. All file paths must
660 | be absolute to avoid ambiguity and security issues.
661 |
662 | Args:
663 | request: The validated request object
664 |
665 | Returns:
666 | Optional[str]: Error message if validation fails, None if all paths are valid
667 | """
668 | # Only validate files/paths if they exist in the request
669 | file_fields = [
670 | "absolute_file_paths",
671 | "file",
672 | "path",
673 | "directory",
674 | "notebooks",
675 | "test_examples",
676 | "style_guide_examples",
677 | "files_checked",
678 | "relevant_files",
679 | ]
680 |
681 | for field_name in file_fields:
682 | if hasattr(request, field_name):
683 | field_value = getattr(request, field_name)
684 | if field_value is None:
685 | continue
686 |
687 | # Handle both single paths and lists of paths
688 | paths_to_check = field_value if isinstance(field_value, list) else [field_value]
689 |
690 | for path in paths_to_check:
691 | if path and not os.path.isabs(path):
692 | return f"All file paths must be FULL absolute paths. Invalid path: '{path}'"
693 |
694 | return None
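# Example: a request whose relevant_files is ["./src/main.py"] is rejected with
#
#     "All file paths must be FULL absolute paths. Invalid path: './src/main.py'"
#
# while ["/home/user/project/src/main.py"] passes (the method returns None).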
695 |
696 | def _validate_token_limit(self, content: str, content_type: str = "Content") -> None:
697 | """
698 | Validate that user-provided content doesn't exceed the MCP prompt size limit.
699 |
700 | This enforcement is strictly for text crossing the MCP transport boundary
701 | (i.e., user input). Internal prompt construction may exceed this size and is
702 | governed by model-specific token limits.
703 |
704 | Args:
705 | content: The user-originated content to validate
706 | content_type: Description of the content type for error messages
707 |
708 | Raises:
709 | ValueError: If content exceeds the character size limit
710 | """
711 | if not content:
712 | logger.debug(f"{self.name} tool {content_type.lower()} validation skipped (no content)")
713 | return
714 |
715 | char_count = len(content)
716 | if char_count > MCP_PROMPT_SIZE_LIMIT:
717 | token_estimate = estimate_tokens(content)
718 | error_msg = (
719 | f"{char_count:,} characters (~{token_estimate:,} tokens). "
720 | f"Maximum is {MCP_PROMPT_SIZE_LIMIT:,} characters."
721 | )
722 | logger.error(f"{self.name} tool {content_type.lower()} validation failed: {error_msg}")
723 | raise ValueError(f"{content_type} too large: {error_msg}")
724 |
725 | token_estimate = estimate_tokens(content)
726 | logger.debug(
727 | f"{self.name} tool {content_type.lower()} validation passed: "
728 | f"{char_count:,} characters (~{token_estimate:,} tokens)"
729 | )
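# Shape of the failure raised above (numbers are illustrative; the real limit comes
# from config.MCP_PROMPT_SIZE_LIMIT and the token figure from estimate_tokens):
#
#     ValueError: Content too large: 60,000 characters (~15,000 tokens). Maximum is 50,000 characters.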
730 |
731 | def get_model_provider(self, model_name: str) -> ModelProvider:
732 | """
733 | Get the appropriate model provider for the given model name.
734 |
735 | This method performs runtime validation to ensure the requested model
736 | is actually available with the current API key configuration.
737 |
738 | Args:
739 | model_name: Name of the model to get provider for
740 |
741 | Returns:
742 | ModelProvider: The provider instance for the model
743 |
744 | Raises:
745 | ValueError: If the model is not available or provider not found
746 | """
747 | try:
748 | provider = ModelProviderRegistry.get_provider_for_model(model_name)
749 | if not provider:
750 | logger.error(f"No provider found for model '{model_name}' in {self.name} tool")
751 | raise ValueError(self._build_model_unavailable_message(model_name))
752 |
753 | return provider
754 | except Exception as e:
755 | logger.error(f"Failed to get provider for model '{model_name}' in {self.name} tool: {e}")
756 | raise
757 |
758 | # === CONVERSATION AND FILE HANDLING METHODS ===
759 |
760 | def get_conversation_embedded_files(self, continuation_id: Optional[str]) -> list[str]:
761 | """
762 | Get list of files already embedded in conversation history.
763 |
764 | This method returns the list of files that have already been embedded
765 | in the conversation history for a given continuation thread. Tools can
766 | use this to avoid re-embedding files that are already available in the
767 | conversation context.
768 |
769 | Args:
770 | continuation_id: Thread continuation ID, or None for new conversations
771 |
772 | Returns:
773 | list[str]: List of file paths already embedded in conversation history
774 | """
775 | if not continuation_id:
776 | # New conversation, no files embedded yet
777 | return []
778 |
779 | thread_context = get_thread(continuation_id)
780 | if not thread_context:
781 | # Thread not found, no files embedded
782 | return []
783 |
784 | embedded_files = get_conversation_file_list(thread_context)
785 | logger.debug(f"[FILES] {self.name}: Found {len(embedded_files)} embedded files")
786 | return embedded_files
787 |
788 | def filter_new_files(self, requested_files: list[str], continuation_id: Optional[str]) -> list[str]:
789 | """
790 | Filter out files that are already embedded in conversation history.
791 |
792 | This method prevents duplicate file embeddings by filtering out files that have
793 | already been embedded in the conversation history. This optimizes token usage
794 | while ensuring tools still have logical access to all requested files through
795 | conversation history references.
796 |
797 | Args:
798 | requested_files: List of files requested for current tool execution
799 | continuation_id: Thread continuation ID, or None for new conversations
800 |
801 | Returns:
802 | list[str]: List of files that need to be embedded (not already in history)
803 | """
804 | logger.debug(f"[FILES] {self.name}: Filtering {len(requested_files)} requested files")
805 |
806 | if not continuation_id:
807 | # New conversation, all files are new
808 | logger.debug(f"[FILES] {self.name}: New conversation, all {len(requested_files)} files are new")
809 | return requested_files
810 |
811 | try:
812 | embedded_files = set(self.get_conversation_embedded_files(continuation_id))
813 | logger.debug(f"[FILES] {self.name}: Found {len(embedded_files)} embedded files in conversation")
814 |
815 | # Safety check: If no files are marked as embedded but we have a continuation_id,
816 | # this might indicate an issue with conversation history. Be conservative.
817 | if not embedded_files:
818 | logger.debug(f"{self.name} tool: No files found in conversation history for thread {continuation_id}")
819 | logger.debug(
820 | f"[FILES] {self.name}: No embedded files found, returning all {len(requested_files)} requested files"
821 | )
822 | return requested_files
823 |
824 | # Return only files that haven't been embedded yet
825 | new_files = [f for f in requested_files if f not in embedded_files]
826 | logger.debug(
827 | f"[FILES] {self.name}: After filtering: {len(new_files)} new files, {len(requested_files) - len(new_files)} already embedded"
828 | )
829 | logger.debug(f"[FILES] {self.name}: New files to embed: {new_files}")
830 |
831 | # Log filtering results for debugging
832 | if len(new_files) < len(requested_files):
833 | skipped = [f for f in requested_files if f in embedded_files]
834 | logger.debug(
835 | f"{self.name} tool: Filtering {len(skipped)} files already in conversation history: {', '.join(skipped)}"
836 | )
837 | logger.debug(f"[FILES] {self.name}: Skipped (already embedded): {skipped}")
838 |
839 | return new_files
840 |
841 | except Exception as e:
842 | # If there's any issue with conversation history lookup, be conservative
843 | # and include all files rather than risk losing access to needed files
844 | logger.warning(f"{self.name} tool: Error checking conversation history for {continuation_id}: {e}")
845 | logger.warning(f"{self.name} tool: Including all requested files as fallback")
846 | logger.debug(
847 | f"[FILES] {self.name}: Exception in filter_new_files, returning all {len(requested_files)} files as fallback"
848 | )
849 | return requested_files
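# Example with a hypothetical thread: if the conversation already embeds
# ["/src/a.py"] and the request asks for ["/src/a.py", "/src/b.py"], only
# ["/src/b.py"] is returned for embedding; with continuation_id=None (a new
# conversation) the full requested list is returned unchanged.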
850 |
851 | def format_conversation_turn(self, turn: ConversationTurn) -> list[str]:
852 | """
853 | Format a conversation turn for display in conversation history.
854 |
855 | Tools can override this to provide custom formatting for their responses
856 | while maintaining the standard structure for cross-tool compatibility.
857 |
858 | This method is called by build_conversation_history when reconstructing
859 | conversation context, allowing each tool to control how its responses
860 | appear in subsequent conversation turns.
861 |
862 | Args:
863 | turn: The conversation turn to format (from utils.conversation_memory)
864 |
865 | Returns:
866 | list[str]: Lines of formatted content for this turn
867 |
868 | Example:
869 | Default implementation returns:
870 | ["Files used in this turn: file1.py, file2.py", "", "Response content..."]
871 |
872 | Tools can override to add custom sections, formatting, or metadata display.
873 | """
874 | parts = []
875 |
876 | # Add files context if present
877 | if turn.files:
878 | parts.append(f"Files used in this turn: {', '.join(turn.files)}")
879 | parts.append("") # Empty line for readability
880 |
881 | # Add the actual content
882 | parts.append(turn.content)
883 |
884 | return parts
885 |
886 | def handle_prompt_file(self, files: Optional[list[str]]) -> tuple[Optional[str], Optional[list[str]]]:
887 | """
888 | Check for and handle prompt.txt in the absolute file paths list.
889 |
890 | If prompt.txt is found, reads its content and removes it from the files list.
891 | This file is treated specially as the main prompt, not as an embedded file.
892 |
893 | This mechanism allows us to work around MCP's ~25K token limit by having
894 | the CLI save large prompts to a file, effectively using the file transfer
895 | mechanism to bypass token constraints while preserving response capacity.
896 |
897 | Args:
898 | files: List of absolute file paths (will be translated for current environment)
899 |
900 | Returns:
901 | tuple: (prompt_content, updated_files_list)
902 | """
903 | if not files:
904 | return None, files
905 |
906 | prompt_content = None
907 | updated_files = []
908 |
909 | for file_path in files:
910 |
911 | # Check if the filename is exactly "prompt.txt"
912 | # This ensures we don't match files like "myprompt.txt" or "prompt.txt.bak"
913 | if os.path.basename(file_path) == "prompt.txt":
914 | try:
915 | # Read prompt.txt content and extract just the text
916 | content, _ = read_file_content(file_path)
917 | # Extract the content between the file markers
918 | if "--- BEGIN FILE:" in content and "--- END FILE:" in content:
919 | lines = content.split("\n")
920 | in_content = False
921 | content_lines = []
922 | for line in lines:
923 | if line.startswith("--- BEGIN FILE:"):
924 | in_content = True
925 | continue
926 | elif line.startswith("--- END FILE:"):
927 | break
928 | elif in_content:
929 | content_lines.append(line)
930 | prompt_content = "\n".join(content_lines)
931 | else:
932 | # Fallback: if it's already raw content (from tests or direct input)
933 | # and doesn't have error markers, use it directly
934 | if not content.startswith("\n--- ERROR"):
935 | prompt_content = content
936 | else:
937 | prompt_content = None
938 | except Exception:
939 | # If we can't read the file, we'll just skip it
940 | # The error will be handled elsewhere
941 | pass
942 | else:
943 | # Keep the original path in the files list (will be translated later by read_files)
944 | updated_files.append(file_path)
945 |
946 | return prompt_content, updated_files if updated_files else None
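# Example with hypothetical paths: files = ["/tmp/prompt.txt", "/tmp/app.py"]
# returns (contents of /tmp/prompt.txt, ["/tmp/app.py"]); files = ["/tmp/app.py"]
# returns (None, ["/tmp/app.py"]). Only an exact "prompt.txt" basename is special,
# so "/tmp/myprompt.txt" is kept as an ordinary context file.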
947 |
948 | def get_prompt_content_for_size_validation(self, user_content: str) -> str:
949 | """
950 | Get the content that should be validated for MCP prompt size limits.
951 |
952 | This hook method allows tools to specify what content should be checked
953 | against the MCP transport size limit. By default, it returns the user content,
954 | but can be overridden to exclude conversation history when needed.
955 |
956 | Args:
957 | user_content: The user content that would normally be validated
958 |
959 | Returns:
960 | The content that should actually be validated for size limits
961 | """
962 | # Default implementation: validate the full user content
963 | return user_content
964 |
965 | def check_prompt_size(self, text: str) -> Optional[dict[str, Any]]:
966 | """
967 | Check if USER INPUT text is too large for MCP transport boundary.
968 |
969 | IMPORTANT: This method should ONLY be used to validate user input that crosses
970 | the CLI ↔ MCP Server transport boundary. It should NOT be used to limit
971 | internal MCP Server operations.
972 |
973 | Args:
974 | text: The user input text to check (NOT internal prompt content)
975 |
976 | Returns:
977 | Optional[Dict[str, Any]]: Response asking for file handling if too large, None otherwise
978 | """
979 | if text and len(text) > MCP_PROMPT_SIZE_LIMIT:
980 | return {
981 | "status": "resend_prompt",
982 | "content": (
983 | f"MANDATORY ACTION REQUIRED: The prompt is too large for MCP's token limits (>{MCP_PROMPT_SIZE_LIMIT:,} characters). "
984 | "YOU MUST IMMEDIATELY save the prompt text to a temporary file named 'prompt.txt' in the working directory. "
985 | "DO NOT attempt to shorten or modify the prompt. SAVE IT AS-IS to 'prompt.txt'. "
986 | "Then resend the request, passing the absolute file path to 'prompt.txt' as part of the tool call, "
987 | "along with any other files you wish to share as context. Leave the prompt text itself empty or very brief in the new request. "
988 | "This is the ONLY way to handle large prompts - you MUST follow these exact steps."
989 | ),
990 | "content_type": "text",
991 | "metadata": {
992 | "prompt_size": len(text),
993 | "limit": MCP_PROMPT_SIZE_LIMIT,
994 | "instructions": "MANDATORY: Save prompt to 'prompt.txt' in current folder and provide full path when recalling this tool.",
995 | },
996 | }
997 | return None
998 |
999 | def _prepare_file_content_for_prompt(
1000 | self,
1001 | request_files: list[str],
1002 | continuation_id: Optional[str],
1003 | context_description: str = "New files",
1004 | max_tokens: Optional[int] = None,
1005 | reserve_tokens: int = 1_000,
1006 | remaining_budget: Optional[int] = None,
1007 | arguments: Optional[dict] = None,
1008 | model_context: Optional[Any] = None,
1009 | ) -> tuple[str, list[str]]:
1010 | """
1011 | Centralized file processing implementing dual prioritization strategy.
1012 |
1013 | This method is the heart of conversation-aware file processing across all tools.
1014 |
1015 | Args:
1016 | request_files: List of files requested for current tool execution
1017 | continuation_id: Thread continuation ID, or None for new conversations
1018 | context_description: Description for token limit validation (e.g. "Code", "New files")
1019 | max_tokens: Maximum tokens to use (defaults to remaining budget or model-specific content allocation)
1020 | reserve_tokens: Tokens to reserve for additional prompt content (default 1K)
1021 | remaining_budget: Remaining token budget after conversation history (from server.py)
1022 | arguments: Original tool arguments (used to extract _remaining_tokens if available)
1023 | model_context: Model context object with all model information including token allocation
1024 |
1025 | Returns:
1026 | tuple[str, list[str]]: (formatted_file_content, actually_processed_files)
1027 | - formatted_file_content: Formatted file content string ready for prompt inclusion
1028 | - actually_processed_files: List of individual file paths that were actually read and embedded
1029 | (directories are expanded to individual files)
1030 | """
1031 | if not request_files:
1032 | return "", []
1033 |
1034 | # Extract remaining budget from arguments if available
1035 | if remaining_budget is None:
1036 | # Use provided arguments or fall back to stored arguments from execute()
1037 | args_to_use = arguments or getattr(self, "_current_arguments", {})
1038 | remaining_budget = args_to_use.get("_remaining_tokens")
1039 |
1040 | # Use remaining budget if provided, otherwise fall back to max_tokens or model-specific default
1041 | if remaining_budget is not None:
1042 | effective_max_tokens = remaining_budget - reserve_tokens
1043 | elif max_tokens is not None:
1044 | effective_max_tokens = max_tokens - reserve_tokens
1045 | else:
1046 | # Use model_context for token allocation
1047 | if not model_context:
1048 | # Try to get from stored attributes as fallback
1049 | model_context = getattr(self, "_model_context", None)
1050 | if not model_context:
1051 | logger.error(
1052 | f"[FILES] {self.name}: _prepare_file_content_for_prompt called without model_context. "
1053 | "This indicates an incorrect call sequence in the tool's implementation."
1054 | )
1055 | raise RuntimeError("Model context not provided for file preparation.")
1056 |
1057 | # This is now the single source of truth for token allocation.
1058 | try:
1059 | token_allocation = model_context.calculate_token_allocation()
1060 | # Standardize on `file_tokens` for consistency and correctness.
1061 | effective_max_tokens = token_allocation.file_tokens - reserve_tokens
1062 | logger.debug(
1063 | f"[FILES] {self.name}: Using model context for {model_context.model_name}: "
1064 | f"{token_allocation.file_tokens:,} file tokens from {token_allocation.total_tokens:,} total"
1065 | )
1066 | except Exception as e:
1067 | logger.error(
1068 | f"[FILES] {self.name}: Failed to calculate token allocation from model context: {e}", exc_info=True
1069 | )
1070 | # If the context exists but calculation fails, we still need to prevent a crash.
1071 | # A loud error is logged, and we fall back to a safe default.
1072 | effective_max_tokens = 100_000 - reserve_tokens
1073 |
1074 | # Ensure we have a reasonable minimum budget
1075 | effective_max_tokens = max(1000, effective_max_tokens)
1076 |
1077 | files_to_embed = self.filter_new_files(request_files, continuation_id)
1078 | logger.debug(f"[FILES] {self.name}: Will embed {len(files_to_embed)} files after filtering")
1079 |
1080 | # Log the specific files for debugging/testing
1081 | if files_to_embed:
1082 | logger.info(
1083 | f"[FILE_PROCESSING] {self.name} tool will embed new files: {', '.join([os.path.basename(f) for f in files_to_embed])}"
1084 | )
1085 | else:
1086 | logger.info(
1087 | f"[FILE_PROCESSING] {self.name} tool: No new files to embed (all files already in conversation history)"
1088 | )
1089 |
1090 | content_parts = []
1091 | actually_processed_files = []
1092 |
1093 | # Read content of new files only
1094 | if files_to_embed:
1095 | logger.debug(f"{self.name} tool embedding {len(files_to_embed)} new files: {', '.join(files_to_embed)}")
1096 | logger.debug(
1097 | f"[FILES] {self.name}: Starting file embedding with token budget {effective_max_tokens + reserve_tokens:,}"
1098 | )
1099 | try:
1100 | # Before calling read_files, expand directories to get individual file paths
1101 | from utils.file_utils import expand_paths
1102 |
1103 | expanded_files = expand_paths(files_to_embed)
1104 | logger.debug(
1105 | f"[FILES] {self.name}: Expanded {len(files_to_embed)} paths to {len(expanded_files)} individual files"
1106 | )
1107 |
1108 | file_content = read_files(
1109 | files_to_embed,
1110 | max_tokens=effective_max_tokens + reserve_tokens,
1111 | reserve_tokens=reserve_tokens,
1112 | include_line_numbers=self.wants_line_numbers_by_default(),
1113 | )
1114 | # Note: No need to validate against MCP_PROMPT_SIZE_LIMIT here
1115 | # read_files already handles token-aware truncation based on model's capabilities
1116 | content_parts.append(file_content)
1117 |
1118 | # Track the expanded files as actually processed
1119 | actually_processed_files.extend(expanded_files)
1120 |
1121 | # Estimate tokens for debug logging
1122 | from utils.token_utils import estimate_tokens
1123 |
1124 | content_tokens = estimate_tokens(file_content)
1125 | logger.debug(
1126 | f"{self.name} tool successfully embedded {len(files_to_embed)} files ({content_tokens:,} tokens)"
1127 | )
1128 | logger.debug(f"[FILES] {self.name}: Successfully embedded files - {content_tokens:,} tokens used")
1129 | logger.debug(
1130 | f"[FILES] {self.name}: Actually processed {len(actually_processed_files)} individual files"
1131 | )
1132 | except Exception as e:
1133 | logger.error(f"{self.name} tool failed to embed files {files_to_embed}: {type(e).__name__}: {e}")
1134 | logger.debug(f"[FILES] {self.name}: File embedding failed - {type(e).__name__}: {e}")
1135 | raise
1136 | else:
1137 | logger.debug(f"[FILES] {self.name}: No files to embed after filtering")
1138 |
1139 | # Generate note about files already in conversation history
1140 | if continuation_id and len(files_to_embed) < len(request_files):
1141 | embedded_files = self.get_conversation_embedded_files(continuation_id)
1142 | skipped_files = [f for f in request_files if f in embedded_files]
1143 | if skipped_files:
1144 | logger.debug(
1145 | f"{self.name} tool skipping {len(skipped_files)} files already in conversation history: {', '.join(skipped_files)}"
1146 | )
1147 | logger.debug(f"[FILES] {self.name}: Adding note about {len(skipped_files)} skipped files")
1148 | if content_parts:
1149 | content_parts.append("\n\n")
1150 | note_lines = [
1151 | "--- NOTE: Additional files referenced in conversation history ---",
1152 | "The following files are already available in our conversation context:",
1153 | "\n".join(f" - {f}" for f in skipped_files),
1154 | "--- END NOTE ---",
1155 | ]
1156 | content_parts.append("\n".join(note_lines))
1157 | else:
1158 | logger.debug(f"[FILES] {self.name}: No skipped files to note")
1159 |
1160 | result = "".join(content_parts) if content_parts else ""
1161 | logger.debug(
1162 | f"[FILES] {self.name}: _prepare_file_content_for_prompt returning {len(result)} chars, {len(actually_processed_files)} processed files"
1163 | )
1164 | return result, actually_processed_files
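# Token-budget resolution order used above (illustrative numbers): an explicit
# remaining_budget of 120_000 with reserve_tokens=1_000 caps file content at
# 119_000 tokens; failing that, max_tokens is used the same way; otherwise the
# model context's file_tokens allocation (minus the reserve) applies, with a
# 100_000-token fallback (minus the reserve) only if that calculation errors out.
# The effective cap is never allowed below 1_000 tokens.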
1165 |
1166 | def get_websearch_instruction(self, tool_specific: Optional[str] = None) -> str:
1167 | """
1168 | Generate standardized web search instruction.
1169 |
1170 | Args:
1171 | tool_specific: Optional tool-specific search guidance
1172 |
1173 | Returns:
1174 | str: Web search instruction to append to prompt
1175 | """
1176 |
1177 | base_instruction = """
1178 |
1179 | WEB SEARCH CAPABILITY: You can request the calling agent to perform web searches to enhance your analysis with current information!
1180 |
1181 | IMPORTANT: When you identify areas where web searches would significantly improve your response (such as checking current documentation, finding recent solutions, verifying best practices, or gathering community insights), you MUST explicitly instruct the agent to perform specific web searches and then respond back using the continuation_id from this response to continue the analysis.
1182 |
1183 | Use clear, direct language based on the value of the search:
1184 |
1185 | For valuable supplementary information: "Please perform a web search on '[specific topic/query]' and then continue this analysis using the continuation_id from this response if you find relevant information."
1186 |
1187 | For important missing information: "Please search for '[specific topic/query]' and respond back with the findings using the continuation_id from this response - this information is needed to provide a complete analysis."
1188 |
1189 | For critical/essential information: "SEARCH REQUIRED: Please immediately perform a web search on '[specific topic/query]' and respond back with the results using the continuation_id from this response. Cannot provide accurate analysis without this current information."
1190 |
1191 | This ensures you get the most current and comprehensive information while maintaining conversation context through the continuation_id."""
1192 |
1193 | if tool_specific:
1194 | return f"""{base_instruction}
1195 |
1196 | {tool_specific}
1197 |
1198 | When recommending searches, be specific about what information you need and why it would improve your analysis."""
1199 |
1200 | # Default instruction for all tools
1201 | return f"""{base_instruction}
1202 |
1203 | Consider requesting searches for:
1204 | - Current documentation and API references
1205 | - Recent best practices and patterns
1206 | - Known issues and community solutions
1207 | - Framework updates and compatibility
1208 | - Security advisories and patches
1209 | - Performance benchmarks and optimizations
1210 |
1211 | When recommending searches, be specific about what information you need and why it would improve your analysis. Always remember to instruct the agent to use the continuation_id from this response when providing search results."""
1212 |
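    # Illustrative usage (not part of the original source; the tool-specific wording is an
    # assumption): a concrete tool could append the web-search guidance to its prompt, e.g.
    #
    #     prompt += self.get_websearch_instruction(
    #         "Prioritize searches for the exact library versions and error strings involved."
    #     )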
1213 | def get_language_instruction(self) -> str:
1214 | """
1215 | Generate language instruction based on LOCALE configuration.
1216 |
1217 | Returns:
1218 | str: Language instruction to prepend to prompt, or empty string if
1219 | no locale set
1220 | """
1221 | # Read LOCALE directly from environment to support dynamic changes
1222 | # Tests can monkeypatch LOCALE via the environment helper (or .env when override is enforced)
1223 |
1224 | locale = (get_env("LOCALE", "") or "").strip()
1225 |
1226 | if not locale:
1227 | return ""
1228 |
1229 | # Simple language instruction
1230 | return f"Always respond in {locale}.\n\n"
1231 |
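    # Example (assumed environment value): with LOCALE="fr-FR", this returns
    # "Always respond in fr-FR.\n\n"; with LOCALE unset or empty, it returns "".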
1232 | # === ABSTRACT METHODS FOR SIMPLE TOOLS ===
1233 |
1234 | @abstractmethod
1235 | async def prepare_prompt(self, request) -> str:
1236 | """
1237 | Prepare the complete prompt for the AI model.
1238 |
1239 | This method should construct the full prompt by combining:
1240 | - System prompt from get_system_prompt()
1241 | - File content from _prepare_file_content_for_prompt()
1242 | - Conversation history from reconstruct_thread_context()
1243 | - User's request and any tool-specific context
1244 |
1245 | Args:
1246 | request: The validated request object
1247 |
1248 | Returns:
1249 | str: Complete prompt ready for the AI model
1250 | """
1251 | pass
1252 |
1253 | def format_response(self, response: str, request, model_info: dict = None) -> str:
1254 | """
1255 | Format the AI model's response for the user.
1256 |
1257 | This method allows tools to post-process the model's response,
1258 | adding structure, validation, or additional context.
1259 |
1260 | The default implementation returns the response unchanged.
1261 | Tools can override this method to add custom formatting.
1262 |
1263 | Args:
1264 | response: Raw response from the AI model
1265 | request: The original request object
1266 | model_info: Optional model information and metadata
1267 |
1268 | Returns:
1269 | str: Formatted response ready for the user
1270 | """
1271 | return response
1272 |
1273 | # === IMPLEMENTATION METHODS ===
1274 |     # These are inherited from the current base.py for now to maintain compatibility;
1275 |     # full implementations will replace them as the migration completes.
1276 |
1277 | async def execute(self, arguments: dict[str, Any]) -> list[TextContent]:
1278 | """Execute the tool - will be inherited from existing base.py for now."""
1279 | # This will be implemented by importing from the current base.py
1280 | # for backward compatibility during the migration
1281 | raise NotImplementedError("Subclasses must implement execute method")
1282 |
1283 | def _should_require_model_selection(self, model_name: str) -> bool:
1284 | """
1285 | Check if we should require the CLI to select a model at runtime.
1286 |
1287 | This is called during request execution to determine if we need
1288 | to return an error asking the CLI to provide a model parameter.
1289 |
1290 | Args:
1291 | model_name: The model name from the request or DEFAULT_MODEL
1292 |
1293 | Returns:
1294 | bool: True if we should require model selection
1295 | """
1296 | # Case 1: Model is explicitly "auto"
1297 | if model_name.lower() == "auto":
1298 | return True
1299 |
1300 | # Case 2: Requested model is not available
1301 | from providers.registry import ModelProviderRegistry
1302 |
1303 | provider = ModelProviderRegistry.get_provider_for_model(model_name)
1304 | if not provider:
1305 | logger.warning(f"Model '{model_name}' is not available with current API keys. Requiring model selection.")
1306 | return True
1307 |
1308 | return False
1309 |
1310 | def _get_available_models(self) -> list[str]:
1311 | """
1312 | Get list of models available from enabled providers.
1313 |
1314 | Only returns models from providers that have valid API keys configured.
1315 | This fixes the namespace collision bug where models from disabled providers
1316 | were shown to the CLI, causing routing conflicts.
1317 |
1318 | Returns:
1319 | List of model names from enabled providers only
1320 | """
1321 | from providers.registry import ModelProviderRegistry
1322 |
1323 | # Get models from enabled providers only (those with valid API keys)
1324 | all_models = ModelProviderRegistry.get_available_model_names()
1325 |
1326 | # Add OpenRouter models and their aliases when OpenRouter is configured
1327 | openrouter_key = get_env("OPENROUTER_API_KEY")
1328 | if openrouter_key and openrouter_key != "your_openrouter_api_key_here":
1329 | try:
1330 | registry = self._get_openrouter_registry()
1331 |
1332 | for alias in registry.list_aliases():
1333 | if alias not in all_models:
1334 | all_models.append(alias)
1335 | except Exception as exc: # pragma: no cover - logged for observability
1336 | import logging
1337 |
1338 | logging.debug(f"Failed to add OpenRouter models to enum: {exc}")
1339 |
1340 | # Add custom models (and their aliases) when a custom endpoint is available
1341 | custom_url = get_env("CUSTOM_API_URL")
1342 | if custom_url:
1343 | try:
1344 | registry = self._get_custom_registry()
1345 | for alias in registry.list_aliases():
1346 | if alias not in all_models:
1347 | all_models.append(alias)
1348 | except Exception as exc: # pragma: no cover - logged for observability
1349 | import logging
1350 |
1351 | logging.debug(f"Failed to add custom models to enum: {exc}")
1352 |
1353 | # Remove duplicates while preserving insertion order
1354 | seen: set[str] = set()
1355 | unique_models: list[str] = []
1356 | for model in all_models:
1357 | if model not in seen:
1358 | seen.add(model)
1359 | unique_models.append(model)
1360 |
1361 | return unique_models
1362 |
1363 | def _resolve_model_context(self, arguments: dict, request) -> tuple[str, Any]:
1364 | """
1365 | Resolve model context and name using centralized logic.
1366 |
1367 | This method extracts the model resolution logic from execute() so it can be
1368 | reused by tools that override execute() (like debug tool) without duplicating code.
1369 |
1370 | Args:
1371 | arguments: Dictionary of arguments from the MCP client
1372 | request: The validated request object
1373 |
1374 | Returns:
1375 | tuple[str, ModelContext]: (resolved_model_name, model_context)
1376 |
1377 | Raises:
1378 | ValueError: If model resolution fails or model selection is required
1379 | """
1380 | # MODEL RESOLUTION NOW HAPPENS AT MCP BOUNDARY
1381 | # Extract pre-resolved model context from server.py
1382 | model_context = arguments.get("_model_context")
1383 | resolved_model_name = arguments.get("_resolved_model_name")
1384 |
1385 | if model_context and resolved_model_name:
1386 | # Model was already resolved at MCP boundary
1387 | model_name = resolved_model_name
1388 | logger.debug(f"Using pre-resolved model '{model_name}' from MCP boundary")
1389 | else:
1390 | # Fallback for direct execute calls
1391 | model_name = getattr(request, "model", None)
1392 | if not model_name:
1393 | from config import DEFAULT_MODEL
1394 |
1395 | model_name = DEFAULT_MODEL
1396 | logger.debug(f"Using fallback model resolution for '{model_name}' (test mode)")
1397 |
1398 | # For tests: Check if we should require model selection (auto mode)
1399 | if self._should_require_model_selection(model_name):
1400 | # Build error message based on why selection is required
1401 | if model_name.lower() == "auto":
1402 | error_message = self._build_auto_mode_required_message()
1403 | else:
1404 | error_message = self._build_model_unavailable_message(model_name)
1405 | raise ValueError(error_message)
1406 |
1407 | # Create model context for tests
1408 | from utils.model_context import ModelContext
1409 |
1410 | model_context = ModelContext(model_name)
1411 |
1412 | return model_name, model_context
1413 |
1414 | def validate_and_correct_temperature(self, temperature: float, model_context: Any) -> tuple[float, list[str]]:
1415 | """
1416 | Validate and correct temperature for the specified model.
1417 |
1418 | This method ensures that the temperature value is within the valid range
1419 | for the specific model being used. Different models have different temperature
1420 | constraints (e.g., o1 models require temperature=1.0, GPT models support 0-2).
1421 |
1422 | Args:
1423 | temperature: Temperature value to validate
1424 | model_context: Model context object containing model name, provider, and capabilities
1425 |
1426 | Returns:
1427 | Tuple of (corrected_temperature, warning_messages)
1428 | """
1429 | try:
1430 | # Use model context capabilities directly - clean OOP approach
1431 | capabilities = model_context.capabilities
1432 | constraint = capabilities.temperature_constraint
1433 |
1434 | warnings = []
1435 | if not constraint.validate(temperature):
1436 | corrected = constraint.get_corrected_value(temperature)
1437 | warning = (
1438 | f"Temperature {temperature} invalid for {model_context.model_name}. "
1439 | f"{constraint.get_description()}. Using {corrected} instead."
1440 | )
1441 | warnings.append(warning)
1442 | return corrected, warnings
1443 |
1444 | return temperature, warnings
1445 |
1446 | except Exception as e:
1447 | # If validation fails for any reason, use the original temperature
1448 | # and log a warning (but don't fail the request)
1449 | logger.warning(f"Temperature validation failed for {model_context.model_name}: {e}")
1450 | return temperature, [f"Temperature validation failed: {e}"]
1451 |
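    # Worked example (hypothetical model and constraint): if the model's temperature_constraint
    # only accepts 1.0 (as o1-style models do), then
    #
    #     corrected, warnings = self.validate_and_correct_temperature(0.2, model_context)
    #
    # returns (1.0, ["Temperature 0.2 invalid for <model>. ... Using 1.0 instead."]).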
1452 | def _validate_image_limits(
1453 | self, images: Optional[list[str]], model_context: Optional[Any] = None, continuation_id: Optional[str] = None
1454 | ) -> Optional[dict]:
1455 | """
1456 | Validate image size and count against model capabilities.
1457 |
1458 | This performs strict validation to ensure we don't exceed model-specific
1459 | image limits. Uses capability-based validation with actual model
1460 | configuration rather than hard-coded limits.
1461 |
1462 | Args:
1463 | images: List of image paths/data URLs to validate
1464 | model_context: Model context object containing model name, provider, and capabilities
1465 | continuation_id: Optional continuation ID for conversation context
1466 |
1467 | Returns:
1468 | Optional[dict]: Error response if validation fails, None if valid
1469 | """
1470 | if not images:
1471 | return None
1472 |
1473 | # Import here to avoid circular imports
1474 | import base64
1475 | from pathlib import Path
1476 |
1477 | if not model_context:
1478 | # Get from tool's stored context as fallback
1479 | model_context = getattr(self, "_model_context", None)
1480 | if not model_context:
1481 | logger.warning("No model context available for image validation")
1482 | return None
1483 |
1484 | try:
1485 | # Use model context capabilities directly - clean OOP approach
1486 | capabilities = model_context.capabilities
1487 | model_name = model_context.model_name
1488 | except Exception as e:
1489 | logger.warning(f"Failed to get capabilities from model_context for image validation: {e}")
1490 | # Generic error response when capabilities cannot be accessed
1491 | model_name = getattr(model_context, "model_name", "unknown")
1492 | return {
1493 | "status": "error",
1494 | "content": self._build_model_unavailable_message(model_name),
1495 | "content_type": "text",
1496 | "metadata": {
1497 | "error_type": "validation_error",
1498 | "model_name": model_name,
1499 | "supports_images": None, # Unknown since model capabilities unavailable
1500 | "image_count": len(images) if images else 0,
1501 | },
1502 | }
1503 |
1504 | # Check if model supports images
1505 | if not capabilities.supports_images:
1506 | return {
1507 | "status": "error",
1508 | "content": (
1509 | f"Image support not available: Model '{model_name}' does not support image processing. "
1510 | f"Please use a vision-capable model such as 'gemini-2.5-flash', 'o3', "
1511 | f"or 'claude-opus-4.1' for image analysis tasks."
1512 | ),
1513 | "content_type": "text",
1514 | "metadata": {
1515 | "error_type": "validation_error",
1516 | "model_name": model_name,
1517 | "supports_images": False,
1518 | "image_count": len(images),
1519 | },
1520 | }
1521 |
1522 | # Get model image limits from capabilities
1523 | max_images = 5 # Default max number of images
1524 | max_size_mb = capabilities.max_image_size_mb
1525 |
1526 | # Check image count
1527 | if len(images) > max_images:
1528 | return {
1529 | "status": "error",
1530 | "content": (
1531 | f"Too many images: Model '{model_name}' supports a maximum of {max_images} images, "
1532 | f"but {len(images)} were provided. Please reduce the number of images."
1533 | ),
1534 | "content_type": "text",
1535 | "metadata": {
1536 | "error_type": "validation_error",
1537 | "model_name": model_name,
1538 | "image_count": len(images),
1539 | "max_images": max_images,
1540 | },
1541 | }
1542 |
1543 | # Calculate total size of all images
1544 | total_size_mb = 0.0
1545 | for image_path in images:
1546 | try:
1547 | if image_path.startswith("data:image/"):
1548 | # Handle data URL: data:image/png;base64,iVBORw0...
1549 | _, data = image_path.split(",", 1)
1550 | # Base64 encoding increases size by ~33%, so decode to get actual size
1551 | actual_size = len(base64.b64decode(data))
1552 | total_size_mb += actual_size / (1024 * 1024)
1553 | else:
1554 | # Handle file path
1555 | path = Path(image_path)
1556 | if path.exists():
1557 | file_size = path.stat().st_size
1558 | total_size_mb += file_size / (1024 * 1024)
1559 | else:
1560 | logger.warning(f"Image file not found: {image_path}")
1561 | # Assume a reasonable size for missing files to avoid breaking validation
1562 | total_size_mb += 1.0 # 1MB assumption
1563 | except Exception as e:
1564 | logger.warning(f"Failed to get size for image {image_path}: {e}")
1565 | # Assume a reasonable size for problematic files
1566 | total_size_mb += 1.0 # 1MB assumption
1567 |
1568 | # Apply 40MB cap for custom models if needed
1569 | effective_limit_mb = max_size_mb
1570 | try:
1571 | from providers.shared import ProviderType
1572 |
1573 | # ModelCapabilities dataclass has provider field defined
1574 | if capabilities.provider == ProviderType.CUSTOM:
1575 | effective_limit_mb = min(max_size_mb, 40.0)
1576 | except Exception:
1577 | pass
1578 |
1579 | # Validate against size limit
1580 | if total_size_mb > effective_limit_mb:
1581 | return {
1582 | "status": "error",
1583 | "content": (
1584 | f"Image size limit exceeded: Model '{model_name}' supports maximum {effective_limit_mb:.1f}MB "
1585 | f"for all images combined, but {total_size_mb:.1f}MB was provided. "
1586 | f"Please reduce image sizes or count and try again."
1587 | ),
1588 | "content_type": "text",
1589 | "metadata": {
1590 | "error_type": "validation_error",
1591 | "model_name": model_name,
1592 | "total_size_mb": round(total_size_mb, 2),
1593 | "limit_mb": round(effective_limit_mb, 2),
1594 | "image_count": len(images),
1595 | "supports_images": True,
1596 | },
1597 | }
1598 |
1599 | # All validations passed
1600 | logger.debug(f"Image validation passed: {len(images)} images, {total_size_mb:.1f}MB total")
1601 | return None
1602 |
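    # Sizing note (a minimal sketch mirroring the loop above, not additional source code):
    # for data URLs the base64 payload is decoded so the *decoded* bytes are counted, e.g.
    #
    #     import base64
    #     _, data = "data:image/png;base64,iVBORw0KGgo=".split(",", 1)
    #     size_mb = len(base64.b64decode(data)) / (1024 * 1024)
    #
    # Plain file paths are measured with Path(image_path).stat().st_size instead.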
1603 | def _parse_response(self, raw_text: str, request, model_info: Optional[dict] = None):
1604 | """Parse response - will be inherited for now."""
1605 | # Implementation inherited from current base.py
1606 | raise NotImplementedError("Subclasses must implement _parse_response method")
1607 |
```
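
The abstract `prepare_prompt()` above leaves prompt composition to each concrete tool. As a minimal sketch only (the subclass name, request fields, and prompt layout are assumptions for illustration, not code from this repository), a tool might combine the helpers shown above like this:

```python
# Hypothetical subclass for illustration; other required abstract methods are omitted.
class ExampleChatTool(BaseTool):
    def get_name(self) -> str:
        return "example_chat"

    async def prepare_prompt(self, request) -> str:
        # Embed requested files with conversation-aware filtering and token budgeting.
        file_content, _ = self._prepare_file_content_for_prompt(
            getattr(request, "files", None) or [],
            getattr(request, "continuation_id", None),
            "Files for analysis",
        )
        # Compose locale instruction, system prompt, file context, the user's request,
        # and the web-search guidance into a single prompt.
        parts = [
            self.get_language_instruction(),
            self.get_system_prompt(),
            file_content,
            f"USER REQUEST:\n{request.prompt}",
            self.get_websearch_instruction(),
        ]
        return "\n\n".join(part for part in parts if part)
```
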
--------------------------------------------------------------------------------
/tools/workflow/workflow_mixin.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Workflow Mixin for Zen MCP Tools
3 |
4 | This module provides a sophisticated workflow-based pattern that enables tools to
5 | perform multi-step work with structured findings and expert analysis.
6 |
7 | Key Components:
8 | - BaseWorkflowMixin: Abstract base class providing comprehensive workflow functionality
9 |
10 | The workflow pattern enables tools like debug, precommit, and codereview to perform
11 | systematic multi-step work with pause/resume capabilities, context-aware file embedding,
12 | and seamless integration with external AI models for expert analysis.
13 |
14 | Features:
15 | - Multi-step workflow orchestration with pause/resume
16 | - Context-aware file embedding optimization
17 | - Expert analysis integration with token budgeting
18 | - Conversation memory and threading support
19 | - Proper inheritance-based architecture (no hasattr/getattr)
20 | - Comprehensive type annotations for IDE support
21 | """
22 |
23 | import json
24 | import logging
25 | import os
26 | import re
27 | from abc import ABC, abstractmethod
28 | from typing import Any, Optional
29 |
30 | from mcp.types import TextContent
31 |
32 | from config import MCP_PROMPT_SIZE_LIMIT
33 | from utils.conversation_memory import add_turn, create_thread
34 |
35 | from ..shared.base_models import ConsolidatedFindings
36 | from ..shared.exceptions import ToolExecutionError
37 |
38 | logger = logging.getLogger(__name__)
39 |
40 |
41 | class BaseWorkflowMixin(ABC):
42 | """
43 | Abstract base class providing guided workflow functionality for tools.
44 |
45 | This class implements a sophisticated workflow pattern where the CLI performs
46 | systematic local work before calling external models for expert analysis.
47 | Tools can inherit from this class to gain comprehensive workflow capabilities.
48 |
49 | Architecture:
50 | - Uses proper inheritance patterns instead of hasattr/getattr
51 | - Provides hook methods with default implementations
52 | - Requires abstract methods to be implemented by subclasses
53 | - Fully type-annotated for excellent IDE support
54 |
55 | Context-Aware File Embedding:
56 | - Intermediate steps: Only reference file names (saves the CLI's context)
57 | - Final steps: Embed full file content for expert analysis
58 | - Integrates with existing token budgeting infrastructure
59 |
60 | Requirements:
61 | This class expects to be used with BaseTool and requires implementation of:
62 | - get_model_provider(model_name)
63 | - _resolve_model_context(arguments, request)
64 | - get_system_prompt()
65 | - get_default_temperature()
66 | - _prepare_file_content_for_prompt()
67 | """
68 |
69 | def __init__(self) -> None:
70 | super().__init__()
71 | self.work_history: list[dict[str, Any]] = []
72 | self.consolidated_findings: ConsolidatedFindings = ConsolidatedFindings()
73 | self.initial_request: Optional[str] = None
74 |
75 | # ================================================================================
76 | # Abstract Methods - Required Implementation by BaseTool or Subclasses
77 | # ================================================================================
78 |
79 | @abstractmethod
80 | def get_name(self) -> str:
81 | """Return the name of this tool. Usually provided by BaseTool."""
82 | pass
83 |
84 | @abstractmethod
85 | def get_workflow_request_model(self) -> type:
86 | """Return the request model class for this workflow tool."""
87 | pass
88 |
89 | @abstractmethod
90 | def get_system_prompt(self) -> str:
91 | """Return the system prompt for this tool. Usually provided by BaseTool."""
92 | pass
93 |
94 | @abstractmethod
95 | def get_language_instruction(self) -> str:
96 | """Return the language instruction for localization. Usually provided by BaseTool."""
97 | pass
98 |
99 | @abstractmethod
100 | def get_default_temperature(self) -> float:
101 | """Return the default temperature for this tool. Usually provided by BaseTool."""
102 | pass
103 |
104 | @abstractmethod
105 | def get_model_provider(self, model_name: str) -> Any:
106 | """Get model provider for the given model. Usually provided by BaseTool."""
107 | pass
108 |
109 | @abstractmethod
110 | def _resolve_model_context(self, arguments: dict[str, Any], request: Any) -> tuple[str, Any]:
111 | """Resolve model context from arguments. Usually provided by BaseTool."""
112 | pass
113 |
114 | @abstractmethod
115 | def _prepare_file_content_for_prompt(
116 | self,
117 | request_files: list[str],
118 | continuation_id: Optional[str],
119 | context_description: str = "New files",
120 | max_tokens: Optional[int] = None,
121 | reserve_tokens: int = 1_000,
122 | remaining_budget: Optional[int] = None,
123 | arguments: Optional[dict[str, Any]] = None,
124 | model_context: Optional[Any] = None,
125 | ) -> tuple[str, list[str]]:
126 | """Prepare file content for prompts. Usually provided by BaseTool."""
127 | pass
128 |
129 | # ================================================================================
130 | # Abstract Methods - Tool-Specific Implementation Required
131 | # ================================================================================
132 |
133 | @abstractmethod
134 | def get_work_steps(self, request: Any) -> list[str]:
135 | """Define tool-specific work steps and criteria"""
136 | pass
137 |
138 | @abstractmethod
139 | def get_required_actions(
140 | self, step_number: int, confidence: str, findings: str, total_steps: int, request=None
141 | ) -> list[str]:
142 | """Define required actions for each work phase.
143 |
144 | Args:
145 | step_number: Current step (1-based)
146 | confidence: Current confidence level (exploring, low, medium, high, certain)
147 | findings: Current findings text
148 | total_steps: Total estimated steps for this work
149 | request: Optional request object for continuation-aware decisions
150 |
151 | Returns:
152 | List of specific actions the CLI should take before calling tool again
153 | """
154 | pass
155 |
156 | # ================================================================================
157 | # Hook Methods - Default Implementations with Override Capability
158 | # ================================================================================
159 |
160 | def should_call_expert_analysis(self, consolidated_findings: ConsolidatedFindings, request=None) -> bool:
161 | """
162 | Decide when to call external model based on tool-specific criteria.
163 |
164 | Default implementation for tools that don't use expert analysis.
165 | Override this for tools that do use expert analysis.
166 |
167 | Args:
168 | consolidated_findings: Findings from workflow steps
169 | request: Current request object (optional for backwards compatibility)
170 | """
171 | if not self.requires_expert_analysis():
172 | return False
173 |
174 | # Check if user requested to skip assistant model
175 | if request and not self.get_request_use_assistant_model(request):
176 | return False
177 |
178 | # Default logic for tools that support expert analysis
179 | return (
180 | len(consolidated_findings.relevant_files) > 0
181 | or len(consolidated_findings.findings) >= 2
182 | or len(consolidated_findings.issues_found) > 0
183 | )
184 |
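    # Worked example (values assumed): with one relevant file, a single finding, and no
    # issues, the default returns True (relevant_files > 0). A self-contained tool that
    # overrides requires_expert_analysis() to return False (like planner) short-circuits to False.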
185 | def prepare_expert_analysis_context(self, consolidated_findings: ConsolidatedFindings) -> str:
186 | """
187 | Prepare context for external model call.
188 |
189 | Default implementation for tools that don't use expert analysis.
190 | Override this for tools that do use expert analysis.
191 | """
192 | if not self.requires_expert_analysis():
193 | return ""
194 |
195 | # Default context preparation
196 | context_parts = [
197 | f"=== {self.get_name().upper()} WORK SUMMARY ===",
198 | f"Total steps: {len(consolidated_findings.findings)}",
199 | f"Files examined: {len(consolidated_findings.files_checked)}",
200 | f"Relevant files: {len(consolidated_findings.relevant_files)}",
201 | "",
202 | "=== WORK PROGRESSION ===",
203 | ]
204 |
205 | for finding in consolidated_findings.findings:
206 | context_parts.append(finding)
207 |
208 | return "\n".join(context_parts)
209 |
210 | def requires_expert_analysis(self) -> bool:
211 | """
212 | Override this to completely disable expert analysis for the tool.
213 |
214 | Returns True if the tool supports expert analysis (default).
215 | Returns False if the tool is self-contained (like planner).
216 | """
217 | return True
218 |
219 | def should_include_files_in_expert_prompt(self) -> bool:
220 | """
221 | Whether to include file content in the expert analysis prompt.
222 | Override this to return True if your tool needs files in the prompt.
223 | """
224 | return False
225 |
226 | def should_embed_system_prompt(self) -> bool:
227 | """
228 | Whether to embed the system prompt in the main prompt.
229 | Override this to return True if your tool needs the system prompt embedded.
230 | """
231 | return False
232 |
233 | def get_expert_thinking_mode(self) -> str:
234 | """
235 | Get the thinking mode for expert analysis.
236 | Override this to customize the thinking mode.
237 | """
238 | return "high"
239 |
240 | def get_request_temperature(self, request) -> float:
241 | """Get temperature from request. Override for custom temperature handling."""
242 | try:
243 | return request.temperature if request.temperature is not None else self.get_default_temperature()
244 | except AttributeError:
245 | return self.get_default_temperature()
246 |
247 | def get_validated_temperature(self, request, model_context: Any) -> tuple[float, list[str]]:
248 | """
249 | Get temperature from request and validate it against model constraints.
250 |
251 | This is a convenience method that combines temperature extraction and validation
252 | for workflow tools. It ensures temperature is within valid range for the model.
253 |
254 | Args:
255 | request: The request object containing temperature
256 | model_context: Model context object containing model info
257 |
258 | Returns:
259 | Tuple of (validated_temperature, warning_messages)
260 | """
261 | temperature = self.get_request_temperature(request)
262 | return self.validate_and_correct_temperature(temperature, model_context)
263 |
264 | def get_request_thinking_mode(self, request) -> str:
265 | """Get thinking mode from request. Override for custom thinking mode handling."""
266 | try:
267 | return request.thinking_mode if request.thinking_mode is not None else self.get_expert_thinking_mode()
268 | except AttributeError:
269 | return self.get_expert_thinking_mode()
270 |
271 | def get_expert_analysis_instruction(self) -> str:
272 | """
273 | Get the instruction to append after the expert context.
274 | Override this to provide tool-specific instructions.
275 | """
276 | return "Please provide expert analysis based on the investigation findings."
277 |
278 | def get_request_use_assistant_model(self, request) -> bool:
279 | """
280 | Get use_assistant_model from request. Override for custom assistant model handling.
281 |
282 | Args:
283 | request: Current request object
284 |
285 | Returns:
286 | True if assistant model should be used, False otherwise
287 | """
288 | try:
289 | return request.use_assistant_model if request.use_assistant_model is not None else True
290 | except AttributeError:
291 | return True
292 |
293 | def get_step_guidance_message(self, request) -> str:
294 | """
295 | Get step guidance message. Override for tool-specific guidance.
296 | Default implementation uses required actions.
297 | """
298 | required_actions = self.get_required_actions(
299 | request.step_number, self.get_request_confidence(request), request.findings, request.total_steps, request
300 | )
301 |
302 | next_step_number = request.step_number + 1
303 | return (
304 | f"MANDATORY: DO NOT call the {self.get_name()} tool again immediately. "
305 | f"You MUST first work using appropriate tools. "
306 | f"REQUIRED ACTIONS before calling {self.get_name()} step {next_step_number}:\n"
307 | + "\n".join(f"{i + 1}. {action}" for i, action in enumerate(required_actions))
308 | + f"\n\nOnly call {self.get_name()} again with step_number: {next_step_number} "
309 | f"AFTER completing this work."
310 | )
311 |
312 | def _prepare_files_for_expert_analysis(self) -> str:
313 | """
314 | Prepare file content for expert analysis.
315 |
316 | EXPERT ANALYSIS REQUIRES ACTUAL FILE CONTENT:
317 | Expert analysis needs actual file content of all unique files marked as relevant
318 | throughout the workflow, regardless of conversation history optimization.
319 |
320 | SIMPLIFIED LOGIC:
321 | Expert analysis gets all unique files from relevant_files across the entire workflow.
322 | This includes:
323 | - Current step's relevant_files (consolidated_findings.relevant_files)
324 | - Plus any additional relevant_files from conversation history (if continued workflow)
325 |
326 | This ensures expert analysis has complete context without including irrelevant files.
327 | """
328 | all_relevant_files = set()
329 |
330 | # 1. Get files from current consolidated relevant_files
331 | all_relevant_files.update(self.consolidated_findings.relevant_files)
332 |
333 | # 2. Get additional relevant_files from conversation history (if continued workflow)
334 | try:
335 | current_arguments = self.get_current_arguments()
336 | if current_arguments:
337 | continuation_id = current_arguments.get("continuation_id")
338 |
339 | if continuation_id:
340 | from utils.conversation_memory import get_conversation_file_list, get_thread
341 |
342 | thread_context = get_thread(continuation_id)
343 | if thread_context:
344 | # Get all files from conversation (these were relevant_files in previous steps)
345 | conversation_files = get_conversation_file_list(thread_context)
346 | all_relevant_files.update(conversation_files)
347 | logger.debug(
348 | f"[WORKFLOW_FILES] {self.get_name()}: Added {len(conversation_files)} files from conversation history"
349 | )
350 | except Exception as e:
351 | logger.warning(f"[WORKFLOW_FILES] {self.get_name()}: Could not get conversation files: {e}")
352 |
353 | # Convert to list and remove any empty/None values
354 | files_for_expert = [f for f in all_relevant_files if f and f.strip()]
355 |
356 | if not files_for_expert:
357 | logger.debug(f"[WORKFLOW_FILES] {self.get_name()}: No relevant files found for expert analysis")
358 | return ""
359 |
360 | # Expert analysis needs actual file content, bypassing conversation optimization
361 | try:
362 | file_content, processed_files = self._force_embed_files_for_expert_analysis(files_for_expert)
363 |
364 | logger.info(
365 | f"[WORKFLOW_FILES] {self.get_name()}: Prepared {len(processed_files)} unique relevant files for expert analysis "
366 | f"(from {len(self.consolidated_findings.relevant_files)} current relevant files)"
367 | )
368 |
369 | return file_content
370 |
371 | except Exception as e:
372 | logger.error(f"[WORKFLOW_FILES] {self.get_name()}: Failed to prepare files for expert analysis: {e}")
373 | return ""
374 |
375 | def _force_embed_files_for_expert_analysis(self, files: list[str]) -> tuple[str, list[str]]:
376 | """
377 | Force embed files for expert analysis, bypassing conversation history filtering.
378 |
379 | Expert analysis has different requirements than normal workflow steps:
380 | - Normal steps: Optimize tokens by skipping files in conversation history
381 | - Expert analysis: Needs actual file content regardless of conversation history
382 |
383 | Args:
384 | files: List of file paths to embed
385 |
386 | Returns:
387 | tuple[str, list[str]]: (file_content, processed_files)
388 | """
389 | # Use read_files directly with token budgeting, bypassing filter_new_files
390 | from utils.file_utils import expand_paths, read_files
391 |
392 | # Get token budget for files
393 | current_model_context = self.get_current_model_context()
394 | if current_model_context:
395 | try:
396 | token_allocation = current_model_context.calculate_token_allocation()
397 | max_tokens = token_allocation.file_tokens
398 | logger.debug(
399 | f"[WORKFLOW_FILES] {self.get_name()}: Using {max_tokens:,} tokens for expert analysis files"
400 | )
401 | except Exception as e:
402 | logger.warning(f"[WORKFLOW_FILES] {self.get_name()}: Failed to get token allocation: {e}")
403 | max_tokens = 100_000 # Fallback
404 | else:
405 | max_tokens = 100_000 # Fallback
406 |
407 | # Read files directly without conversation history filtering
408 | logger.debug(f"[WORKFLOW_FILES] {self.get_name()}: Force embedding {len(files)} files for expert analysis")
409 | file_content = read_files(
410 | files,
411 | max_tokens=max_tokens,
412 | reserve_tokens=1000,
413 | include_line_numbers=self.wants_line_numbers_by_default(),
414 | )
415 |
416 | # Expand paths to get individual files for tracking
417 | processed_files = expand_paths(files)
418 |
419 | logger.debug(
420 | f"[WORKFLOW_FILES] {self.get_name()}: Expert analysis embedding: {len(processed_files)} files, "
421 | f"{len(file_content):,} characters"
422 | )
423 |
424 | return file_content, processed_files
425 |
426 | def wants_line_numbers_by_default(self) -> bool:
427 | """
428 | Whether this tool wants line numbers in file content by default.
429 | Override this to customize line number behavior.
430 | """
431 | return True # Most workflow tools benefit from line numbers for analysis
432 |
433 | def _add_files_to_expert_context(self, expert_context: str, file_content: str) -> str:
434 | """
435 | Add file content to the expert context.
436 | Override this to customize how files are added to the context.
437 | """
438 | return f"{expert_context}\n\n=== ESSENTIAL FILES ===\n{file_content}\n=== END ESSENTIAL FILES ==="
439 |
440 | # ================================================================================
441 | # Context-Aware File Embedding - Core Implementation
442 | # ================================================================================
443 |
444 | def _handle_workflow_file_context(self, request: Any, arguments: dict[str, Any]) -> None:
445 | """
446 | Handle file context appropriately based on workflow phase.
447 |
448 | CONTEXT-AWARE FILE EMBEDDING STRATEGY:
449 | 1. Intermediate steps + continuation: Only reference file names (save the CLI's context)
450 | 2. Final step: Embed full file content for expert analysis
451 | 3. Expert analysis: Always embed relevant files with token budgeting
452 |
453 | This prevents wasting the CLI's limited context on intermediate steps while ensuring
454 | the final expert analysis has complete file context.
455 | """
456 | continuation_id = self.get_request_continuation_id(request)
457 | is_final_step = not self.get_request_next_step_required(request)
458 | step_number = self.get_request_step_number(request)
459 |
460 | # Extract model context for token budgeting
461 | model_context = arguments.get("_model_context")
462 | self._model_context = model_context
463 |
464 | # Clear any previous file context to ensure clean state
465 | self._embedded_file_content = ""
466 | self._file_reference_note = ""
467 | self._actually_processed_files = []
468 |
469 | # Determine if we should embed files or just reference them
470 | should_embed_files = self._should_embed_files_in_workflow_step(step_number, continuation_id, is_final_step)
471 |
472 | if should_embed_files:
473 | # Final step or expert analysis - embed full file content
474 | logger.debug(f"[WORKFLOW_FILES] {self.get_name()}: Embedding files for final step/expert analysis")
475 | self._embed_workflow_files(request, arguments)
476 | else:
477 | # Intermediate step with continuation - only reference file names
478 | logger.debug(f"[WORKFLOW_FILES] {self.get_name()}: Only referencing file names for intermediate step")
479 | self._reference_workflow_files(request)
480 |
481 | def _should_embed_files_in_workflow_step(
482 | self, step_number: int, continuation_id: Optional[str], is_final_step: bool
483 | ) -> bool:
484 | """
485 | Determine whether to embed file content based on workflow context.
486 |
487 | CORRECT LOGIC:
488 | - NEVER embed files when the CLI is getting the next step (next_step_required=True)
489 | - ONLY embed files when sending to external model (next_step_required=False)
490 |
491 | Args:
492 | step_number: Current step number
493 | continuation_id: Thread continuation ID (None for new conversations)
494 | is_final_step: Whether this is the final step (next_step_required == False)
495 |
496 | Returns:
497 | bool: True if files should be embedded, False if only referenced
498 | """
499 | # RULE 1: Final steps (no more steps needed) - embed files for expert analysis
500 | if is_final_step:
501 | logger.debug("[WORKFLOW_FILES] Final step - will embed files for expert analysis")
502 | return True
503 |
504 | # RULE 2: Any intermediate step (more steps needed) - NEVER embed files
505 | # This includes:
506 | # - New conversations with next_step_required=True
507 | # - Steps with continuation_id and next_step_required=True
508 | logger.debug("[WORKFLOW_FILES] Intermediate step (more work needed) - will only reference files")
509 | return False
510 |
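    # Decision example (step numbers assumed): step 2 of 4 with next_step_required=True
    # -> False (reference file names only); the final step with next_step_required=False
    # -> True (embed full content for expert analysis), regardless of continuation_id.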
511 | def _embed_workflow_files(self, request: Any, arguments: dict[str, Any]) -> None:
512 | """
513 | Embed full file content for final steps and expert analysis.
514 | Uses proper token budgeting like existing debug.py.
515 | """
516 | # Use relevant_files as the standard field for workflow tools
517 | request_files = self.get_request_relevant_files(request)
518 | if not request_files:
519 | logger.debug(f"[WORKFLOW_FILES] {self.get_name()}: No relevant_files to embed")
520 | return
521 |
522 | try:
523 | # Model context should be available from early validation, but might be deferred for tests
524 | current_model_context = self.get_current_model_context()
525 | if not current_model_context:
526 | # Try to resolve model context now (deferred from early validation)
527 | try:
528 | model_name, model_context = self._resolve_model_context(arguments, request)
529 | self._model_context = model_context
530 | self._current_model_name = model_name
531 | except Exception as e:
532 | logger.error(f"[WORKFLOW_FILES] {self.get_name()}: Failed to resolve model context: {e}")
533 | # Create fallback model context (preserves existing test behavior)
534 | from utils.model_context import ModelContext
535 |
536 | model_name = self.get_request_model_name(request)
537 | self._model_context = ModelContext(model_name)
538 | self._current_model_name = model_name
539 |
540 | # Use the same file preparation logic as BaseTool with token budgeting
541 | continuation_id = self.get_request_continuation_id(request)
542 | remaining_tokens = arguments.get("_remaining_tokens")
543 |
544 | file_content, processed_files = self._prepare_file_content_for_prompt(
545 | request_files,
546 | continuation_id,
547 | "Workflow files for analysis",
548 | remaining_budget=remaining_tokens,
549 | arguments=arguments,
550 | model_context=self._model_context,
551 | )
552 |
553 | # Store for use in expert analysis
554 | self._embedded_file_content = file_content
555 | self._actually_processed_files = processed_files
556 |
557 | logger.info(
558 | f"[WORKFLOW_FILES] {self.get_name()}: Embedded {len(processed_files)} relevant_files for final analysis"
559 | )
560 |
561 | except Exception as e:
562 | logger.error(f"[WORKFLOW_FILES] {self.get_name()}: Failed to embed files: {e}")
563 | # Continue without file embedding rather than failing
564 | self._embedded_file_content = ""
565 | self._actually_processed_files = []
566 |
567 | def _reference_workflow_files(self, request: Any) -> None:
568 | """
569 | Reference file names without embedding content for intermediate steps.
570 | Saves the CLI's context while still providing file awareness.
571 | """
572 | # Workflow tools use relevant_files, not files
573 | request_files = self.get_request_relevant_files(request)
574 | logger.debug(
575 | f"[WORKFLOW_FILES] {self.get_name()}: _reference_workflow_files called with {len(request_files)} relevant_files"
576 | )
577 |
578 | if not request_files:
579 | logger.debug(f"[WORKFLOW_FILES] {self.get_name()}: No files to reference, skipping")
580 | return
581 |
582 | # Store file references for conversation context
583 | self._referenced_files = request_files
584 |
585 | # Create a simple reference note
586 | file_names = [os.path.basename(f) for f in request_files]
587 | reference_note = f"Files referenced in this step: {', '.join(file_names)}\n"
588 |
589 | self._file_reference_note = reference_note
590 | logger.debug(f"[WORKFLOW_FILES] {self.get_name()}: Set _file_reference_note: {self._file_reference_note}")
591 |
592 | logger.info(
593 | f"[WORKFLOW_FILES] {self.get_name()}: Referenced {len(request_files)} files without embedding content"
594 | )
595 |
596 | # ================================================================================
597 | # Main Workflow Orchestration
598 | # ================================================================================
599 |
600 | async def execute_workflow(self, arguments: dict[str, Any]) -> list[TextContent]:
601 | """
602 | Main workflow orchestration following debug tool pattern.
603 |
604 | Comprehensive workflow implementation that handles all common patterns:
605 | 1. Request validation and step management
606 | 2. Continuation and backtracking support
607 | 3. Step data processing and consolidation
608 | 4. Tool-specific field mapping and customization
609 | 5. Completion logic with optional expert analysis
610 | 6. Generic "certain confidence" handling
611 | 7. Step guidance and required actions
612 | 8. Conversation memory integration
613 | """
614 | from mcp.types import TextContent
615 |
616 | try:
617 | # Store arguments for access by helper methods
618 | self._current_arguments = arguments
619 |
620 | # Validate request using tool-specific model
621 | request = self.get_workflow_request_model()(**arguments)
622 |
623 | # Validate step field size (basic validation for workflow instructions)
624 | # If step is too large, user should use shorter instructions and put details in files
625 | step_content = request.step
626 | if step_content and len(step_content) > MCP_PROMPT_SIZE_LIMIT:
627 | from tools.models import ToolOutput
628 |
629 | error_output = ToolOutput(
630 | status="resend_prompt",
631 | content="Step instructions are too long. Please use shorter instructions and provide detailed context via file paths instead.",
632 | content_type="text",
633 | metadata={"prompt_size": len(step_content), "limit": MCP_PROMPT_SIZE_LIMIT},
634 | )
635 | raise ValueError(f"MCP_SIZE_CHECK:{error_output.model_dump_json()}")
636 |
637 | # Validate file paths for security (same as base tool)
638 | # Use try/except instead of hasattr as per coding standards
639 | try:
640 | path_error = self.validate_file_paths(request)
641 | if path_error:
642 | from tools.models import ToolOutput
643 |
644 | error_output = ToolOutput(
645 | status="error",
646 | content=path_error,
647 | content_type="text",
648 | )
649 | logger.error("Path validation failed for %s: %s", self.get_name(), path_error)
650 | raise ToolExecutionError(error_output.model_dump_json())
651 | except AttributeError:
652 | # validate_file_paths method not available - skip validation
653 | pass
654 |
655 | # Try to validate model availability early for production scenarios
656 | # For tests, defer model validation to later to allow mocks to work
657 | try:
658 | model_name, model_context = self._resolve_model_context(arguments, request)
659 | # Store for later use
660 | self._current_model_name = model_name
661 | self._model_context = model_context
662 | except ValueError as e:
663 | # Model resolution failed - in production this would be an error,
664 | # but for tests we defer to allow mocks to handle model resolution
665 | logger.debug(f"Early model validation failed, deferring to later: {e}")
666 | self._current_model_name = None
667 | self._model_context = None
668 |
669 | # Handle continuation
670 | continuation_id = request.continuation_id
671 |
672 | # Restore workflow state on continuation
673 | if continuation_id:
674 | from utils.conversation_memory import get_thread
675 |
676 | thread = get_thread(continuation_id)
677 | if thread and thread.turns:
678 | # Find the most recent assistant turn from this tool with workflow state
679 | for turn in reversed(thread.turns):
680 | if turn.role == "assistant" and turn.tool_name == self.get_name() and turn.model_metadata:
681 | state = turn.model_metadata
682 | if isinstance(state, dict) and "work_history" in state:
683 | self.work_history = state.get("work_history", [])
684 | self.initial_request = state.get("initial_request")
685 | # Rebuild consolidated findings from restored history
686 | self._reprocess_consolidated_findings()
687 | logger.debug(
688 | f"[{self.get_name()}] Restored workflow state with {len(self.work_history)} history items"
689 | )
690 | break # State restored, exit loop
691 |
692 | # Adjust total steps if needed
693 | if request.step_number > request.total_steps:
694 | request.total_steps = request.step_number
695 |
696 | # Create thread for first step
697 | if not continuation_id and request.step_number == 1:
698 | clean_args = {k: v for k, v in arguments.items() if k not in ["_model_context", "_resolved_model_name"]}
699 | continuation_id = create_thread(self.get_name(), clean_args)
700 | self.initial_request = request.step
701 | # Allow tools to store initial description for expert analysis
702 | self.store_initial_issue(request.step)
703 |
704 | # Process work step - allow tools to customize field mapping
705 | step_data = self.prepare_step_data(request)
706 |
707 | # Store in history
708 | self.work_history.append(step_data)
709 |
710 | # Update consolidated findings
711 | self._update_consolidated_findings(step_data)
712 |
713 | # Handle file context appropriately based on workflow phase
714 | self._handle_workflow_file_context(request, arguments)
715 |
716 | # Build response with tool-specific customization
717 | response_data = self.build_base_response(request, continuation_id)
718 |
719 | # If work is complete, handle completion logic
720 | if not request.next_step_required:
721 | response_data = await self.handle_work_completion(response_data, request, arguments)
722 | else:
723 | # Force CLI to work before calling tool again
724 | response_data = self.handle_work_continuation(response_data, request)
725 |
726 | # Allow tools to customize the final response
727 | response_data = self.customize_workflow_response(response_data, request)
728 |
729 | # Add metadata (provider_used and model_used) to workflow response
730 | self._add_workflow_metadata(response_data, arguments)
731 |
732 | # Store in conversation memory
733 | if continuation_id:
734 | self.store_conversation_turn(continuation_id, response_data, request)
735 |
736 | return [TextContent(type="text", text=json.dumps(response_data, indent=2, ensure_ascii=False))]
737 |
738 | except ToolExecutionError:
739 | raise
740 | except Exception as e:
741 | if str(e).startswith("MCP_SIZE_CHECK:"):
742 | payload = str(e)[len("MCP_SIZE_CHECK:") :]
743 | raise ToolExecutionError(payload)
744 |
745 | logger.error(f"Error in {self.get_name()} work: {e}", exc_info=True)
746 | error_data = {
747 | "status": f"{self.get_name()}_failed",
748 | "error": str(e),
749 | "step_number": arguments.get("step_number", 0),
750 | }
751 |
752 | # Add metadata to error responses too
753 | self._add_workflow_metadata(error_data, arguments)
754 |
755 | raise ToolExecutionError(json.dumps(error_data, indent=2, ensure_ascii=False)) from e
756 |
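    # Illustrative first call (argument values assumed, not taken from the repository):
    #
    #     await tool.execute_workflow({
    #         "step": "Investigate the failing login flow",
    #         "step_number": 1,
    #         "total_steps": 3,
    #         "next_step_required": True,
    #         "findings": "Initial reproduction confirmed",
    #     })
    #
    # Step 1 creates a new thread, records the step in work_history, and (because
    # next_step_required is True) returns step guidance instead of calling the expert model.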
757 | # Hook methods for tool customization
758 |
759 | def prepare_step_data(self, request) -> dict:
760 | """
761 | Prepare step data from request. Tools can override to customize field mapping.
762 | """
763 | step_data = {
764 | "step": request.step,
765 | "step_number": request.step_number,
766 | "findings": request.findings,
767 | "files_checked": self.get_request_files_checked(request),
768 | "relevant_files": self.get_request_relevant_files(request),
769 | "relevant_context": self.get_request_relevant_context(request),
770 | "issues_found": self.get_request_issues_found(request),
771 | "confidence": self.get_request_confidence(request),
772 | "hypothesis": self.get_request_hypothesis(request),
773 | "images": self.get_request_images(request),
774 | }
775 | return step_data
776 |
777 | def build_base_response(self, request, continuation_id: str = None) -> dict:
778 | """
779 | Build the base response structure. Tools can override for custom response fields.
780 | """
781 | response_data = {
782 | "status": f"{self.get_name()}_in_progress",
783 | "step_number": request.step_number,
784 | "total_steps": request.total_steps,
785 | "next_step_required": request.next_step_required,
786 | f"{self.get_name()}_status": {
787 | "files_checked": len(self.consolidated_findings.files_checked),
788 | "relevant_files": len(self.consolidated_findings.relevant_files),
789 | "relevant_context": len(self.consolidated_findings.relevant_context),
790 | "issues_found": len(self.consolidated_findings.issues_found),
791 | "images_collected": len(self.consolidated_findings.images),
792 | "current_confidence": self.get_request_confidence(request),
793 | },
794 | }
795 |
796 | if continuation_id:
797 | response_data["continuation_id"] = continuation_id
798 |
799 | # Add file context information based on workflow phase
800 | embedded_content = self.get_embedded_file_content()
801 | reference_note = self.get_file_reference_note()
802 | processed_files = self.get_actually_processed_files()
803 |
804 | logger.debug(
805 | f"[WORKFLOW_FILES] {self.get_name()}: Building response - has embedded_content: {bool(embedded_content)}, has reference_note: {bool(reference_note)}"
806 | )
807 |
808 | # Prioritize embedded content over references for final steps
809 | if embedded_content:
810 | # Final step - include embedded file information
811 | logger.debug(f"[WORKFLOW_FILES] {self.get_name()}: Adding fully_embedded file context")
812 | response_data["file_context"] = {
813 | "type": "fully_embedded",
814 | "files_embedded": len(processed_files),
815 | "context_optimization": "Full file content embedded for expert analysis",
816 | }
817 | elif reference_note:
818 | # Intermediate step - include file reference note
819 | logger.debug(f"[WORKFLOW_FILES] {self.get_name()}: Adding reference_only file context")
820 | response_data["file_context"] = {
821 | "type": "reference_only",
822 | "note": reference_note,
823 | "context_optimization": "Files referenced but not embedded to preserve the context window",
824 | }
825 |
826 | return response_data
827 |
828 | def should_skip_expert_analysis(self, request, consolidated_findings) -> bool:
829 | """
830 | Determine if expert analysis should be skipped due to high certainty.
831 |
832 | Default: False (always call expert analysis)
833 | Override in tools like debug to check for "certain" confidence.
834 | """
835 | return False
836 |
837 | def handle_completion_without_expert_analysis(self, request, consolidated_findings) -> dict:
838 | """
839 | Handle completion when skipping expert analysis.
840 |
841 | Tools can override this for custom high-confidence completion handling.
842 | Default implementation provides generic response.
843 | """
844 | work_summary = self.prepare_work_summary()
845 | continuation_id = self.get_request_continuation_id(request)
846 |
847 | response_data = {
848 | "status": self.get_completion_status(),
849 | f"complete_{self.get_name()}": {
850 | "initial_request": self.get_initial_request(request.step),
851 | "steps_taken": len(consolidated_findings.findings),
852 | "files_examined": list(consolidated_findings.files_checked),
853 | "relevant_files": list(consolidated_findings.relevant_files),
854 | "relevant_context": list(consolidated_findings.relevant_context),
855 | "work_summary": work_summary,
856 | "final_analysis": self.get_final_analysis_from_request(request),
857 | "confidence_level": self.get_confidence_level(request),
858 | },
859 | "next_steps": self.get_completion_message(),
860 | "skip_expert_analysis": True,
861 | "expert_analysis": {
862 | "status": self.get_skip_expert_analysis_status(),
863 | "reason": self.get_skip_reason(),
864 | },
865 | }
866 |
867 | if continuation_id:
868 | response_data["continuation_id"] = continuation_id
869 |
870 | return response_data
871 |
872 | # ================================================================================
873 | # Inheritance Hook Methods - Replace hasattr/getattr Anti-patterns
874 | # ================================================================================
875 |
876 | def get_request_confidence(self, request: Any) -> str:
877 | """Get confidence from request. Override for custom confidence handling."""
878 | try:
879 | return request.confidence or "low"
880 | except AttributeError:
881 | return "low"
882 |
883 | def get_request_relevant_context(self, request: Any) -> list[str]:
884 | """Get relevant context from request. Override for custom field mapping."""
885 | try:
886 | return request.relevant_context or []
887 | except AttributeError:
888 | return []
889 |
890 | def get_request_issues_found(self, request: Any) -> list[str]:
891 | """Get issues found from request. Override for custom field mapping."""
892 | try:
893 | return request.issues_found or []
894 | except AttributeError:
895 | return []
896 |
897 | def get_request_hypothesis(self, request: Any) -> Optional[str]:
898 | """Get hypothesis from request. Override for custom field mapping."""
899 | try:
900 | return request.hypothesis
901 | except AttributeError:
902 | return None
903 |
904 | def get_request_images(self, request: Any) -> list[str]:
905 | """Get images from request. Override for custom field mapping."""
906 | try:
907 | return request.images or []
908 | except AttributeError:
909 | return []
910 |
911 | # File Context Access Methods
912 |
913 | def get_embedded_file_content(self) -> str:
914 | """Get embedded file content. Returns empty string if not available."""
915 | try:
916 | return self._embedded_file_content or ""
917 | except AttributeError:
918 | return ""
919 |
920 | def get_file_reference_note(self) -> str:
921 | """Get file reference note. Returns empty string if not available."""
922 | try:
923 | return self._file_reference_note or ""
924 | except AttributeError:
925 | return ""
926 |
927 | def get_actually_processed_files(self) -> list[str]:
928 | """Get list of actually processed files. Returns empty list if not available."""
929 | try:
930 | return self._actually_processed_files or []
931 | except AttributeError:
932 | return []
933 |
934 | def get_current_model_context(self):
935 | """Get current model context. Returns None if not available."""
936 | try:
937 | return self._model_context
938 | except AttributeError:
939 | return None
940 |
941 | def get_request_model_name(self, request: Any) -> str:
942 | """Get model name from request. Override for custom model handling."""
943 | try:
944 | return request.model or "flash"
945 | except AttributeError:
946 | return "flash"
947 |
948 | def get_request_continuation_id(self, request: Any) -> Optional[str]:
949 | """Get continuation ID from request. Override for custom continuation handling."""
950 | try:
951 | return request.continuation_id
952 | except AttributeError:
953 | return None
954 |
955 | def get_request_next_step_required(self, request: Any) -> bool:
956 | """Get next step required from request. Override for custom step handling."""
957 | try:
958 | return request.next_step_required
959 | except AttributeError:
960 | return True
961 |
962 | def get_request_step_number(self, request: Any) -> int:
963 | """Get step number from request. Override for custom step handling."""
964 | try:
965 | return request.step_number or 1
966 | except AttributeError:
967 | return 1
968 |
969 | def get_request_relevant_files(self, request: Any) -> list[str]:
970 | """Get relevant files from request. Override for custom file handling."""
971 | try:
972 | return request.relevant_files or []
973 | except AttributeError:
974 | return []
975 |
976 | def get_request_files_checked(self, request: Any) -> list[str]:
977 | """Get files checked from request. Override for custom file handling."""
978 | try:
979 | return request.files_checked or []
980 | except AttributeError:
981 | return []
982 |
983 | def get_current_arguments(self) -> dict[str, Any]:
984 | """Get current arguments. Returns empty dict if not available."""
985 | try:
986 | return self._current_arguments or {}
987 | except AttributeError:
988 | return {}
989 |
990 | def store_initial_issue(self, step_description: str):
991 | """Store initial issue description. Override for custom storage."""
992 | # Default implementation - tools can override to store differently
993 | self.initial_issue = step_description
994 |
995 | def get_initial_request(self, fallback_step: str) -> str:
996 | """Get initial request description. Override for custom retrieval."""
997 | try:
998 | return self.initial_request or fallback_step
999 | except AttributeError:
1000 | return fallback_step
1001 |
1002 | # Default implementations for inheritance hooks
1003 |
1004 | def prepare_work_summary(self) -> str:
1005 | """Prepare work summary. Override for custom implementation."""
1006 | return f"Completed {len(self.consolidated_findings.findings)} work steps"
1007 |
1008 | def get_completion_status(self) -> str:
1009 | """Get completion status. Override for tool-specific status."""
1010 | return "high_confidence_completion"
1011 |
1012 | def get_final_analysis_from_request(self, request):
1013 | """Extract final analysis from request. Override for tool-specific fields."""
1014 | return self.get_request_hypothesis(request)
1015 |
1016 | def get_confidence_level(self, request) -> str:
1017 | """Get confidence level. Override for tool-specific confidence handling."""
1018 | return self.get_request_confidence(request) or "high"
1019 |
1020 | def get_completion_message(self) -> str:
1021 | """Get completion message. Override for tool-specific messaging."""
1022 | return (
1023 | f"{self.get_name().capitalize()} complete with high confidence. Present results "
1024 | "and proceed with implementation without requiring further consultation."
1025 | )
1026 |
1027 | def get_skip_reason(self) -> str:
1028 | """Get reason for skipping expert analysis. Override for tool-specific reasons."""
1029 | return f"{self.get_name()} completed with sufficient confidence"
1030 |
1031 | def get_skip_expert_analysis_status(self) -> str:
1032 | """Get status for skipped expert analysis. Override for tool-specific status."""
1033 | return "skipped_by_tool_design"
1034 |
1035 | def get_completion_next_steps_message(self, expert_analysis_used: bool = False) -> str:
1036 | """
1037 | Get the message to show when work is complete.
1038 | Tools can override for custom messaging.
1039 |
1040 | Args:
1041 | expert_analysis_used: True if expert analysis was successfully executed
1042 | """
1043 | base_message = (
1044 | f"{self.get_name().upper()} IS COMPLETE. You MUST now summarize and present ALL key findings, confirmed "
1045 | "hypotheses, and exact recommended solutions. Clearly identify the most likely root cause and "
1046 | "provide concrete, actionable implementation guidance. Highlight affected code paths and display "
1047 | "reasoning that led to this conclusion—make it easy for a developer to understand exactly where "
1048 | "the problem lies."
1049 | )
1050 |
1051 | # Add expert analysis guidance only when expert analysis was actually used
1052 | if expert_analysis_used:
1053 | expert_guidance = self.get_expert_analysis_guidance()
1054 | if expert_guidance:
1055 | return f"{base_message}\n\n{expert_guidance}"
1056 |
1057 | return base_message
1058 |
1059 | def get_expert_analysis_guidance(self) -> str:
1060 | """
1061 | Get additional guidance for handling expert analysis results.
1062 |
1063 | Subclasses can override this to provide specific instructions about how
1064 | to validate and use expert analysis findings. Returns empty string by default.
1065 |
1066 | When expert analysis is called, this guidance will be:
1067 | 1. Appended to the completion next steps message
1068 | 2. Added as "important_considerations" field in the response data
1069 |
1070 | Example implementation:
1071 | ```python
1072 | def get_expert_analysis_guidance(self) -> str:
1073 | return (
1074 | "IMPORTANT: Expert analysis provided above. You MUST validate "
1075 | "the expert findings rather than accepting them blindly. "
1076 | "Cross-reference with your own investigation and ensure "
1077 | "recommendations align with the codebase context."
1078 | )
1079 | ```
1080 |
1081 | Returns:
1082 | Additional guidance text or empty string if no guidance needed
1083 | """
1084 | return ""
1085 |
1086 | def customize_workflow_response(self, response_data: dict, request) -> dict:
1087 | """
1088 | Allow tools to customize the workflow response before returning.
1089 |
1090 | Tools can override this to add tool-specific fields, modify status names,
1091 |         customize field mapping, and so on. The default implementation only backfills missing file-context information before returning.
1092 | """
1093 | # Ensure file context information is preserved in all response paths
1094 | if not response_data.get("file_context"):
1095 | embedded_content = self.get_embedded_file_content()
1096 | reference_note = self.get_file_reference_note()
1097 | processed_files = self.get_actually_processed_files()
1098 |
1099 | # Prioritize embedded content over references for final steps
1100 | if embedded_content:
1101 | response_data["file_context"] = {
1102 | "type": "fully_embedded",
1103 | "files_embedded": len(processed_files),
1104 | "context_optimization": "Full file content embedded for expert analysis",
1105 | }
1106 | elif reference_note:
1107 | response_data["file_context"] = {
1108 | "type": "reference_only",
1109 | "note": reference_note,
1110 | "context_optimization": "Files referenced but not embedded to preserve the context window",
1111 | }
1112 |
1113 | return response_data
1114 |
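    # Illustrative sketch (hypothetical override; the renamed status and the helper are examples only):
    #
    #     def customize_workflow_response(self, response_data: dict, request) -> dict:
    #         response_data = super().customize_workflow_response(response_data, request)
    #         if response_data.get("status") == "local_work_complete":
    #             response_data["status"] = "audit_complete"
    #         response_data["severity_breakdown"] = self._count_issues_by_severity()
    #         return response_data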
1115 | def store_conversation_turn(self, continuation_id: str, response_data: dict, request):
1116 | """
1117 | Store the conversation turn. Tools can override for custom memory storage.
1118 | """
1119 | # CRITICAL: Extract clean content for conversation history (exclude internal workflow metadata)
1120 | clean_content = self._extract_clean_workflow_content_for_history(response_data)
1121 |
1122 | # Serialize workflow state for persistence across stateless tool calls
1123 | workflow_state = {"work_history": self.work_history, "initial_request": getattr(self, "initial_request", None)}
1124 |
1125 | add_turn(
1126 | thread_id=continuation_id,
1127 | role="assistant",
1128 | content=clean_content, # Use cleaned content instead of full response_data
1129 | tool_name=self.get_name(),
1130 | files=self.get_request_relevant_files(request),
1131 | images=self.get_request_images(request),
1132 | model_metadata=workflow_state, # Persist the state
1133 | )
1134 |
1135 | def _add_workflow_metadata(self, response_data: dict, arguments: dict[str, Any]) -> None:
1136 | """
1137 | Add metadata (provider_used and model_used) to workflow response.
1138 |
1139 | This ensures workflow tools have the same metadata as regular tools,
1140 | making it consistent across all tool types for tracking which provider
1141 | and model were used for the response.
1142 |
1143 | Args:
1144 | response_data: The response data dictionary to modify
1145 | arguments: The original arguments containing model context
1146 | """
1147 | try:
1148 | # Get model information from arguments (set by server.py)
1149 | resolved_model_name = arguments.get("_resolved_model_name")
1150 | model_context = arguments.get("_model_context")
1151 |
1152 | if resolved_model_name and model_context:
1153 | # Extract provider information from model context
1154 | provider = model_context.provider
1155 | provider_name = provider.get_provider_type().value if provider else "unknown"
1156 |
1157 | # Create metadata dictionary
1158 | metadata = {
1159 | "tool_name": self.get_name(),
1160 | "model_used": resolved_model_name,
1161 | "provider_used": provider_name,
1162 | }
1163 |
1164 | # Preserve existing metadata and add workflow metadata
1165 | if "metadata" not in response_data:
1166 | response_data["metadata"] = {}
1167 | response_data["metadata"].update(metadata)
1168 |
1169 | logger.debug(
1170 | f"[WORKFLOW_METADATA] {self.get_name()}: Added metadata - "
1171 | f"model: {resolved_model_name}, provider: {provider_name}"
1172 | )
1173 | else:
1174 | # Fallback - try to get model info from request
1175 | request = self.get_workflow_request_model()(**arguments)
1176 | model_name = self.get_request_model_name(request)
1177 |
1178 | # Basic metadata without provider info
1179 | metadata = {
1180 | "tool_name": self.get_name(),
1181 | "model_used": model_name,
1182 | "provider_used": "unknown",
1183 | }
1184 |
1185 | # Preserve existing metadata and add workflow metadata
1186 | if "metadata" not in response_data:
1187 | response_data["metadata"] = {}
1188 | response_data["metadata"].update(metadata)
1189 |
1190 | logger.debug(
1191 | f"[WORKFLOW_METADATA] {self.get_name()}: Added fallback metadata - "
1192 | f"model: {model_name}, provider: unknown"
1193 | )
1194 |
1195 | except Exception as e:
1196 | # Don't fail the workflow if metadata addition fails
1197 | logger.warning(f"[WORKFLOW_METADATA] {self.get_name()}: Failed to add metadata: {e}")
1198 | # Still add basic metadata with tool name
1199 | response_data["metadata"] = {"tool_name": self.get_name()}
1200 |
1201 | def _extract_clean_workflow_content_for_history(self, response_data: dict) -> str:
1202 | """
1203 | Extract clean content from workflow response suitable for conversation history.
1204 |
1205 | This method removes internal workflow metadata, continuation offers, and
1206 | status information that should not appear when the conversation is
1207 | reconstructed for expert models or other tools.
1208 |
1209 | Args:
1210 | response_data: The full workflow response data
1211 |
1212 | Returns:
1213 | str: Clean content suitable for conversation history storage
1214 | """
1215 | # Create a clean copy with only essential content for conversation history
1216 | clean_data = {}
1217 |
1218 | # Include core content if present
1219 | if "content" in response_data:
1220 | clean_data["content"] = response_data["content"]
1221 |
1222 | # Include expert analysis if present (but clean it)
1223 | if "expert_analysis" in response_data:
1224 | expert_analysis = response_data["expert_analysis"]
1225 | if isinstance(expert_analysis, dict):
1226 | # Only include the actual analysis content, not metadata
1227 | clean_expert = {}
1228 | if "raw_analysis" in expert_analysis:
1229 | clean_expert["analysis"] = expert_analysis["raw_analysis"]
1230 | elif "content" in expert_analysis:
1231 | clean_expert["analysis"] = expert_analysis["content"]
1232 | if clean_expert:
1233 | clean_data["expert_analysis"] = clean_expert
1234 |
1235 | # Include findings/issues if present (core workflow output)
1236 | if "complete_analysis" in response_data:
1237 | complete_analysis = response_data["complete_analysis"]
1238 | if isinstance(complete_analysis, dict):
1239 | clean_complete = {}
1240 | # Include essential analysis data without internal metadata
1241 | for key in ["findings", "issues_found", "relevant_context", "insights"]:
1242 | if key in complete_analysis:
1243 | clean_complete[key] = complete_analysis[key]
1244 | if clean_complete:
1245 | clean_data["analysis_summary"] = clean_complete
1246 |
1247 | # Include step information for context but remove internal workflow metadata
1248 | if "step_number" in response_data:
1249 | clean_data["step_info"] = {
1250 | "step": response_data.get("step", ""),
1251 | "step_number": response_data.get("step_number", 1),
1252 | "total_steps": response_data.get("total_steps", 1),
1253 | }
1254 |
1255 | # Exclude problematic fields that should never appear in conversation history:
1256 | # - continuation_id (confuses LLMs with old IDs)
1257 | # - status (internal workflow state)
1258 | # - next_step_required (internal control flow)
1259 | # - analysis_status (internal tracking)
1260 | # - file_context (internal optimization info)
1261 | # - required_actions (internal workflow instructions)
1262 |
1263 | return json.dumps(clean_data, indent=2, ensure_ascii=False)
1264 |
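    # For orientation (illustrative values): a response such as
    #     {"status": "calling_expert_analysis", "continuation_id": "abc123", "step_number": 3,
    #      "total_steps": 3, "content": "...",
    #      "expert_analysis": {"status": "analysis_complete", "raw_analysis": "..."}}
    # is reduced by the method above to roughly
    #     {"content": "...", "expert_analysis": {"analysis": "..."},
    #      "step_info": {"step": "", "step_number": 3, "total_steps": 3}}
    # before being stored in conversation history.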
1265 | # Core workflow logic methods
1266 |
1267 | async def handle_work_completion(self, response_data: dict, request, arguments: dict) -> dict:
1268 | """
1269 | Handle work completion logic - expert analysis decision and response building.
1270 | """
1271 | response_data[f"{self.get_name()}_complete"] = True
1272 |
1273 | # Check if tool wants to skip expert analysis due to high certainty
1274 | if self.should_skip_expert_analysis(request, self.consolidated_findings):
1275 | # Handle completion without expert analysis
1276 | completion_response = self.handle_completion_without_expert_analysis(request, self.consolidated_findings)
1277 | response_data.update(completion_response)
1278 | elif self.requires_expert_analysis() and self.should_call_expert_analysis(self.consolidated_findings, request):
1279 | # Standard expert analysis path
1280 | response_data["status"] = "calling_expert_analysis"
1281 |
1282 | # Call expert analysis
1283 | expert_analysis = await self._call_expert_analysis(arguments, request)
1284 | response_data["expert_analysis"] = expert_analysis
1285 |
1286 | # Handle special expert analysis statuses
1287 | if isinstance(expert_analysis, dict) and expert_analysis.get("status") in [
1288 | "files_required_to_continue",
1289 | "investigation_paused",
1290 | "refactoring_paused",
1291 | ]:
1292 | # Promote the special status to the main response
1293 | special_status = expert_analysis["status"]
1294 | response_data["status"] = special_status
1295 | response_data["content"] = expert_analysis.get(
1296 | "raw_analysis", json.dumps(expert_analysis, ensure_ascii=False)
1297 | )
1298 | del response_data["expert_analysis"]
1299 |
1300 | # Update next steps for special status
1301 | if special_status == "files_required_to_continue":
1302 | response_data["next_steps"] = "Provide the requested files and continue the analysis."
1303 | else:
1304 | response_data["next_steps"] = expert_analysis.get(
1305 | "next_steps", "Continue based on expert analysis."
1306 | )
1307 | elif isinstance(expert_analysis, dict) and expert_analysis.get("status") == "analysis_error":
1308 | # Expert analysis failed - promote error status
1309 | response_data["status"] = "error"
1310 | response_data["content"] = expert_analysis.get("error", "Expert analysis failed")
1311 | response_data["content_type"] = "text"
1312 | del response_data["expert_analysis"]
1313 | else:
1314 | # Expert analysis was successfully executed - include expert guidance
1315 | response_data["next_steps"] = self.get_completion_next_steps_message(expert_analysis_used=True)
1316 |
1317 | # Add expert analysis guidance as important considerations
1318 | expert_guidance = self.get_expert_analysis_guidance()
1319 | if expert_guidance:
1320 | response_data["important_considerations"] = expert_guidance
1321 |
1322 | # Prepare complete work summary
1323 | work_summary = self._prepare_work_summary()
1324 | response_data[f"complete_{self.get_name()}"] = {
1325 | "initial_request": self.get_initial_request(request.step),
1326 | "steps_taken": len(self.work_history),
1327 | "files_examined": list(self.consolidated_findings.files_checked),
1328 | "relevant_files": list(self.consolidated_findings.relevant_files),
1329 | "relevant_context": list(self.consolidated_findings.relevant_context),
1330 | "issues_found": self.consolidated_findings.issues_found,
1331 | "work_summary": work_summary,
1332 | }
1333 | else:
1334 | # Tool doesn't require expert analysis or local work was sufficient
1335 | if not self.requires_expert_analysis():
1336 | # Tool is self-contained (like planner)
1337 | response_data["status"] = f"{self.get_name()}_complete"
1338 | response_data["next_steps"] = (
1339 | f"{self.get_name().capitalize()} work complete. Present results to the user."
1340 | )
1341 | else:
1342 | # Local work was sufficient for tools that support expert analysis
1343 | response_data["status"] = "local_work_complete"
1344 | response_data["next_steps"] = (
1345 | f"Local {self.get_name()} complete with sufficient confidence. Present findings "
1346 | "and recommendations to the user based on the work results."
1347 | )
1348 |
1349 | return response_data
1350 |
1351 | def handle_work_continuation(self, response_data: dict, request) -> dict:
1352 | """
1353 | Handle work continuation - force pause and provide guidance.
1354 | """
1355 | response_data["status"] = f"pause_for_{self.get_name()}"
1356 | response_data[f"{self.get_name()}_required"] = True
1357 |
1358 | # Get tool-specific required actions
1359 | required_actions = self.get_required_actions(
1360 | request.step_number, self.get_request_confidence(request), request.findings, request.total_steps, request
1361 | )
1362 | response_data["required_actions"] = required_actions
1363 |
1364 | # Generate step guidance
1365 | response_data["next_steps"] = self.get_step_guidance_message(request)
1366 |
1367 | return response_data
1368 |
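    # Illustrative sketch (hypothetical override; the argument order matches the call above,
    # and the guidance strings are examples only):
    #
    #     def get_required_actions(
    #         self, step_number: int, confidence: str, findings: str, total_steps: int, request=None
    #     ) -> list[str]:
    #         if step_number == 1:
    #             return ["Reproduce the issue locally", "Collect the relevant stack traces"]
    #         return ["Verify the current hypothesis against the code paths identified so far"]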
1369 | def _update_consolidated_findings(self, step_data: dict):
1370 | """Update consolidated findings with new step data"""
1371 | self.consolidated_findings.files_checked.update(step_data.get("files_checked", []))
1372 | self.consolidated_findings.relevant_files.update(step_data.get("relevant_files", []))
1373 | self.consolidated_findings.relevant_context.update(step_data.get("relevant_context", []))
1374 | self.consolidated_findings.findings.append(f"Step {step_data['step_number']}: {step_data['findings']}")
1375 | if step_data.get("hypothesis"):
1376 | self.consolidated_findings.hypotheses.append(
1377 | {
1378 | "step": step_data["step_number"],
1379 | "hypothesis": step_data["hypothesis"],
1380 | "confidence": step_data["confidence"],
1381 | }
1382 | )
1383 | if step_data.get("issues_found"):
1384 | self.consolidated_findings.issues_found.extend(step_data["issues_found"])
1385 | if step_data.get("images"):
1386 | self.consolidated_findings.images.extend(step_data["images"])
1387 | # Update confidence to latest value from this step
1388 | if step_data.get("confidence"):
1389 | self.consolidated_findings.confidence = step_data["confidence"]
1390 |
1391 | def _reprocess_consolidated_findings(self):
1392 | """Reprocess consolidated findings after backtracking"""
1393 | self.consolidated_findings = ConsolidatedFindings()
1394 | for step in self.work_history:
1395 | self._update_consolidated_findings(step)
1396 |
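    # Illustrative sketch (hypothetical helper, not defined in this module): backtracking is
    # expected to truncate work_history and then rebuild state with the method above.
    #
    #     def _backtrack_to_step(self, step_number: int) -> None:
    #         self.work_history = [s for s in self.work_history if s["step_number"] < step_number]
    #         self._reprocess_consolidated_findings()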
1397 | def _prepare_work_summary(self) -> str:
1398 | """Prepare a comprehensive summary of the work"""
1399 | summary_parts = [
1400 | f"=== {self.get_name().upper()} WORK SUMMARY ===",
1401 | f"Total steps: {len(self.work_history)}",
1402 | f"Files examined: {len(self.consolidated_findings.files_checked)}",
1403 | f"Relevant files identified: {len(self.consolidated_findings.relevant_files)}",
1404 | f"Methods/functions involved: {len(self.consolidated_findings.relevant_context)}",
1405 | f"Issues found: {len(self.consolidated_findings.issues_found)}",
1406 | "",
1407 | "=== WORK PROGRESSION ===",
1408 | ]
1409 |
1410 | for finding in self.consolidated_findings.findings:
1411 | summary_parts.append(finding)
1412 |
1413 | if self.consolidated_findings.hypotheses:
1414 | summary_parts.extend(
1415 | [
1416 | "",
1417 | "=== HYPOTHESIS EVOLUTION ===",
1418 | ]
1419 | )
1420 | for hyp in self.consolidated_findings.hypotheses:
1421 | summary_parts.append(f"Step {hyp['step']} ({hyp['confidence']} confidence): {hyp['hypothesis']}")
1422 |
1423 | if self.consolidated_findings.issues_found:
1424 | summary_parts.extend(
1425 | [
1426 | "",
1427 | "=== ISSUES IDENTIFIED ===",
1428 | ]
1429 | )
1430 | for issue in self.consolidated_findings.issues_found:
1431 | severity = issue.get("severity", "unknown")
1432 | description = issue.get("description", "No description")
1433 | summary_parts.append(f"[{severity.upper()}] {description}")
1434 |
1435 | return "\n".join(summary_parts)
1436 |
1437 | async def _call_expert_analysis(self, arguments: dict, request) -> dict:
1438 | """Call external model for expert analysis"""
1439 | try:
1440 |             # Model context should already be resolved during early validation; fall back here for tests
1441 | if not self._model_context:
1442 | # Try to resolve model context for expert analysis (deferred from early validation)
1443 | try:
1444 | model_name, model_context = self._resolve_model_context(arguments, request)
1445 | self._model_context = model_context
1446 | self._current_model_name = model_name
1447 | except Exception as e:
1448 | logger.error(f"Failed to resolve model context for expert analysis: {e}")
1449 | # Use request model as fallback (preserves existing test behavior)
1450 | model_name = self.get_request_model_name(request)
1451 | from utils.model_context import ModelContext
1452 |
1453 | model_context = ModelContext(model_name)
1454 | self._model_context = model_context
1455 | self._current_model_name = model_name
1456 | else:
1457 | model_name = self._current_model_name
1458 |
1459 | provider = self._model_context.provider
1460 |
1461 | # Prepare expert analysis context
1462 | expert_context = self.prepare_expert_analysis_context(self.consolidated_findings)
1463 |
1464 | # Check if tool wants to include files in prompt
1465 | if self.should_include_files_in_expert_prompt():
1466 | file_content = self._prepare_files_for_expert_analysis()
1467 | if file_content:
1468 | expert_context = self._add_files_to_expert_context(expert_context, file_content)
1469 |
1470 | # Get system prompt for this tool with localization support
1471 | base_system_prompt = self.get_system_prompt()
1472 | capability_augmented_prompt = self._augment_system_prompt_with_capabilities(
1473 | base_system_prompt, getattr(self._model_context, "capabilities", None)
1474 | )
1475 | language_instruction = self.get_language_instruction()
1476 | system_prompt = language_instruction + capability_augmented_prompt
1477 |
1478 | # Check if tool wants system prompt embedded in main prompt
1479 | if self.should_embed_system_prompt():
1480 | prompt = f"{system_prompt}\n\n{expert_context}\n\n{self.get_expert_analysis_instruction()}"
1481 | system_prompt = "" # Clear it since we embedded it
1482 | else:
1483 | prompt = expert_context
1484 |
1485 | # Validate temperature against model constraints
1486 | validated_temperature, temp_warnings = self.get_validated_temperature(request, self._model_context)
1487 |
1488 | # Log any temperature corrections
1489 | for warning in temp_warnings:
1490 | logger.warning(warning)
1491 |
1492 | # Generate AI response - use request parameters if available
1493 | model_response = provider.generate_content(
1494 | prompt=prompt,
1495 | model_name=model_name,
1496 | system_prompt=system_prompt,
1497 | temperature=validated_temperature,
1498 | thinking_mode=self.get_request_thinking_mode(request),
1499 | images=list(set(self.consolidated_findings.images)) if self.consolidated_findings.images else None,
1500 | )
1501 |
1502 | if model_response.content:
1503 | content = model_response.content.strip()
1504 |
1505 | # Try to extract JSON from markdown code blocks if present
1506 | if "```json" in content or "```" in content:
1507 | json_match = re.search(r"```(?:json)?\s*(.*?)\s*```", content, re.DOTALL)
1508 | if json_match:
1509 | content = json_match.group(1).strip()
1510 |
1511 | try:
1512 | # Try to parse as JSON
1513 | analysis_result = json.loads(content)
1514 | return analysis_result
1515 | except json.JSONDecodeError as e:
1516 | # Log the parse error with more details but don't fail
1517 | logger.info(
1518 | f"[{self.get_name()}] Expert analysis returned non-JSON response (this is OK for smaller models). "
1519 | f"Parse error: {str(e)}. Response length: {len(model_response.content)} chars."
1520 | )
1521 | logger.debug(f"First 500 chars of response: {model_response.content[:500]!r}")
1522 |
1523 | # Still return the analysis as plain text - this is valid
1524 | return {
1525 | "status": "analysis_complete",
1526 | "raw_analysis": model_response.content,
1527 | "format": "text", # Indicate it's plain text, not an error
1528 | "note": "Analysis provided in plain text format",
1529 | }
1530 | else:
1531 | return {"error": "No response from model", "status": "empty_response"}
1532 |
1533 | except Exception as e:
1534 | logger.error(f"Error calling expert analysis: {e}", exc_info=True)
1535 | return {"error": str(e), "status": "analysis_error"}
1536 |
1537 | def _process_work_step(self, step_data: dict):
1538 | """
1539 | Process a single work step and update internal state.
1540 |
1541 | This method is useful for testing and manual step processing.
1542 | It adds the step to work history and updates consolidated findings.
1543 |
1544 | Args:
1545 | step_data: Dictionary containing step information including:
1546 | step, step_number, findings, files_checked, etc.
1547 | """
1548 | # Store in history
1549 | self.work_history.append(step_data)
1550 |
1551 | # Update consolidated findings
1552 | self._update_consolidated_findings(step_data)
1553 |
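    # Illustrative usage (e.g. from a unit test; values are examples only):
    #
    #     tool._process_work_step(
    #         {
    #             "step": "Trace the failing request through the middleware",
    #             "step_number": 1,
    #             "findings": "Token refresh happens after the expiry check",
    #             "files_checked": ["auth/middleware.py"],
    #             "relevant_files": ["auth/middleware.py"],
    #             "relevant_context": ["AuthMiddleware.refresh_token"],
    #             "confidence": "medium",
    #         }
    #     )
    #     assert len(tool.work_history) == 1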
1554 | # Common execute method for workflow-based tools
1555 |
1556 | async def execute(self, arguments: dict[str, Any]) -> list[TextContent]:
1557 | """
1558 | Common execute logic for workflow-based tools.
1559 |
1560 | This method provides common validation and delegates to execute_workflow.
1561 | Tools that need custom execute logic can override this method.
1562 | """
1563 | try:
1564 | # Common validation
1565 | if not arguments:
1566 | error_data = {"status": "error", "content": "No arguments provided"}
1567 | # Add basic metadata even for validation errors
1568 | error_data["metadata"] = {"tool_name": self.get_name()}
1569 | raise ToolExecutionError(json.dumps(error_data, ensure_ascii=False))
1570 |
1571 | # Delegate to execute_workflow
1572 | return await self.execute_workflow(arguments)
1573 |
1574 | except ToolExecutionError:
1575 | raise
1576 | except Exception as e:
1577 | logger.error(f"Error in {self.get_name()} tool execution: {e}", exc_info=True)
1578 | error_data = {
1579 | "status": "error",
1580 | "content": f"Error in {self.get_name()}: {str(e)}",
1581 | } # Add metadata to error responses
1582 | self._add_workflow_metadata(error_data, arguments)
1583 | raise ToolExecutionError(json.dumps(error_data, ensure_ascii=False)) from e
1584 |
1585 | # Default implementations for methods that workflow-based tools typically don't need
1586 |
1587 | async def prepare_prompt(self, request) -> str:
1588 | """
1589 | Base implementation for workflow tools - compatible with BaseTool signature.
1590 |
1591 | Workflow tools typically don't need to return a prompt since they handle
1592 | their own prompt preparation internally through the workflow execution.
1593 |
1594 | Args:
1595 | request: The validated request object
1596 |
1597 | Returns:
1598 | Empty string since workflow tools manage prompts internally
1599 | """
1600 | # Workflow tools handle their prompts internally during workflow execution
1601 | return ""
1602 |
1603 | def format_response(self, response: str, request, model_info=None):
1604 | """
1605 | Workflow tools handle their own response formatting.
1606 | The BaseWorkflowMixin formats responses internally.
1607 | """
1608 | return response
1609 |
```
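
As a rough orientation aid for the workflow plumbing above, the sketch below shows the kind of subclass that plugs into these hooks. It is illustrative only: the class is deliberately standalone (a real tool also inherits `BaseWorkflowMixin` and the project's base tool class), and any name not present in the listing above is an assumption rather than repository code.

```python
# Hypothetical sketch, not a file from this repository: a minimal tool showing which of the
# hooks listed above a concrete workflow tool would typically override. The base-class wiring
# is omitted so the example stays self-contained.
from typing import Any


class ExampleAuditTool:
    """Overrides a handful of the inheritance hooks documented in the listing above."""

    def get_name(self) -> str:
        return "exampleaudit"

    def requires_expert_analysis(self) -> bool:
        # Allow the mixin to call an external model when work completes.
        return True

    def should_skip_expert_analysis(self, request: Any, consolidated_findings: Any) -> bool:
        # Skip the external call when the agent already reports certainty.
        try:
            return request.confidence == "certain"
        except AttributeError:
            return False

    def get_skip_reason(self) -> str:
        return "Audit reached certain confidence without needing external validation"

    def get_expert_analysis_guidance(self) -> str:
        # Appended to next_steps and surfaced as "important_considerations" when an
        # expert model is actually consulted.
        return (
            "IMPORTANT: Validate the expert findings against your own investigation "
            "before presenting them as conclusions."
        )
```

When `should_skip_expert_analysis` returns `True`, `handle_work_completion` takes the `handle_completion_without_expert_analysis` path; otherwise it calls `_call_expert_analysis` and merges the expert output (or a special status such as `files_required_to_continue` or `analysis_error`) into the response.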