This is page 13 of 45. Use http://codebase.md/dicklesworthstone/llm_gateway_mcp_server?lines=true&page={x} to view the full context.
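As a convenience, here is a minimal sketch of pulling down the full paginated dump referenced above. It assumes each page is served as plain markdown and that the `requests` package is available; it is illustrative only and not part of the repository:

```python
# Illustrative helper: fetch all 45 pages of the codebase dump mentioned above.
import requests

BASE_URL = "http://codebase.md/dicklesworthstone/llm_gateway_mcp_server"

pages = []
for page in range(1, 46):  # pages are 1-indexed; 45 pages total
    resp = requests.get(BASE_URL, params={"lines": "true", "page": page}, timeout=30)
    resp.raise_for_status()
    pages.append(resp.text)

full_dump = "\n".join(pages)
print(f"Fetched {len(pages)} pages ({len(full_dump)} characters)")
```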
# Directory Structure
```
├── .cursorignore
├── .env.example
├── .envrc
├── .gitignore
├── additional_features.md
├── check_api_keys.py
├── completion_support.py
├── comprehensive_test.py
├── docker-compose.yml
├── Dockerfile
├── empirically_measured_model_speeds.json
├── error_handling.py
├── example_structured_tool.py
├── examples
│ ├── __init__.py
│ ├── advanced_agent_flows_using_unified_memory_system_demo.py
│ ├── advanced_extraction_demo.py
│ ├── advanced_unified_memory_system_demo.py
│ ├── advanced_vector_search_demo.py
│ ├── analytics_reporting_demo.py
│ ├── audio_transcription_demo.py
│ ├── basic_completion_demo.py
│ ├── cache_demo.py
│ ├── claude_integration_demo.py
│ ├── compare_synthesize_demo.py
│ ├── cost_optimization.py
│ ├── data
│ │ ├── sample_event.txt
│ │ ├── Steve_Jobs_Introducing_The_iPhone_compressed.md
│ │ └── Steve_Jobs_Introducing_The_iPhone_compressed.mp3
│ ├── docstring_refiner_demo.py
│ ├── document_conversion_and_processing_demo.py
│ ├── entity_relation_graph_demo.py
│ ├── filesystem_operations_demo.py
│ ├── grok_integration_demo.py
│ ├── local_text_tools_demo.py
│ ├── marqo_fused_search_demo.py
│ ├── measure_model_speeds.py
│ ├── meta_api_demo.py
│ ├── multi_provider_demo.py
│ ├── ollama_integration_demo.py
│ ├── prompt_templates_demo.py
│ ├── python_sandbox_demo.py
│ ├── rag_example.py
│ ├── research_workflow_demo.py
│ ├── sample
│ │ ├── article.txt
│ │ ├── backprop_paper.pdf
│ │ ├── buffett.pdf
│ │ ├── contract_link.txt
│ │ ├── legal_contract.txt
│ │ ├── medical_case.txt
│ │ ├── northwind.db
│ │ ├── research_paper.txt
│ │ ├── sample_data.json
│ │ └── text_classification_samples
│ │ ├── email_classification.txt
│ │ ├── news_samples.txt
│ │ ├── product_reviews.txt
│ │ └── support_tickets.txt
│ ├── sample_docs
│ │ └── downloaded
│ │ └── attention_is_all_you_need.pdf
│ ├── sentiment_analysis_demo.py
│ ├── simple_completion_demo.py
│ ├── single_shot_synthesis_demo.py
│ ├── smart_browser_demo.py
│ ├── sql_database_demo.py
│ ├── sse_client_demo.py
│ ├── test_code_extraction.py
│ ├── test_content_detection.py
│ ├── test_ollama.py
│ ├── text_classification_demo.py
│ ├── text_redline_demo.py
│ ├── tool_composition_examples.py
│ ├── tournament_code_demo.py
│ ├── tournament_text_demo.py
│ ├── unified_memory_system_demo.py
│ ├── vector_search_demo.py
│ ├── web_automation_instruction_packs.py
│ └── workflow_delegation_demo.py
├── LICENSE
├── list_models.py
├── marqo_index_config.json.example
├── mcp_protocol_schema_2025-03-25_version.json
├── mcp_python_lib_docs.md
├── mcp_tool_context_estimator.py
├── model_preferences.py
├── pyproject.toml
├── quick_test.py
├── README.md
├── resource_annotations.py
├── run_all_demo_scripts_and_check_for_errors.py
├── storage
│ └── smart_browser_internal
│ ├── locator_cache.db
│ ├── readability.js
│ └── storage_state.enc
├── test_client.py
├── test_connection.py
├── TEST_README.md
├── test_sse_client.py
├── test_stdio_client.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── integration
│ │ ├── __init__.py
│ │ └── test_server.py
│ ├── manual
│ │ ├── test_extraction_advanced.py
│ │ └── test_extraction.py
│ └── unit
│ ├── __init__.py
│ ├── test_cache.py
│ ├── test_providers.py
│ └── test_tools.py
├── TODO.md
├── tool_annotations.py
├── tools_list.json
├── ultimate_mcp_banner.webp
├── ultimate_mcp_logo.webp
├── ultimate_mcp_server
│ ├── __init__.py
│ ├── __main__.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── commands.py
│ │ ├── helpers.py
│ │ └── typer_cli.py
│ ├── clients
│ │ ├── __init__.py
│ │ ├── completion_client.py
│ │ └── rag_client.py
│ ├── config
│ │ └── examples
│ │ └── filesystem_config.yaml
│ ├── config.py
│ ├── constants.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── evaluation
│ │ │ ├── base.py
│ │ │ └── evaluators.py
│ │ ├── providers
│ │ │ ├── __init__.py
│ │ │ ├── anthropic.py
│ │ │ ├── base.py
│ │ │ ├── deepseek.py
│ │ │ ├── gemini.py
│ │ │ ├── grok.py
│ │ │ ├── ollama.py
│ │ │ ├── openai.py
│ │ │ └── openrouter.py
│ │ ├── server.py
│ │ ├── state_store.py
│ │ ├── tournaments
│ │ │ ├── manager.py
│ │ │ ├── tasks.py
│ │ │ └── utils.py
│ │ └── ums_api
│ │ ├── __init__.py
│ │ ├── ums_database.py
│ │ ├── ums_endpoints.py
│ │ ├── ums_models.py
│ │ └── ums_services.py
│ ├── exceptions.py
│ ├── graceful_shutdown.py
│ ├── services
│ │ ├── __init__.py
│ │ ├── analytics
│ │ │ ├── __init__.py
│ │ │ ├── metrics.py
│ │ │ └── reporting.py
│ │ ├── cache
│ │ │ ├── __init__.py
│ │ │ ├── cache_service.py
│ │ │ ├── persistence.py
│ │ │ ├── strategies.py
│ │ │ └── utils.py
│ │ ├── cache.py
│ │ ├── document.py
│ │ ├── knowledge_base
│ │ │ ├── __init__.py
│ │ │ ├── feedback.py
│ │ │ ├── manager.py
│ │ │ ├── rag_engine.py
│ │ │ ├── retriever.py
│ │ │ └── utils.py
│ │ ├── prompts
│ │ │ ├── __init__.py
│ │ │ ├── repository.py
│ │ │ └── templates.py
│ │ ├── prompts.py
│ │ └── vector
│ │ ├── __init__.py
│ │ ├── embeddings.py
│ │ └── vector_service.py
│ ├── tool_token_counter.py
│ ├── tools
│ │ ├── __init__.py
│ │ ├── audio_transcription.py
│ │ ├── base.py
│ │ ├── completion.py
│ │ ├── docstring_refiner.py
│ │ ├── document_conversion_and_processing.py
│ │ ├── enhanced-ums-lookbook.html
│ │ ├── entity_relation_graph.py
│ │ ├── excel_spreadsheet_automation.py
│ │ ├── extraction.py
│ │ ├── filesystem.py
│ │ ├── html_to_markdown.py
│ │ ├── local_text_tools.py
│ │ ├── marqo_fused_search.py
│ │ ├── meta_api_tool.py
│ │ ├── ocr_tools.py
│ │ ├── optimization.py
│ │ ├── provider.py
│ │ ├── pyodide_boot_template.html
│ │ ├── python_sandbox.py
│ │ ├── rag.py
│ │ ├── redline-compiled.css
│ │ ├── sentiment_analysis.py
│ │ ├── single_shot_synthesis.py
│ │ ├── smart_browser.py
│ │ ├── sql_databases.py
│ │ ├── text_classification.py
│ │ ├── text_redline_tools.py
│ │ ├── tournament.py
│ │ ├── ums_explorer.html
│ │ └── unified_memory_system.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── async_utils.py
│ │ ├── display.py
│ │ ├── logging
│ │ │ ├── __init__.py
│ │ │ ├── console.py
│ │ │ ├── emojis.py
│ │ │ ├── formatter.py
│ │ │ ├── logger.py
│ │ │ ├── panels.py
│ │ │ ├── progress.py
│ │ │ └── themes.py
│ │ ├── parse_yaml.py
│ │ ├── parsing.py
│ │ ├── security.py
│ │ └── text.py
│ └── working_memory_api.py
├── unified_memory_system_technical_analysis.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/ultimate_mcp_server/cli/commands.py:
--------------------------------------------------------------------------------
```python
1 | """Command implementations for the Ultimate MCP Server CLI."""
2 | import asyncio
3 | import inspect
4 | import os
5 | import sys
6 | import time
7 | from typing import List, Optional
8 |
9 | from rich.console import Console
10 | from rich.progress import (
11 | BarColumn,
12 | Progress,
13 | SpinnerColumn,
14 | TaskProgressColumn,
15 | TextColumn,
16 | TimeElapsedColumn,
17 | )
18 | from rich.rule import Rule
19 | from rich.table import Table
20 |
21 | from ultimate_mcp_server.config import get_config
22 | from ultimate_mcp_server.constants import BASE_TOOLSET_CATEGORIES, Provider
23 | from ultimate_mcp_server.core.providers.base import get_provider
24 | from ultimate_mcp_server.core.server import Gateway, start_server
25 | from ultimate_mcp_server.graceful_shutdown import enable_quiet_shutdown
26 | from ultimate_mcp_server.services.cache import get_cache_service
27 | from ultimate_mcp_server.utils import get_logger
28 |
29 | logger = get_logger(__name__)
30 | console = Console(file=sys.stderr) # Use stderr to avoid interfering with MCP protocol
31 |
32 |
33 | def run_server(
34 | host: Optional[str] = None,
35 | port: Optional[int] = None,
36 | workers: Optional[int] = None,
37 | log_level: Optional[str] = None,
38 | transport_mode: str = "sse",
39 | include_tools: Optional[List[str]] = None,
40 | exclude_tools: Optional[List[str]] = None,
41 | load_all_tools: bool = False,
42 | ) -> None:
43 | """Start the Ultimate MCP Server with specified configuration.
44 |
45 | This function initializes and launches the MCP-compliant server that provides
46 | an API for AI agents to use various tools and capabilities. The server uses
47 | FastAPI under the hood and can be customized through configuration overrides.
48 | Server settings from command-line arguments take precedence over .env and config file values.
49 |
50 | Args:
51 | host: Network interface to bind to (e.g., '127.0.0.1' for local, '0.0.0.0' for all)
52 | port: TCP port to listen on (default: 8013 from config)
53 | workers: Number of parallel worker processes (more workers = higher concurrency)
54 | log_level: Logging verbosity (debug, info, warning, error, critical)
55 | transport_mode: Communication protocol mode ('sse' for Server-Sent Events streaming
56 | or 'stdio' for standard input/output in certain environments)
57 | include_tools: Allowlist of specific tool names to register (if not specified, all
58 | available tools are registered)
59 | exclude_tools: Blocklist of specific tool names to exclude (takes precedence over
60 | include_tools if both are specified)
61 | load_all_tools: If True, load all available tools. If False (default), load only
62 | the Base Toolset (completion, filesystem, optimization, provider,
63 | local_text, meta, search).
64 | """
65 | # Set up graceful shutdown handling
66 | enable_quiet_shutdown()
67 |
68 | # Get the current config
69 | cfg = get_config()
70 |
71 | # Override config with provided values
72 | if host:
73 | cfg.server.host = host
74 | if port:
75 | cfg.server.port = port
76 | if workers:
77 | cfg.server.workers = workers
78 |
79 | # Update tool registration config
80 | if include_tools or exclude_tools:
81 | cfg.tool_registration.filter_enabled = True
82 |
83 | if include_tools:
84 | cfg.tool_registration.included_tools = include_tools
85 |
86 | if exclude_tools:
87 | cfg.tool_registration.excluded_tools = exclude_tools
88 |
89 | # Determine effective log level
90 | effective_log_level = log_level or getattr(cfg, 'log_level', 'info')
91 |
92 | # Print server info
 93 |     console.print("[bold blue]Starting Ultimate MCP Server[/bold blue]")
94 | console.print(f"Host: [cyan]{cfg.server.host}[/cyan]")
95 | console.print(f"Port: [cyan]{cfg.server.port}[/cyan]")
96 | console.print(f"Workers: [cyan]{cfg.server.workers}[/cyan]")
97 | console.print(f"Log level: [cyan]{effective_log_level.upper()}[/cyan]")
98 | console.print(f"Transport mode: [cyan]{transport_mode}[/cyan]")
99 |
100 | # Tool Loading Status
101 | if load_all_tools:
102 | console.print("Tool Loading: [yellow]All Available Tools[/yellow]")
103 | else:
104 | console.print("Tool Loading: [yellow]Base Toolset Only[/yellow] (Use --load-all-tools to load all)")
105 | # Format the categories for display
106 | console.print(" [bold]Includes:[/bold]")
107 | for category, tools in BASE_TOOLSET_CATEGORIES.items():
108 | console.print(f" [cyan]{category}[/cyan]: {', '.join(tools)}")
109 |
110 | # Print tool filtering info if enabled
111 | if cfg.tool_registration.filter_enabled:
112 | if cfg.tool_registration.included_tools:
113 | console.print(f"Including tools: [cyan]{', '.join(cfg.tool_registration.included_tools)}[/cyan]")
114 | if cfg.tool_registration.excluded_tools:
115 | console.print(f"Excluding tools: [red]{', '.join(cfg.tool_registration.excluded_tools)}[/red]")
116 |
117 | console.print()
118 |
119 | # Set environment variables for the tool context estimator to use
120 | os.environ["MCP_SERVER_HOST"] = cfg.server.host
121 | os.environ["MCP_SERVER_PORT"] = str(cfg.server.port)
122 |
123 | # Just run the server directly - no threading, no token analysis
124 | start_server(
125 | host=cfg.server.host,
126 | port=cfg.server.port,
127 | workers=cfg.server.workers,
128 | log_level=effective_log_level,
129 | transport_mode=transport_mode,
130 | include_tools=cfg.tool_registration.included_tools if cfg.tool_registration.filter_enabled else None,
131 | exclude_tools=cfg.tool_registration.excluded_tools if cfg.tool_registration.filter_enabled else None,
132 | load_all_tools=load_all_tools,
133 | )
134 |
135 |
136 | async def list_providers(check_keys: bool = False, list_models: bool = False) -> None:
137 | """Display information about configured LLM providers and their status.
138 |
139 | This function queries all supported LLM providers (OpenAI, Anthropic, Gemini, etc.),
140 | displays their configuration status, and can optionally verify API key validity
141 | and list available models. This is useful for diagnosing configuration issues
142 | or exploring what models are accessible through each provider.
143 |
144 | The basic output shows provider names, enabled status, API key presence,
145 | and default model configuration. Additional information is available through
146 | the optional parameters.
147 |
148 | Args:
149 | check_keys: When True, tests each configured API key against the respective
150 | provider's authentication endpoint to verify it's valid and active
151 | list_models: When True, queries each provider's model list endpoint and displays
152 | all models available for use with your current credentials
153 | """
154 | # Get the current config
155 | cfg = get_config()
156 |
157 | # Create provider table
158 | table = Table(title="Available LLM Providers")
159 | table.add_column("Provider", style="cyan")
160 | table.add_column("Enabled", style="green")
161 | table.add_column("API Key", style="yellow")
162 | table.add_column("Default Model", style="blue")
163 |
164 | # Add spinner during provider initialization
165 | with Progress(
166 | SpinnerColumn(),
167 | TextColumn("[bold blue]Initializing providers...[/bold blue]"),
168 | transient=True
169 | ) as progress:
170 | progress.add_task("init", total=None)
171 |
172 | # Create Gateway instance (initializes all providers)
173 | gateway = Gateway()
174 | while not hasattr(gateway, 'provider_status') or not gateway.provider_status:
175 | await asyncio.sleep(0.1)
176 |
177 | # Get provider status
178 | provider_status = gateway.provider_status
179 |
180 | # Add rows to table
181 | for provider_name in [p.value for p in Provider]:
182 | status = provider_status.get(provider_name, None)
183 |
184 | if status:
185 | enabled = "✅" if status.enabled else "❌"
186 | api_key = "✅" if status.api_key_configured else "❌"
187 |
188 | # Get default model
189 | provider_cfg = getattr(cfg, 'providers', {}).get(provider_name, None)
190 | default_model = provider_cfg.default_model if provider_cfg else "N/A"
191 |
192 | table.add_row(provider_name, enabled, api_key, default_model)
193 |
194 | # Print table
195 | console.print(table)
196 |
197 | # Check API keys if requested
198 | if check_keys:
199 | console.print("\n[bold]API Key Check:[/bold]")
200 |
201 | with Progress(
202 | SpinnerColumn(),
203 | TextColumn("[bold blue]Checking API keys...[/bold blue]"),
204 | transient=True
205 | ) as progress:
206 | progress.add_task("check", total=None)
207 |
208 | for provider_name in [p.value for p in Provider]:
209 | console.print(Rule(f"[bold cyan]{provider_name}[/bold cyan]", align="center"))
210 | status = provider_status.get(provider_name, None)
211 |
212 | if status and status.api_key_configured:
213 | try:
214 | # Get provider instance
215 | provider = get_provider(provider_name)
216 |
217 | # Check API key
218 | valid = await provider.check_api_key()
219 |
220 | status_text = "[green]valid[/green]" if valid else "[red]invalid[/red]"
221 | console.print(f"API Key Status: {status_text}")
222 |
223 | except Exception as e:
224 | console.print(f"[red]Error checking API key: {str(e)}[/red]")
225 | else:
226 | if status:
227 | console.print("API Key Status: [yellow]not configured[/yellow]")
228 | else:
229 | console.print("API Key Status: [dim]Provider not configured/enabled[/dim]")
230 | console.print() # Add spacing after each provider's check
231 |
232 | # List models if requested
233 | if list_models:
234 | console.print("\n[bold]Available Models:[/bold]")
235 |
236 | with Progress(
237 | SpinnerColumn(),
238 | TextColumn("[bold blue]Loading models...[/bold blue]"),
239 | transient=True
240 | ) as progress:
241 | progress.add_task("load", total=None)
242 |
243 | for provider_name in [p.value for p in Provider]:
244 | status = provider_status.get(provider_name, None)
245 |
246 | if status and status.available:
247 | provider_instance = gateway.providers.get(provider_name)
248 |
249 | if provider_instance:
250 | console.print(Rule(f"[bold cyan]{provider_name} Models[/bold cyan]", align="center"))
251 | try:
252 | # Get models
253 | models = await provider_instance.list_models()
254 |
255 | # Create model table
256 | model_table = Table(box=None, show_header=True, header_style="bold blue")
257 | model_table.add_column("Model ID", style="cyan")
258 | model_table.add_column("Description", style="green")
259 |
260 | if models:
261 | for model in models:
262 | model_table.add_row(
263 | model["id"],
264 | model.get("description", "[dim]N/A[/dim]")
265 | )
266 | console.print(model_table)
267 | else:
268 | console.print("[yellow]No models found or returned by provider.[/yellow]")
269 |
270 | except Exception as e:
271 | console.print(f"[red]Error listing models for {provider_name}: {str(e)}[/red]")
272 | else:
273 | if status:
274 | console.print(Rule(f"[bold dim]{provider_name}[/bold dim]", align="center"))
275 | console.print("[yellow]Provider not available or configured.[/yellow]")
276 | console.print() # Add spacing after each provider's models
277 |
278 |
279 | async def test_provider(provider: str, model: Optional[str] = None, prompt: str = "Hello, world!") -> None:
280 | """Test an LLM provider's functionality with a complete request-response cycle.
281 |
282 | This function conducts an end-to-end test of a specified LLM provider by
283 | sending a sample prompt and displaying the resulting completion. It measures
284 | performance metrics such as response time, token usage, and estimated cost.
285 |
286 | The test verifies:
287 | - API key validity and configuration
288 | - Connection to the provider's endpoint
289 | - Model availability and functionality
290 | - Response generation capabilities
291 | - Cost estimation correctness
292 |
293 | Results include the generated text, model used, token counts (input/output),
294 | estimated cost, and response time, providing a comprehensive health check.
295 |
296 | Args:
297 | provider: Provider identifier (e.g., 'openai', 'anthropic', 'gemini')
298 | model: Specific model to use for the test (if None, uses the provider's default model)
299 | prompt: Text prompt to send to the model (defaults to a simple greeting)
300 | """
301 | console.print(f"[bold]Testing provider:[/bold] [cyan]{provider}[/cyan]")
302 | console.print(f"[bold]Model:[/bold] [cyan]{model or 'default'}[/cyan]")
303 | console.print(f'[bold]Prompt:[/bold] [green]"{prompt}"[/green]')
304 | console.print()
305 |
306 | try:
307 | # Get provider instance
308 | provider_instance = get_provider(provider)
309 |
310 | # Initialize provider
311 | await provider_instance.initialize()
312 |
313 | # Show spinner during generation
314 | with Progress(
315 | SpinnerColumn(),
316 | TextColumn("[bold blue]Generating completion...[/bold blue]"),
317 | transient=True
318 | ) as progress:
319 | progress.add_task("generate", total=None)
320 |
321 | # Generate completion
322 | start_time = time.time()
323 | result = await provider_instance.generate_completion(
324 | prompt=prompt,
325 | model=model,
326 | temperature=0.7
327 | )
328 | elapsed_time = time.time() - start_time
329 |
330 | # Print result
331 | console.print("[bold cyan]Generated text:[/bold cyan]")
332 | console.print(result.text)
333 | console.print()
334 |
335 | # Print metrics
336 | console.print(f"[bold]Model used:[/bold] [cyan]{result.model}[/cyan]")
337 | console.print(f"[bold]Tokens:[/bold] [yellow]{result.input_tokens}[/yellow] input, [yellow]{result.output_tokens}[/yellow] output, [yellow]{result.total_tokens}[/yellow] total")
338 | console.print(f"[bold]Cost:[/bold] [green]${result.cost:.6f}[/green]")
339 | console.print(f"[bold]Time:[/bold] [blue]{elapsed_time:.2f}s[/blue]")
340 |
341 | except Exception as e:
342 | console.print(f"[bold red]Error testing provider:[/bold red] {str(e)}")
343 |
344 |
345 | async def generate_completion(
346 | provider: str,
347 | model: Optional[str] = None,
348 | prompt: str = "",
349 | temperature: float = 0.7,
350 | max_tokens: Optional[int] = None,
351 | system: Optional[str] = None,
352 | stream: bool = False
353 | ) -> None:
354 | """Generate text from an LLM provider directly through the CLI.
355 |
356 | This function provides direct access to LLM text generation capabilities
357 | without requiring an MCP server or client. It supports both synchronous
358 | (wait for full response) and streaming (token-by-token) output modes.
359 |
360 | For providers that natively support system prompts (like Anthropic),
361 | the system parameter is passed directly to the API. For other providers,
362 | the system message is prepended to the prompt with appropriate formatting.
363 |
364 | The function displays:
365 | - Generated text (with real-time streaming if requested)
366 | - Model information
367 | - Token usage statistics (input/output)
368 | - Cost estimate
369 | - Response time
370 |
371 | Args:
372 | provider: LLM provider identifier (e.g., 'openai', 'anthropic', 'gemini')
373 | model: Specific model ID to use (if None, uses provider's default model)
374 | prompt: The text prompt to send to the model
375 | temperature: Sampling temperature (0.0-2.0) controlling output randomness
376 | (lower = more deterministic, higher = more creative)
377 | max_tokens: Maximum number of tokens to generate in the response
378 | (if None, uses provider's default maximum)
379 | system: Optional system message for setting context/behavior
380 | (handled differently depending on provider capabilities)
381 | stream: When True, displays generated tokens as they arrive rather than
382 | waiting for the complete response (may affect metric reporting)
383 | """
384 | try:
385 | # Get provider instance
386 | provider_instance = get_provider(provider)
387 |
388 | # Initialize provider
389 | await provider_instance.initialize()
390 |
391 | # Set extra parameters based on provider
392 | kwargs = {}
393 | if system:
394 | if provider == Provider.ANTHROPIC.value:
395 | kwargs["system"] = system
396 | else:
397 | # For other providers, prepend system message to prompt
398 | prompt = f"System: {system}\n\nUser: {prompt}"
399 |
400 | # Show progress for non-streaming generation
401 | if not stream:
402 | with Progress(
403 | SpinnerColumn(),
404 | TextColumn("[bold blue]Generating completion...[/bold blue]"),
405 | transient=True
406 | ) as progress:
407 | progress.add_task("generate", total=None)
408 |
409 | # Generate completion
410 | start_time = time.time()
411 | result = await provider_instance.generate_completion(
412 | prompt=prompt,
413 | model=model,
414 | temperature=temperature,
415 | max_tokens=max_tokens,
416 | **kwargs
417 | )
418 | elapsed_time = time.time() - start_time
419 |
420 | # Print result
421 | console.print(f"[cyan]{result.text}[/cyan]")
422 | console.print()
423 |
424 | # Print metrics
425 | console.print(f"[bold]Model:[/bold] [blue]{result.model}[/blue]")
426 | console.print(f"[bold]Tokens:[/bold] [yellow]{result.input_tokens}[/yellow] input, [yellow]{result.output_tokens}[/yellow] output")
427 | console.print(f"[bold]Cost:[/bold] [green]${result.cost:.6f}[/green]")
428 | console.print(f"[bold]Time:[/bold] [blue]{elapsed_time:.2f}s[/blue]")
429 |
430 | else:
431 | # Streaming generation
432 | console.print("[bold blue]Generating completion (streaming)...[/bold blue]")
433 |
434 | # Generate streaming completion
435 | start_time = time.time()
436 | stream = provider_instance.generate_completion_stream(
437 | prompt=prompt,
438 | model=model,
439 | temperature=temperature,
440 | max_tokens=max_tokens,
441 | **kwargs
442 | )
443 |
444 | # Process stream
445 | full_text = ""
446 | async for chunk, metadata in stream: # noqa: B007
447 | console.print(chunk, end="")
448 | sys.stderr.flush() # Use stderr instead of stdout
449 | full_text += chunk
450 |
451 | elapsed_time = time.time() - start_time
452 |
453 | # Print metrics
454 | console.print("\n")
455 | console.print(f"[bold]Model:[/bold] [blue]{metadata.get('model', model or 'unknown')}[/blue]")
456 | console.print(f"[bold]Time:[/bold] [blue]{elapsed_time:.2f}s[/blue]")
457 |
458 | except Exception as e:
459 | console.print(f"[bold red]Error generating completion:[/bold red] {str(e)}")
460 |
461 |
462 | async def check_cache(show_status: bool = True, clear: bool = False) -> None:
463 | """View cache statistics and optionally clear the LLM response cache.
464 |
465 | The server employs a caching system that stores LLM responses to avoid
466 | redundant API calls when the same or similar requests are made repeatedly.
467 | This significantly reduces API costs and improves response times.
468 |
469 | This function displays comprehensive cache information including:
470 | - Basic configuration (enabled status, TTL, max size)
471 | - Storage details (persistence, directory location)
472 | - Performance metrics (hit/miss counts, hit ratio percentage)
473 | - Efficiency data (estimated cost savings, total tokens saved)
474 |
475 | Clearing the cache removes all stored responses, which might be useful
476 | when testing changes, addressing potential staleness, or reclaiming disk space.
477 |
478 | Args:
479 | show_status: When True, displays detailed cache statistics and configuration
480 | clear: When True, purges all entries from the cache (requires confirmation)
481 | """
482 | # Get cache service
483 | cache_service = get_cache_service()
484 |
485 | if clear:
486 | # Clear cache
487 | console.print("[bold]Clearing cache...[/bold]")
488 | cache_service.clear()
489 | console.print("[green]Cache cleared successfully[/green]")
490 |
491 | if show_status:
492 | # Get cache stats
493 | stats = cache_service.get_stats()
494 |
495 | # Create status table
496 | table = Table(title="Cache Status")
497 | table.add_column("Setting", style="cyan")
498 | table.add_column("Value", style="green")
499 |
500 | # Add rows
501 | table.add_row("Enabled", "✅" if stats["enabled"] else "❌")
502 | table.add_row("Size", f"{stats['size']} / {stats['max_size']} entries")
503 | table.add_row("TTL", f"{stats['ttl']} seconds")
504 | table.add_row("Persistence", "✅" if stats["persistence"]["enabled"] else "❌")
505 | table.add_row("Cache Directory", stats["persistence"]["cache_dir"])
506 | table.add_row("Fuzzy Matching", "✅" if stats["fuzzy_matching"] else "❌")
507 |
508 | # Print table
509 | console.print(table)
510 |
511 | # Create stats table
512 | stats_table = Table(title="Cache Statistics")
513 | stats_table.add_column("Metric", style="cyan")
514 | stats_table.add_column("Value", style="green")
515 |
516 | # Add rows
517 | cache_stats = stats["stats"]
518 | stats_table.add_row("Hits", str(cache_stats["hits"]))
519 | stats_table.add_row("Misses", str(cache_stats["misses"]))
520 | stats_table.add_row("Hit Ratio", f"{cache_stats['hit_ratio']:.2%}")
521 | stats_table.add_row("Stores", str(cache_stats["stores"]))
522 | stats_table.add_row("Evictions", str(cache_stats["evictions"]))
523 | stats_table.add_row("Total Saved Tokens", f"{cache_stats['total_saved_tokens']:,}")
524 | stats_table.add_row("Estimated Cost Savings", f"${cache_stats['estimated_cost_savings']:.6f}")
525 |
526 | # Print table
527 | console.print(stats_table)
528 |
529 |
530 | async def benchmark_providers(
531 |     providers: Optional[List[str]] = None,
532 |     models: Optional[List[str]] = None,
533 | prompt: Optional[str] = None,
534 | runs: int = 3
535 | ) -> None:
536 | """Compare performance metrics across different LLM providers and models.
537 |
538 | This benchmark utility sends identical prompts to multiple provider/model
539 | combinations and measures various performance characteristics. It runs
540 | each test multiple times to establish average metrics for more reliable
541 | comparison.
542 |
543 | Benchmarks measure:
544 | - Response time: How long it takes to receive a complete response
545 | - Processing speed: Tokens per second throughput
546 | - Token efficiency: Input/output token ratio
547 | - Cost: Estimated price per request
548 |
549 | Results are presented in a table format for easy comparison. This helps
550 | identify the optimal provider/model for specific needs based on speed,
551 | cost, or quality considerations.
552 |
553 | Args:
554 | providers: List of provider identifiers to benchmark (e.g., ['openai', 'anthropic'])
555 | If None, benchmarks all available and configured providers
556 | models: List of specific model IDs to test (e.g., ['gpt-4o', 'claude-3-5-haiku'])
557 | If None, uses each provider's default model
558 | prompt: Text prompt to use for testing (should be identical across all providers)
559 | If None, uses a default explanation prompt
560 | runs: Number of test iterations to run for each provider/model combination
561 | (higher values produce more reliable averages but take longer)
562 | """
563 | # Use default providers if not specified
564 | if not providers:
565 | providers = [p.value for p in Provider]
566 |
567 | # Set default prompt if not provided
568 | if not prompt:
569 | prompt = "Explain the concept of quantum computing in simple terms that a high school student would understand."
570 |
571 | console.print(f"[bold]Running benchmark with {runs} runs per provider/model[/bold]")
572 | console.print(f'[bold]Prompt:[/bold] [green]"{prompt}"[/green]')
573 | console.print()
574 |
575 | # Create results table
576 | table = Table(title="Benchmark Results")
577 | table.add_column("Provider", style="cyan")
578 | table.add_column("Model", style="blue")
579 | table.add_column("Avg. Time (s)", style="green")
580 | table.add_column("Tokens/Sec", style="yellow")
581 | table.add_column("Avg. Cost ($)", style="magenta")
582 | table.add_column("Input Tokens", style="dim")
583 | table.add_column("Output Tokens", style="dim")
584 |
585 | # Track benchmarks for progress bar
586 | total_benchmarks = 0
587 | for provider_name in providers:
588 | try:
589 | provider_instance = get_provider(provider_name)
590 | await provider_instance.initialize()
591 |
592 | # Get available models
593 | available_models = await provider_instance.list_models()
594 |
595 | # Filter models if specified
596 | if models:
597 | available_models = [m for m in available_models if m["id"] in models]
598 | else:
599 | # Use default model if no models specified
600 | default_model = provider_instance.get_default_model()
601 | available_models = [m for m in available_models if m["id"] == default_model]
602 |
603 | total_benchmarks += len(available_models)
604 |
605 | except Exception:
606 | # Skip providers that can't be initialized
607 | pass
608 |
609 | # Run benchmarks with progress bar
610 | with Progress(
611 | TextColumn("[progress.description]{task.description}"),
612 | BarColumn(),
613 | TaskProgressColumn(),
614 | TimeElapsedColumn()
615 | ) as progress:
616 | benchmark_task = progress.add_task("[bold blue]Running benchmarks...", total=total_benchmarks * runs)
617 |
618 | for provider_name in providers:
619 | try:
620 | # Get provider instance
621 | provider_instance = get_provider(provider_name)
622 | await provider_instance.initialize()
623 |
624 | # Get available models
625 | available_models = await provider_instance.list_models()
626 |
627 | # Filter models if specified
628 | if models:
629 | available_models = [m for m in available_models if m["id"] in models]
630 | else:
631 | # Use default model if no models specified
632 | default_model = provider_instance.get_default_model()
633 | available_models = [m for m in available_models if m["id"] == default_model]
634 |
635 | for model_info in available_models:
636 | model_id = model_info["id"]
637 |
638 | # Run benchmark for this model
639 | total_time = 0.0
640 | total_cost = 0.0
641 | total_input_tokens = 0
642 | total_output_tokens = 0
643 | total_tokens = 0
644 |
645 | for run in range(runs):
646 | try:
647 | # Update progress description
648 | progress.update(
649 | benchmark_task,
650 | description=f"[bold blue]Benchmarking {provider_name}/{model_id} (Run {run+1}/{runs})"
651 | )
652 |
653 | # Run benchmark
654 | start_time = time.time()
655 | result = await provider_instance.generate_completion(
656 | prompt=prompt,
657 | model=model_id,
658 | temperature=0.7
659 | )
660 | run_time = time.time() - start_time
661 |
662 | # Record metrics
663 | total_time += run_time
664 | total_cost += result.cost
665 | total_input_tokens += result.input_tokens
666 | total_output_tokens += result.output_tokens
667 | total_tokens += result.total_tokens
668 |
669 | # Update progress
670 | progress.advance(benchmark_task)
671 |
672 | except Exception as e:
673 | console.print(f"[red]Error in run {run+1} for {provider_name}/{model_id}: {str(e)}[/red]")
674 | # Still advance progress
675 | progress.advance(benchmark_task)
676 |
677 | # Calculate averages
678 | avg_time = total_time / max(1, runs)
679 | avg_cost = total_cost / max(1, runs)
680 | avg_input_tokens = total_input_tokens // max(1, runs)
681 | avg_output_tokens = total_output_tokens // max(1, runs)
682 |
683 | # Calculate tokens per second
684 | tokens_per_second = total_tokens / total_time if total_time > 0 else 0
685 |
686 | # Add to results table
687 | table.add_row(
688 | provider_name,
689 | model_id,
690 | f"{avg_time:.2f}",
691 | f"{tokens_per_second:.1f}",
692 | f"{avg_cost:.6f}",
693 | str(avg_input_tokens),
694 | str(avg_output_tokens)
695 | )
696 |
697 | except Exception as e:
698 | console.print(f"[red]Error benchmarking provider {provider_name}: {str(e)}[/red]")
699 |
700 | # Print results
701 | console.print(table)
702 |
703 |
704 | async def list_tools(category: Optional[str] = None) -> None:
705 | """Display all available MCP tools registered in the server.
706 |
707 | This function provides a comprehensive listing of tools that can be called
708 | through the Model Context Protocol (MCP) interface. Each tool represents
709 | a specific capability that AI agents can access, from text generation to
710 | filesystem operations, browser automation, database access, and more.
711 |
712 | Tools are organized into functional categories such as:
713 | - completion: Text generation capabilities
714 | - document: Document processing and analysis
715 | - extraction: Structured data extraction from text
716 | - filesystem: File and directory operations
717 | - browser: Web browsing and automation
718 | - rag: Retrieval-augmented generation
719 | - database: SQL database interactions
720 | - meta: Self-reflection and tool discovery
721 |
722 | The listing includes tool names, categories, and brief descriptions.
723 | The output also provides usage hints for filtering which tools are
724 | enabled when starting the server.
725 |
726 | Args:
727 | category: When specified, only shows tools belonging to the given
728 | category (e.g., 'filesystem', 'document', 'browser')
729 | """
730 | # Import tools module to get the list of available tools
731 | from ultimate_mcp_server.tools import STANDALONE_TOOL_FUNCTIONS
732 |
733 | # Create tools table
734 | table = Table(title="Available Ultimate MCP Server Tools")
735 | table.add_column("Tool Name", style="cyan")
736 | table.add_column("Category", style="green")
737 | table.add_column("Description", style="yellow")
738 |
739 | # Define tool categories
740 | categories = {
741 | "completion": ["generate_completion", "stream_completion", "chat_completion", "multi_completion"],
742 | "provider": ["get_provider_status", "list_models"],
743 | "tournament": ["create_tournament", "get_tournament_status", "list_tournaments", "get_tournament_results", "cancel_tournament"],
744 | "document": ["chunk_document", "summarize_document", "extract_entities", "generate_qa_pairs", "process_document_batch"],
745 | "extraction": ["extract_json", "extract_table", "extract_key_value_pairs", "extract_semantic_schema", "extract_entity_graph", "extract_code_from_response"],
746 | "filesystem": ["read_file", "read_multiple_files", "write_file", "edit_file", "create_directory", "list_directory", "directory_tree", "move_file", "search_files", "get_file_info", "list_allowed_directories"],
747 | "rag": ["create_knowledge_base", "list_knowledge_bases", "delete_knowledge_base", "add_documents", "retrieve_context", "generate_with_rag"],
748 | "meta": ["get_tool_info", "get_llm_instructions", "get_tool_recommendations", "register_api_meta_tools"],
749 | "search": ["marqo_fused_search"],
750 | "ocr": ["extract_text_from_pdf", "process_image_ocr", "enhance_ocr_text", "analyze_pdf_structure", "batch_process_documents"],
751 | "optimization": ["estimate_cost", "compare_models", "recommend_model", "execute_optimized_workflow"],
752 | "database": ["connect_to_database", "disconnect_from_database", "discover_database_schema", "execute_query", "generate_database_documentation", "get_table_details", "find_related_tables", "analyze_column_statistics", "execute_parameterized_query", "create_database_view", "create_database_index", "test_connection", "execute_transaction", "execute_query_with_pagination", "get_database_status"],
753 | "audio": ["transcribe_audio", "extract_audio_transcript_key_points", "chat_with_transcript"],
754 | "browser": ["browser_init", "browser_navigate", "browser_click", "browser_type", "browser_screenshot", "browser_close", "browser_select", "browser_checkbox", "browser_get_text", "browser_get_attributes", "browser_execute_javascript", "browser_wait", "execute_web_workflow", "extract_structured_data_from_pages", "find_and_download_pdfs", "multi_engine_search_summary"],
755 | "classification": ["text_classification"],
756 | }
757 |
758 | # Find category for each tool
759 | tool_categories = {}
760 | for cat_name, tools in categories.items():
761 | for tool in tools:
762 | tool_categories[tool] = cat_name
763 |
764 | # Add rows to table
765 | for tool_func in STANDALONE_TOOL_FUNCTIONS:
766 | if callable(tool_func):
767 | tool_name = getattr(tool_func, "__name__", str(tool_func))
768 | tool_category = tool_categories.get(tool_name, "other")
769 |
770 | # Skip if category filter is provided and doesn't match
771 | if category and category.lower() != tool_category.lower():
772 | continue
773 |
774 | # Get docstring (first line only for description)
775 | docstring = inspect.getdoc(tool_func) or ""
776 | description = docstring.split("\n")[0] if docstring else ""
777 |
778 | table.add_row(tool_name, tool_category, description)
779 |
780 | # Add the special meta tool registrars
781 | if not category or category.lower() in ["meta", "other"]:
782 | if not category or category.lower() == "meta":
783 | table.add_row("register_api_meta_tools", "meta", "Register Meta API tools")
784 |
785 |     # Print the table (rows are added in registration order, not sorted)
786 | console.print(table)
787 |
788 | # Print usage hint
789 | console.print("\n[bold]Usage with tool filtering:[/bold]")
790 | console.print("To include only specific tools:")
791 | console.print(" umcp run --include-tools tool1 tool2 tool3")
792 | console.print("\nTo exclude specific tools:")
793 | console.print(" umcp run --exclude-tools tool1 tool2 tool3")
794 | console.print("\nTo include tools by category:")
795 | console.print(" umcp tools --category filesystem # List filesystem tools")
796 | console.print(" umcp run --include-tools read_file write_file edit_file # Include only these filesystem tools")
```
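
The command functions above are plain `async def` coroutines (apart from `run_server`, which blocks inside `start_server`), so the Typer entry point in `cli/typer_cli.py` presumably bridges them with `asyncio.run`. Below is a minimal, hypothetical wiring sketch under that assumption; the option names and help text are illustrative, not the project's actual CLI surface:

```python
# Hypothetical wiring sketch -- the real entry point lives in cli/typer_cli.py and may differ.
import asyncio
from typing import Optional

import typer

from ultimate_mcp_server.cli.commands import list_providers, run_server

app = typer.Typer(help="Ultimate MCP Server CLI (illustrative sketch)")


@app.command("run")
def run(
    host: Optional[str] = typer.Option(None, help="Interface to bind to, e.g. 127.0.0.1"),
    port: Optional[int] = typer.Option(None, help="TCP port to listen on"),
    load_all_tools: bool = typer.Option(False, "--load-all-tools", help="Load every tool, not just the base set"),
) -> None:
    # run_server() is synchronous and blocks until the server exits.
    run_server(host=host, port=port, load_all_tools=load_all_tools)


@app.command("providers")
def providers(check_keys: bool = False, list_models: bool = False) -> None:
    # list_providers() is a coroutine, so bridge it from the sync Typer callback.
    asyncio.run(list_providers(check_keys=check_keys, list_models=list_models))


if __name__ == "__main__":
    app()
```

With a wrapper along these lines, invocations such as `umcp run --load-all-tools` or `umcp providers --check-keys` would map directly onto the functions defined above.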
--------------------------------------------------------------------------------
/ultimate_mcp_server/tools/extraction.py:
--------------------------------------------------------------------------------
```python
1 | """Advanced extraction tools for Ultimate MCP Server.
2 |
3 | This module provides tools for extracting structured data (JSON, tables, key-value pairs, code)
4 | from unstructured or semi-structured text using LLMs.
5 | """
6 |
7 | import asyncio
8 | import json
9 | import re # Added for code extraction
10 | import time
11 | from typing import Any, Dict, List, Optional, Tuple
12 |
13 | import jsonschema
14 |
15 | from ultimate_mcp_server.constants import Provider
16 |
17 | # Removed CompletionRequest import as not directly used by standalone functions
18 | from ultimate_mcp_server.exceptions import ProviderError, ToolInputError
19 | from ultimate_mcp_server.tools.base import BaseTool, with_error_handling, with_tool_metrics
20 |
21 | # Import the standardized completion tool
22 | from ultimate_mcp_server.tools.completion import generate_completion
23 | from ultimate_mcp_server.utils import get_logger
24 |
25 | logger = get_logger("ultimate_mcp_server.tools.extraction")
26 |
27 | def _extract_and_parse_json(text: str) -> Tuple[Any, bool, Optional[str]]:
28 | """
29 | Robust utility to extract and parse JSON from text, handling various formats and edge cases.
30 |
31 | Args:
32 | text: The text that may contain JSON.
33 |
34 | Returns:
35 | Tuple of (parsed_data, success_flag, error_message)
36 | """
37 | # Start with a clean slate
38 | text = text.strip()
39 | error_message = None
40 |
41 | # Try a series of increasingly aggressive extraction techniques
42 | extraction_methods = [
43 | # Method 1: Direct parsing if it's already valid JSON
44 | lambda t: json.loads(t),
45 |
46 | # Method 2: Extract JSON using regex for common patterns
47 | lambda t: json.loads(re.search(r'(?s)(?:```(?:json)?\s*)?({[\s\S]*?}|\[[\s\S]*?\])(?:\s*```)?', t).group(1).strip()),
48 |
49 | # Method 3: Remove markdown fences and try again
50 | lambda t: json.loads(re.sub(r'```(?:json)?\s*|\s*```', '', t).strip()),
51 |
52 | # Method 4: Fix common JSON syntax errors and try again
53 | lambda t: json.loads(_fix_common_json_errors(t)),
54 |
55 | # Method 5: Use ast.literal_eval as a fallback for Python literals
56 | lambda t: _safe_literal_eval(t)
57 | ]
58 |
59 | # Try each method in sequence until one works
60 | for i, method in enumerate(extraction_methods):
61 | try:
62 | result = method(text)
63 | logger.debug(f"Successfully parsed JSON using method {i+1}")
64 | return result, True, None
65 | except Exception as e:
66 | # Continue to next method on failure
67 | if i == len(extraction_methods) - 1: # Last method
68 | error_message = f"All JSON parsing methods failed. Last error: {str(e)}"
69 |
70 | return None, False, error_message
71 |
72 | def _fix_common_json_errors(json_str: str) -> str:
73 | """
74 | Fix common JSON syntax errors found in LLM outputs.
75 |
76 | Args:
77 | json_str: The JSON string to fix
78 |
79 | Returns:
80 | Corrected JSON string
81 | """
82 | # Remove any text before the first '{' or '['
83 | json_str = re.sub(r'^.*?([{\[])', r'\1', json_str, flags=re.DOTALL)
84 |
85 | # Remove any text after the last '}' or ']'
86 | json_str = re.sub(r'([}\]])[^}\]]*$', r'\1', json_str, flags=re.DOTALL)
87 |
88 | # Fix missing quotes around keys
89 | json_str = re.sub(r'(\s*)(\w+)(\s*):', r'\1"\2"\3:', json_str)
90 |
91 | # Fix trailing commas in arrays
92 | json_str = re.sub(r',(\s*[\]}])', r'\1', json_str)
93 |
94 | # Fix missing commas between elements
95 | json_str = re.sub(r'(["}\]])(\s*)(["{\[])', r'\1,\2\3', json_str)
96 |
97 | return json_str
98 |
99 | def _safe_literal_eval(text: str) -> Any:
100 | """
101 | Safely evaluate a string containing a Python literal.
102 |
103 | Args:
104 | text: The text containing a Python literal
105 |
106 | Returns:
107 | The evaluated Python object
108 |
109 | Raises:
110 | SyntaxError: If the text cannot be parsed as a Python literal
111 | """
112 | import ast
113 |
114 | # Remove any text before the first '{' or '['
115 | text = re.sub(r'^.*?([{\[])', r'\1', text, flags=re.DOTALL)
116 |
117 | # Remove any text after the last '}' or ']'
118 | text = re.sub(r'([}\]])[^}\]]*$', r'\1', text, flags=re.DOTALL)
119 |
120 | return ast.literal_eval(text)
121 |
122 | @with_tool_metrics
123 | @with_error_handling
124 | async def extract_json(
125 | text: str,
126 | json_schema: Optional[Dict] = None,
127 | provider: str = Provider.OPENAI.value,
128 | model: Optional[str] = None,
129 | validate_output: bool = True
130 | # Removed ctx=None
131 | ) -> Dict[str, Any]:
132 | """Extracts structured data formatted as JSON from within a larger text body.
133 |
134 | Use this tool when the input text contains a JSON object or list (potentially embedded
135 | within other text or markdown code fences) that needs to be isolated and parsed.
136 | Optionally validates the extracted JSON against a provided schema.
137 |
138 | Args:
139 | text: The input text potentially containing an embedded JSON object or list.
140 | json_schema: (Optional) A JSON schema (as a Python dictionary) to validate the extracted
141 | JSON against. If validation fails, the error is included in the result.
142 | provider: The name of the LLM provider (e.g., "openai"). Defaults to "openai".
143 | Providers supporting JSON mode (like OpenAI) are recommended for reliability.
144 | model: The specific model ID (e.g., "openai/gpt-4.1-mini"). Uses provider default if None.
145 | validate_output: (Optional) If True (default) and `json_schema` is provided, validates
146 | the extracted data against the schema.
147 |
148 | Returns:
149 | A dictionary containing the extraction results:
150 | {
151 | "data": { ... } | [ ... ] | null, # The extracted JSON data (or null if extraction/parsing failed).
152 | "validation_result": { # Included if json_schema provided & validate_output=True
153 | "valid": true | false,
154 | "errors": [ "Validation error message..." ] # List of errors if not valid
155 | } | null,
156 | "raw_text": "...", # Included if JSON parsing failed
157 | "model": "provider/model-used",
158 | "provider": "provider-name",
159 | "tokens": { ... },
160 | "cost": 0.000045,
161 | "processing_time": 1.8,
162 | "success": true | false,
163 | "error": "Error message if success is false"
164 | }
165 |
166 | Raises:
167 | ProviderError: If the provider/LLM fails.
168 | ToolError: For other internal errors.
169 | """
170 | start_time = time.time()
171 |
172 | if not text or not isinstance(text, str):
173 | raise ToolInputError("Input 'text' must be a non-empty string.", param_name="text", provided_value=text)
174 |
175 | try:
176 | # Check if there's already valid JSON in the input text
177 | extracted_data, success, error_message = _extract_and_parse_json(text)
178 |
179 | # If we found valid JSON in the input, return it right away
180 | if success:
181 | logger.info("Found and extracted valid JSON directly from input text")
182 | return {
183 | "data": extracted_data,
184 | "validation_result": None, # No validation done for direct extraction
185 | "raw_text": None,
186 | "model": "direct-extraction", # No model used
187 | "provider": "direct-extraction", # No provider used
188 | "tokens": {"input": len(text), "output": 0, "total": len(text)},
189 | "cost": 0.0, # No cost for direct extraction
190 | "processing_time": time.time() - start_time,
191 | "success": True,
192 | "error": None
193 | }
194 |
195 | # Prepare model ID based on provider format
196 | effective_model = model
197 | # Ensure model ID includes provider prefix if not already present
198 | if model and provider not in model:
199 | effective_model = f"{provider}/{model}"
200 |
201 | schema_description = f"The extracted JSON should conform to this JSON schema:\n```json\n{json.dumps(json_schema, indent=2)}\n```\n" if json_schema else ""
202 | # Improved prompt asking the LLM to identify and extract the JSON
203 | prompt = f"Identify and extract the primary JSON object or list embedded within the following text. " \
204 | f"{schema_description}Focus on extracting only the JSON data structure itself, removing any surrounding text or markdown fences. " \
205 | f"Text:\n```\n{text}\n```\nExtracted JSON:"
206 |
207 | # Use JSON mode if supported by the provider (e.g., OpenAI)
208 | additional_params = {}
209 | if provider == Provider.OPENAI.value:
210 | additional_params["response_format"] = {"type": "json_object"}
211 |
212 | # Use the standardized completion tool instead of direct provider call
213 | completion_result = await generate_completion(
214 | prompt=prompt,
215 | model=effective_model,
216 | provider=provider,
217 | temperature=0.0, # Low temp for precise extraction
218 | max_tokens=4000, # Allow for large JSON objects
219 | additional_params=additional_params
220 | )
221 |
222 | # Extract data from the standardized result format
223 | processing_time = completion_result.get("processing_time", time.time() - start_time)
224 | actual_model_used = completion_result.get("model", effective_model)
225 | raw_text_output = completion_result.get("text", "").strip()
226 | token_info = completion_result.get("tokens", {})
227 | cost = completion_result.get("cost", 0.0)
228 | tool_success = completion_result.get("success", False)
229 |
230 | # If the tool call failed, propagate the error
231 | if not tool_success:
232 | error_message = completion_result.get("error", "Unknown error during completion")
233 | raise ProviderError(
234 | f"JSON extraction failed: {error_message}",
235 | provider=provider,
236 | model=actual_model_used
237 | )
238 |
239 | # Use our robust parsing function
240 | extracted_data, success, error_message = _extract_and_parse_json(raw_text_output)
241 | validation_result = None
242 |
243 | # Validate against schema if requested and extraction succeeded
244 | if success and json_schema and validate_output:
245 | validation_result = {"valid": True, "errors": []}
246 | try:
247 | jsonschema.validate(instance=extracted_data, schema=json_schema)
248 | logger.debug("JSON validated successfully against schema.")
249 | except jsonschema.exceptions.ValidationError as e:
250 | validation_result = {"valid": False, "errors": [str(e)]}
251 | logger.warning(f"JSON validation failed: {e}")
252 | # Keep success=True as extraction worked, but validation failed
253 |
254 | logger.info(f"JSON extraction attempt complete. Success: {success}, Validated: {validation_result.get('valid') if validation_result else 'N/A'}. Time: {processing_time:.2f}s")
255 | return {
256 | "data": extracted_data,
257 | "validation_result": validation_result,
258 | "raw_text": raw_text_output if not success else None, # Include raw only on parse failure
259 | "model": actual_model_used,
260 | "provider": provider,
261 | "tokens": token_info,
262 | "cost": cost,
263 | "processing_time": processing_time,
264 | "success": success,
265 | "error": error_message
266 | }
267 |
268 | except Exception as e:
269 | error_model = model or f"{provider}/default"
270 | if isinstance(e, ProviderError):
271 | raise # Re-raise
272 | else:
273 | raise ProviderError(f"JSON extraction failed: {str(e)}", provider=provider, model=error_model, cause=e) from e
274 |
275 | @with_tool_metrics
276 | @with_error_handling
277 | async def extract_table(
278 | text: str,
279 | headers: Optional[List[str]] = None,
280 | return_formats: Optional[List[str]] = None, # Renamed from 'formats'
281 | extract_metadata: bool = False,
282 | provider: str = Provider.OPENAI.value,
283 | model: Optional[str] = None
284 | # Removed ctx=None
285 | ) -> Dict[str, Any]:
286 | """Extracts tabular data found within text content.
287 |
288 | Identifies table structures in the input text and extracts the data, attempting
289 | to return it in specified formats (e.g., JSON list of objects, Markdown table).
290 |
291 | Args:
292 | text: The input text potentially containing one or more tables.
293 | headers: (Optional) A list of expected header strings. Providing headers helps the LLM
294 | identify the correct table and map columns accurately.
295 | return_formats: (Optional) List of desired output formats. Supported: "json", "markdown".
296 | Defaults to ["json"]. The result dictionary will contain keys matching these formats.
297 | extract_metadata: (Optional) If True, attempts to extract contextual metadata about the table,
298 | such as a title, surrounding notes, or source information. Default False.
299 | provider: The name of the LLM provider (e.g., "openai"). Defaults to "openai".
300 | model: The specific model ID (e.g., "openai/gpt-4.1-mini"). Uses provider default if None.
301 |
302 | Returns:
303 | A dictionary containing the extracted table data and metadata:
304 | {
305 | "data": { # Dictionary containing requested formats
306 | "json": [ { "Header1": "Row1Val1", "Header2": "Row1Val2" }, ... ],
307 | "markdown": "| Header1 | Header2 |\n|---|---|\n| Row1Val1 | Row1Val2 |\n...",
308 | "metadata": { "title": "Table Title...", "notes": "..." } # If extract_metadata=True
309 | } | null, # Null if extraction fails
310 | "model": "provider/model-used",
311 | "provider": "provider-name",
312 | "tokens": { ... },
313 | "cost": 0.000180,
314 | "processing_time": 3.5,
315 | "success": true | false,
316 | "error": "Error message if success is false"
317 | }
318 |
319 | Raises:
320 | ProviderError: If the provider/LLM fails.
321 | ToolError: For other internal errors, including failure to parse the LLM response.
322 | """
323 | return_formats = return_formats or ["json"]
324 | start_time = time.time()
325 |
326 | if not text or not isinstance(text, str):
327 | raise ToolInputError("Input 'text' must be a non-empty string.", param_name="text", provided_value=text)
328 |
329 | try:
330 | # Prepare model ID based on provider format
331 | effective_model = model
332 | # Ensure model ID includes provider prefix if not already present
333 | if model and provider not in model:
334 | effective_model = f"{provider}/{model}"
335 |
336 | headers_guidance = f"The table likely has headers similar to: {', '.join(headers)}.\n" if headers else "Attempt to identify table headers automatically.\n"
337 | metadata_guidance = "Also extract any surrounding metadata like a table title, caption, or source notes.\n" if extract_metadata else ""
338 | formats_guidance = f"Return the extracted table data in these formats: {', '.join(return_formats)}."
339 |
340 | # Improved prompt asking for specific formats in a JSON structure
341 | prompt = f"Identify and extract the primary data table from the following text. " \
342 | f"{headers_guidance}{metadata_guidance}{formats_guidance}" \
343 | f"Format the output as a single JSON object containing keys for each requested format ({', '.join(return_formats)}) " \
344 | f"and optionally a 'metadata' key if requested. Ensure the values are the table represented in that format." \
345 | f"\n\nText:\n```\n{text}\n```\nResult JSON:"
346 |
347 | # Use JSON mode if supported by the provider
348 | additional_params = {}
349 | if provider == Provider.OPENAI.value:
350 | additional_params["response_format"] = {"type": "json_object"}
351 |
352 | # Use the standardized completion tool instead of direct provider call
353 | completion_result = await generate_completion(
354 | prompt=prompt,
355 | model=effective_model,
356 | provider=provider,
357 | temperature=0.0, # Low temp for precise extraction
358 | max_tokens=4000,
359 | additional_params=additional_params
360 | )
361 |
362 | # Extract data from the standardized result format
363 | processing_time = completion_result.get("processing_time", time.time() - start_time)
364 | actual_model_used = completion_result.get("model", effective_model)
365 | raw_text_output = completion_result.get("text", "").strip()
366 | token_info = completion_result.get("tokens", {})
367 | cost = completion_result.get("cost", 0.0)
368 | tool_success = completion_result.get("success", False)
369 |
370 | # If the tool call failed, propagate the error
371 | if not tool_success:
372 | error_message = completion_result.get("error", "Unknown error during completion")
373 | raise ProviderError(
374 | f"Table extraction failed: {error_message}",
375 | provider=provider,
376 | model=actual_model_used
377 | )
378 |
379 | # Use our robust parsing function
380 | extraction_result, success, error_message = _extract_and_parse_json(raw_text_output)
381 |
382 | # Basic validation if extraction succeeded
383 | if success and (not isinstance(extraction_result, dict) or not any(fmt in extraction_result for fmt in return_formats)):
384 | logger.warning(f"Table extraction JSON result missing expected structure or formats ({return_formats}). Result: {extraction_result}")
385 | # Allow partial success if it's a dict, but log warning
386 | if isinstance(extraction_result, dict):
387 | error_message = f"Warning: LLM output did not contain all requested formats ({return_formats})."
388 | else:
389 | error_message = "Expected a JSON object with format keys"
390 | success = False
391 | extraction_result = None
392 |
393 | logger.info(f"Table extraction attempt complete. Success: {success}. Time: {processing_time:.2f}s")
394 | return {
395 | "data": extraction_result,
396 | "raw_text": raw_text_output if not success else None, # Include raw only on parse failure
397 | "model": actual_model_used,
398 | "provider": provider,
399 | "tokens": token_info,
400 | "cost": cost,
401 | "processing_time": processing_time,
402 | "success": success,
403 | "error": error_message
404 | }
405 |
406 | except Exception as e:
407 | error_model = model or f"{provider}/default"
408 | if isinstance(e, ProviderError):
409 | raise
410 | else:
411 | raise ProviderError(f"Table extraction failed: {str(e)}", provider=provider, model=error_model, cause=e) from e
412 |
413 | @with_tool_metrics
414 | @with_error_handling
415 | async def extract_key_value_pairs(
416 | text: str,
417 | keys: Optional[List[str]] = None,
418 | provider: str = Provider.OPENAI.value,
419 | model: Optional[str] = None
420 | # Removed ctx=None
421 | ) -> Dict[str, Any]:
422 | """Extracts key-value pairs from text, optionally targeting specific keys.
423 |
424 | Use this tool to pull out data points that appear in a "Key: Value" or similar format
425 | within unstructured text (e.g., fields from a form, details from a description).
426 |
427 | Args:
428 | text: The input text containing key-value pairs.
429 | keys: (Optional) A list of specific key names to look for and extract. If omitted,
430 | the tool attempts to extract all identifiable key-value pairs.
431 | provider: The name of the LLM provider (e.g., "openai"). Defaults to "openai".
432 | model: The specific model ID (e.g., "openai/gpt-4.1-mini"). Uses provider default if None.
433 |
434 | Returns:
435 | A dictionary containing the extracted key-value data and metadata:
436 | {
437 | "data": { # Dictionary of extracted key-value pairs
438 | "Name": "Alice",
439 | "Role": "Engineer",
440 | "Location": "Remote", ...
441 | } | null, # Null if extraction fails
442 | "model": "provider/model-used",
443 | "provider": "provider-name",
444 | "tokens": { ... },
445 | "cost": 0.000070,
446 | "processing_time": 2.1,
447 | "success": true | false,
448 | "error": "Error message if success is false"
449 | }
450 |
451 | Raises:
452 | ProviderError: If the provider/LLM fails.
453 | ToolError: For other internal errors, including failure to parse the LLM JSON response.
454 | """
455 | start_time = time.time()
456 |
457 | if not text or not isinstance(text, str):
458 | raise ToolInputError("Input 'text' must be a non-empty string.", param_name="text", provided_value=text)
459 |
460 | try:
461 | # Prepare model ID based on provider format
462 | effective_model = model
463 | # Ensure model ID includes provider prefix if not already present
464 | if model and provider not in model:
465 | effective_model = f"{provider}/{model}"
466 |
467 | keys_guidance = f"Extract the values for these specific keys: {', '.join(keys)}.\n" if keys else "Identify and extract all distinct key-value pairs present in the text.\n"
468 | prompt = f"Analyze the following text and extract key-value pairs. {keys_guidance}" \
469 | f"Format the output as a single, flat JSON object mapping the extracted keys (as strings) to their corresponding values (as strings or appropriate simple types). " \
470 | f"Infer the value associated with each key from the text context. Ignore keys not present in the text.\n\n" \
471 | f"Text:\n```\n{text}\n```\nResult JSON object:"
472 |
473 | # Use JSON mode if supported by the provider
474 | additional_params = {}
475 | if provider == Provider.OPENAI.value:
476 | additional_params["response_format"] = {"type": "json_object"}
477 |
478 | # Use the standardized completion tool instead of direct provider call
479 | completion_result = await generate_completion(
480 | prompt=prompt,
481 | model=effective_model,
482 | provider=provider,
483 | temperature=0.0, # Low temp for precise extraction
484 | max_tokens=2000,
485 | additional_params=additional_params
486 | )
487 |
488 | # Extract data from the standardized result format
489 | processing_time = completion_result.get("processing_time", time.time() - start_time)
490 | actual_model_used = completion_result.get("model", effective_model)
491 | raw_text_output = completion_result.get("text", "").strip()
492 | token_info = completion_result.get("tokens", {})
493 | cost = completion_result.get("cost", 0.0)
494 | tool_success = completion_result.get("success", False)
495 |
496 | # If the tool call failed, propagate the error
497 | if not tool_success:
498 | error_message = completion_result.get("error", "Unknown error during completion")
499 | raise ProviderError(
500 | f"Key-value pair extraction failed: {error_message}",
501 | provider=provider,
502 | model=actual_model_used
503 | )
504 |
505 | # Use our robust parsing function
506 | kv_data, success, error_message = _extract_and_parse_json(raw_text_output)
507 |
508 | # Validate it's a dictionary if extraction succeeded
509 | if success and not isinstance(kv_data, dict):
510 | error_message = "Extracted data is not a valid key-value dictionary"
511 | logger.warning(error_message)
512 | success = False
513 | kv_data = None
514 |
515 | logger.info(f"Key-Value pair extraction attempt complete. Success: {success}. Time: {processing_time:.2f}s")
516 | return {
517 | "data": kv_data,
518 | "raw_text": raw_text_output if not success else None,
519 | "model": actual_model_used,
520 | "provider": provider,
521 | "tokens": token_info,
522 | "cost": cost,
523 | "processing_time": processing_time,
524 | "success": success,
525 | "error": error_message
526 | }
527 |
528 | except Exception as e:
529 | error_model = model or f"{provider}/default"
530 | if isinstance(e, ProviderError):
531 | raise
532 | else:
533 | raise ProviderError(f"Key-value pair extraction failed: {str(e)}", provider=provider, model=error_model, cause=e) from e
534 |
535 | @with_tool_metrics
536 | @with_error_handling
537 | async def extract_semantic_schema(
538 | text: str,
539 | # Schema should ideally be passed as a structured dict, not within the prompt
540 | semantic_schema: Dict[str, Any], # Changed from embedding prompt
541 | provider: str = Provider.OPENAI.value,
542 | model: Optional[str] = None
543 | # Removed ctx=None
544 | ) -> Dict[str, Any]:
545 | """Extracts information from text matching a specified semantic structure (schema).
546 |
547 | Use this tool when you need to populate a predefined JSON structure with information
548 | found or inferred from the input text. Unlike `extract_json`, the target JSON structure
549 | is *defined by you* (via `semantic_schema`), not expected to be present in the input text.
550 |
551 | Args:
552 | text: The input text containing information to extract.
553 | semantic_schema: A Python dictionary representing the desired JSON schema for the output.
554 | Use JSON Schema conventions (e.g., {"type": "object", "properties": { ... }}).
555 | This guides the LLM on what fields to extract and their expected types.
556 | provider: The name of the LLM provider (e.g., "openai"). Defaults to "openai".
557 | Providers supporting JSON mode or strong instruction following are recommended.
558 | model: The specific model ID (e.g., "openai/gpt-4o"). Uses provider default if None.
559 |
560 | Returns:
561 | A dictionary containing the extracted data conforming to the schema and metadata:
562 | {
563 | "data": { ... }, # The extracted data, structured according to semantic_schema
564 | "model": "provider/model-used",
565 | "provider": "provider-name",
566 | "tokens": { ... },
567 | "cost": 0.000210,
568 | "processing_time": 4.1,
569 | "success": true | false,
570 | "error": "Error message if success is false"
571 | }
572 |
573 | Raises:
574 | ToolInputError: If `semantic_schema` is not a valid dictionary.
575 | ProviderError: If the provider/LLM fails.
576 | ToolError: For other internal errors, including failure to parse the LLM JSON response.
577 | """
578 | start_time = time.time()
579 |
580 | if not text or not isinstance(text, str):
581 | raise ToolInputError("Input 'text' must be a non-empty string.", param_name="text", provided_value=text)
582 | if not semantic_schema or not isinstance(semantic_schema, dict):
583 | raise ToolInputError("Input 'semantic_schema' must be a non-empty dictionary representing a JSON schema.", param_name="semantic_schema", provided_value=semantic_schema)
584 |
585 | try:
586 | # Prepare model ID based on provider format
587 | effective_model = model
588 | # Ensure model ID includes provider prefix if not already present
589 | if model and provider not in model:
590 | effective_model = f"{provider}/{model}"
591 |
592 | # Create a clear prompt explaining the task and providing the schema
593 | schema_str = json.dumps(semantic_schema, indent=2)
594 | prompt = f"Analyze the following text and extract information that conforms to the provided JSON schema. " \
595 | f"Populate the fields in the schema based *only* on information present in the text. " \
596 | f"If information for a field is not found, omit the field or use a null value as appropriate according to the schema. " \
597 | f"Return ONLY the populated JSON object conforming to the schema.\n\n" \
598 | f"JSON Schema:\n```json\n{schema_str}\n```\n\n" \
599 | f"Text:\n```\n{text}\n```\nPopulated JSON object:"
600 |
601 | # Use JSON mode if supported by the provider
602 | additional_params = {}
603 | if provider == Provider.OPENAI.value:
604 | additional_params["response_format"] = {"type": "json_object"}
605 |
606 | # Use the standardized completion tool instead of direct provider call
607 | completion_result = await generate_completion(
608 | prompt=prompt,
609 | model=effective_model,
610 | provider=provider,
611 | temperature=0.0, # Low temp for precise extraction
612 | max_tokens=4000,
613 | additional_params=additional_params
614 | )
615 |
616 | # Extract data from the standardized result format
617 | processing_time = completion_result.get("processing_time", time.time() - start_time)
618 | actual_model_used = completion_result.get("model", effective_model)
619 | raw_text_output = completion_result.get("text", "").strip()
620 | token_info = completion_result.get("tokens", {})
621 | cost = completion_result.get("cost", 0.0)
622 | tool_success = completion_result.get("success", False)
623 |
624 | # If the tool call failed, propagate the error
625 | if not tool_success:
626 | error_message = completion_result.get("error", "Unknown error during completion")
627 | raise ProviderError(
628 | f"Semantic schema extraction failed: {error_message}",
629 | provider=provider,
630 | model=actual_model_used
631 | )
632 |
633 | # Use our robust parsing function
634 | extracted_data, success, error_message = _extract_and_parse_json(raw_text_output)
635 |
636 | # Validate against the provided schema if extraction succeeded
637 | if success:
638 | try:
639 | jsonschema.validate(instance=extracted_data, schema=semantic_schema)
640 | logger.debug("Successfully parsed and validated semantic schema JSON.")
641 | except jsonschema.exceptions.ValidationError as e:
642 | error_message = f"Warning: LLM output did not strictly conform to schema: {str(e)}"
643 | logger.warning(f"{error_message}. Data: {extracted_data}")
644 | # Still consider extraction successful if parsable
645 |
646 | logger.info(f"Semantic schema extraction attempt complete. Success: {success}. Time: {processing_time:.2f}s")
647 | return {
648 | "data": extracted_data,
649 | "raw_text": raw_text_output if not success else None,
650 | "model": actual_model_used,
651 | "provider": provider,
652 | "tokens": token_info,
653 | "cost": cost,
654 | "processing_time": processing_time,
655 | "success": success,
656 | "error": error_message
657 | }
658 |
659 | except Exception as e:
660 | error_model = model or f"{provider}/default"
661 | if isinstance(e, ProviderError):
662 | raise
663 | else:
664 | raise ProviderError(f"Semantic schema extraction failed: {str(e)}", provider=provider, model=error_model, cause=e) from e
665 |
666 | # Note: This is a utility function, not typically exposed as a direct tool,
667 | # but kept here as it relates to extraction from LLM *responses*.
668 | # No standard decorators applied.
669 | async def extract_code_from_response(
670 | response_text: str,
671 | model: str = "openai/gpt-4.1-mini",
672 | timeout: int = 15,
673 | tracker: Optional[Any] = None # Add optional tracker (use Any for now to avoid circular import)
674 | ) -> str:
675 | """Extracts code blocks from LLM response text, using an LLM for complex cases.
676 |
677 | Primarily designed to clean up responses from code generation tasks.
678 | It first tries simple regex matching for markdown code fences. If that fails,
679 | it uses a specified LLM to identify and extract the code.
680 |
681 | Args:
682 | response_text: The text potentially containing code blocks.
683 | model: The specific model ID to use for LLM-based extraction if regex fails.
684 | Defaults to "openai/gpt-4.1-mini".
685 | timeout: Timeout in seconds for the LLM extraction call. Default 15.
686 | tracker: (Optional) An instance of a CostTracker for tracking cost and metrics.
687 |
688 | Returns:
689 | The extracted code block as a string, or the original text if no code is found or extraction fails.
690 | """
691 | if not response_text or not isinstance(response_text, str):
692 | return "" # Return empty if no input
693 |
694 | # Try simple regex extraction first (common markdown format)
695 | code_blocks = re.findall(r"```(?:[a-zA-Z0-9\-_]*\n)?(.*?)\n?```", response_text, re.DOTALL)
696 |
697 | if code_blocks:
698 | # Return the content of the first code block found
699 | logger.debug("Extracted code using regex.")
700 | return code_blocks[0].strip()
701 |
702 | # If regex fails, use LLM for more robust extraction
703 | logger.debug("Regex failed, attempting LLM-based code extraction.")
704 | try:
705 | # Parse provider from model string if it contains a slash
706 | provider_id = model.split('/')[0] if '/' in model else Provider.OPENAI.value
707 | effective_model = model # Use the full model string as provided
708 |
709 | prompt = f"Extract only the main code block from the following text. Return just the code itself, without any explanations or markdown fences.\n\nText:\n```\n{response_text}\n```\n\nCode:"
710 |
711 | # Set a timeout using asyncio.wait_for
712 | completion_task = generate_completion(
713 | prompt=prompt,
714 | model=effective_model,
715 | provider=provider_id,
716 | temperature=0.0,
717 | max_tokens=len(response_text) # Allow enough tokens, approx original length
718 | )
719 |
720 | # Use asyncio.wait_for to implement timeout
721 | completion_result = await asyncio.wait_for(completion_task, timeout=timeout)
722 |
723 | # Check if completion succeeded
724 | if not completion_result.get("success", False):
725 | logger.warning(f"LLM code extraction failed: {completion_result.get('error', 'Unknown error')}. Returning original text.")
726 | return response_text
727 |
728 | # Track cost if tracker is provided
729 | if tracker:
730 | try:
731 |                 # Pull values from the completion result dict with safe defaults.
732 |                 # Create a temporary object for tracking, since CostTracker expects attributes.
733 | class Trackable:
734 | pass
735 | trackable = Trackable()
736 | trackable.cost = completion_result.get('cost', 0.0)
737 | trackable.input_tokens = completion_result.get('tokens', {}).get('input', 0)
738 | trackable.output_tokens = completion_result.get('tokens', {}).get('output', 0)
739 | trackable.provider = provider_id
740 | trackable.model = completion_result.get('model', effective_model)
741 | trackable.processing_time = completion_result.get('processing_time', 0.0)
742 | tracker.add_call(trackable)
743 | except Exception as track_err:
744 | logger.warning(f"Could not track cost for LLM code extraction: {track_err}", exc_info=False)
745 |
746 | extracted_code = completion_result.get("text", "").strip()
747 | logger.info(f"Extracted code using LLM ({effective_model}).")
748 | return extracted_code
749 |
750 | except asyncio.TimeoutError:
751 | logger.warning(f"LLM code extraction timed out after {timeout}s. Returning original text.")
752 | return response_text # Fallback to original on timeout
753 | except Exception as e:
754 | logger.error(f"LLM code extraction failed: {str(e)}. Returning original text.", exc_info=False)
755 | return response_text # Fallback to original on error
756 |
757 | class ExtractionTools(BaseTool):
758 | """Tools for extracting structured data from unstructured text."""
759 |
760 | tool_name = "extraction"
761 | description = "Tools for extracting structured data from unstructured text, including JSON, tables, and key-value pairs."
762 |
763 | def __init__(self, gateway):
764 | """Initialize extraction tools.
765 |
766 | Args:
767 | gateway: Gateway or MCP server instance
768 | """
769 | super().__init__(gateway)
770 | self._register_tools()
771 |
772 | def _register_tools(self):
773 | """Register extraction tools with MCP server."""
774 | # Register the extraction functions as tools
775 | self.mcp.tool(name="extract_json")(extract_json)
776 | self.mcp.tool(name="extract_table")(extract_table)
777 | self.mcp.tool(name="extract_key_value_pairs")(extract_key_value_pairs)
778 | self.mcp.tool(name="extract_semantic_schema")(extract_semantic_schema)
779 | self.logger.info("Registered extraction tools", emoji_key="success")
```
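
The extraction tools above are plain async coroutines, so they can be exercised directly outside the MCP server. Below is a minimal usage sketch, assuming the file lives at `ultimate_mcp_server.tools.extraction` and that provider API keys are already configured through the server's config system (both assumptions, not shown in this file):

```python
import asyncio

from ultimate_mcp_server.tools.extraction import (  # assumed module path
    extract_key_value_pairs,
    extract_semantic_schema,
)


async def main() -> None:
    text = "Name: Alice\nRole: Engineer\nLocation: Remote"

    # Free-form key-value extraction; parsed pairs come back under "data"
    kv = await extract_key_value_pairs(text=text, provider="openai")
    print(kv["data"], kv["cost"])

    # Schema-guided extraction; the LLM populates only the fields we define
    schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "role": {"type": "string"},
        },
    }
    result = await extract_semantic_schema(text=text, semantic_schema=schema)
    print(result["data"], result["success"])


if __name__ == "__main__":
    asyncio.run(main())
```

Both calls route through `generate_completion`, so the returned dictionaries carry the same token, cost, and timing metadata as any other completion in the server.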
--------------------------------------------------------------------------------
/ultimate_mcp_server/core/providers/base.py:
--------------------------------------------------------------------------------
```python
1 | """Base LLM provider interface."""
2 | import abc
3 | import time
4 | from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
5 |
6 | from ultimate_mcp_server.config import get_config
7 | from ultimate_mcp_server.constants import COST_PER_MILLION_TOKENS, Provider
8 | from ultimate_mcp_server.utils import get_logger
9 |
10 | logger = get_logger(__name__)
11 |
12 |
13 | class ModelResponse:
14 | """
15 | Standard response format for all LLM provider completions in the Ultimate MCP Server.
16 |
17 | This class provides a unified representation of responses from different LLM providers,
18 | normalizing their various formats into a consistent structure. It handles common
19 | operations like:
20 |
21 | - Storing and accessing the generated text content
22 | - Tracking token usage statistics (input, output, total)
23 | - Computing cost estimates based on provider pricing
24 | - Preserving metadata and raw responses for debugging
25 | - Formatting response data for serialization
26 |
27 | The ModelResponse serves as a bridge between provider-specific response formats
28 | and the standardized interface presented by the Ultimate MCP Server. This allows
29 | client code to work with responses in a consistent way regardless of which
30 | provider generated them.
31 |
32 | All provider implementations in the system should return responses wrapped in
33 | this class to ensure consistent behavior across the application.
34 | """
35 |
36 | def __init__(
37 | self,
38 | text: str,
39 | model: str,
40 | provider: str,
41 | input_tokens: int = 0,
42 | output_tokens: int = 0,
43 | total_tokens: int = 0,
44 | processing_time: float = 0.0,
45 | raw_response: Any = None,
46 | metadata: Optional[Dict[str, Any]] = None,
47 | ):
48 | """
49 | Initialize a standardized model response object.
50 |
51 | This constructor creates a unified response object that normalizes the various
52 | response formats from different LLM providers into a consistent structure.
53 | It automatically calculates derived values like total token count and cost
54 | estimates based on the provided parameters.
55 |
56 | Args:
57 | text: The generated text content from the LLM. This is the primary output
58 | that would be presented to users.
59 | model: The specific model name that generated this response (e.g., "gpt-4o",
60 | "claude-3-5-haiku-20241022").
61 | provider: The provider name that served this response (e.g., "openai",
62 | "anthropic").
63 | input_tokens: Number of input/prompt tokens consumed in this request.
64 | Used for usage tracking and cost calculation.
65 | output_tokens: Number of output/completion tokens generated in this response.
66 | Used for usage tracking and cost calculation.
67 | total_tokens: Total token count for the request. If not explicitly provided,
68 | calculated as input_tokens + output_tokens.
69 | processing_time: Time taken to generate the response in seconds, measured
70 | from request initiation to response completion.
71 | raw_response: The original, unmodified response object from the provider's API.
72 | Useful for debugging and accessing provider-specific data.
73 | metadata: Additional response metadata as a dictionary. Can contain provider-specific
74 | information like finish_reason, logprobs, etc.
75 | """
76 | self.text = text
77 | self.model = model
78 | self.provider = provider
79 | self.input_tokens = input_tokens
80 | self.output_tokens = output_tokens
81 | self.total_tokens = total_tokens or (input_tokens + output_tokens)
82 | self.processing_time = processing_time
83 | self.raw_response = raw_response
84 | self.metadata = metadata or {}
85 |
86 | # Calculate cost based on token usage
87 | self.cost = self._calculate_cost()
88 |
89 | def _calculate_cost(self) -> float:
90 | """
91 | Calculate the estimated cost of the request based on token usage and current pricing.
92 |
93 | This internal method computes a cost estimate by:
94 | 1. Looking up the per-million-token costs for the specific model used
95 | 2. Applying different rates for input (prompt) and output (completion) tokens
96 | 3. Computing the final cost based on actual token counts
97 |
98 | If pricing data isn't available for the specific model, the method falls back
99 | to reasonable default estimations and logs a warning.
100 |
101 | Returns:
102 |             Estimated cost in USD as a floating-point value. Returns 0.0 if the model name
103 |             or token counts are unavailable; unrecognized models fall back to default estimates.
104 |
105 | Note:
106 | This is an estimation and may not precisely match actual billing from providers,
107 | especially as pricing changes over time or for custom deployment configurations.
108 | """
109 | if not self.model or not self.input_tokens or not self.output_tokens:
110 | return 0.0
111 |
112 | # Extract model name without provider prefix (e.g., strip "openai/" from "openai/gpt-4o")
113 | model_name = self.model
114 | if "/" in model_name:
115 | model_name = model_name.split("/", 1)[1]
116 |
117 | # Get cost per token for this model
118 | model_costs = COST_PER_MILLION_TOKENS.get(model_name, None)
119 | if not model_costs:
120 | # If model not found, use a default estimation
121 | model_costs = {"input": 0.5, "output": 1.5}
122 | logger.warning(
123 | f"Cost data not found for model {self.model}. Using estimates.",
124 | emoji_key="cost"
125 | )
126 |
127 | # Calculate cost
128 | input_cost = (self.input_tokens / 1_000_000) * model_costs["input"]
129 | output_cost = (self.output_tokens / 1_000_000) * model_costs["output"]
130 |
131 | return input_cost + output_cost
132 |
133 | def to_dict(self) -> Dict[str, Any]:
134 | """
135 | Convert the response object to a dictionary suitable for serialization.
136 |
137 | This method creates a structured dictionary representation of the response
138 | that can be easily serialized to JSON or other formats. The dictionary
139 | preserves all important fields while organizing them into a clean,
140 | hierarchical structure.
141 |
142 | The token usage statistics are grouped under a 'usage' key, making it
143 | easier to access and analyze metrics separately from the content.
144 |
145 | Returns:
146 | A dictionary containing all relevant response data with the following structure:
147 | {
148 | "text": str, # The generated text content
149 | "model": str, # Model name used
150 | "provider": str, # Provider name
151 | "usage": { # Token usage statistics
152 | "input_tokens": int,
153 | "output_tokens": int,
154 | "total_tokens": int
155 | },
156 | "processing_time": float, # Time taken in seconds
157 | "cost": float, # Estimated cost in USD
158 | "metadata": dict # Additional response metadata
159 | }
160 |
161 | Example:
162 | ```python
163 | response = await provider.generate_completion(prompt="Hello")
164 | response_dict = response.to_dict()
165 | json_response = json.dumps(response_dict)
166 | ```
167 | """
168 | return {
169 | "text": self.text,
170 | "model": self.model,
171 | "provider": self.provider,
172 | "usage": {
173 | "input_tokens": self.input_tokens,
174 | "output_tokens": self.output_tokens,
175 | "total_tokens": self.total_tokens,
176 | },
177 | "processing_time": self.processing_time,
178 | "cost": self.cost,
179 | "metadata": self.metadata,
180 | }
181 |
182 |
183 | class BaseProvider(abc.ABC):
184 | """
185 | Abstract base class that defines the interface for all LLM providers in Ultimate MCP Server.
186 |
187 | This class establishes the common API contract that all provider implementations must follow,
188 | ensuring consistent behavior regardless of the underlying LLM service (OpenAI, Anthropic, etc.).
189 | It standardizes key operations like:
190 |
191 | - Provider initialization and API key management
192 | - Text completion generation (both synchronous and streaming)
193 | - Model listing and default model selection
194 | - API key validation
195 | - Request timing and performance tracking
196 |
197 | By implementing this interface, each provider ensures compatibility with the broader
198 | Ultimate MCP Server framework. This abstraction layer allows the system to work with multiple
199 | LLM providers interchangeably, while hiding the provider-specific implementation details
200 | from the rest of the application.
201 |
202 | Provider implementations should inherit from this class and override all abstract methods.
203 | They may also extend the interface with provider-specific functionality as needed,
204 | though core components of the Ultimate MCP Server should rely only on the methods defined
205 | in this base class to ensure provider-agnostic operation.
206 |
207 | Usage example:
208 | ```python
209 | class OpenAIProvider(BaseProvider):
210 | provider_name = "openai"
211 |
212 | async def initialize(self) -> bool:
213 | # OpenAI-specific initialization...
214 |
215 | async def generate_completion(self, prompt: str, **kwargs) -> ModelResponse:
216 | # OpenAI-specific completion implementation...
217 |
218 | # Other required method implementations...
219 | ```
220 | """
221 |
222 | provider_name: str = "base"
223 |
224 | def __init__(self, api_key: Optional[str] = None, **kwargs):
225 | """Initialize the provider.
226 |
227 | Args:
228 | api_key: API key for the provider
229 | **kwargs: Additional provider-specific options
230 | """
231 |         # API keys are no longer read from the environment here; the config
232 |         # system supplies them, so a missing api_key simply remains None until
233 |         # the configuration layer provides one
234 |
235 | self.api_key = api_key
236 | self.options = kwargs
237 | self.client = None
238 | self.logger = get_logger(f"provider.{self.provider_name}")
239 |
240 | @abc.abstractmethod
241 | async def initialize(self) -> bool:
242 | """
243 | Initialize the provider client and verify API connectivity.
244 |
245 | This abstract method defines the standard interface for initializing a provider
246 | connection. All provider implementations must override this method with their
247 | provider-specific initialization logic while maintaining this signature.
248 |
249 | The initialization process typically includes:
250 | 1. Creating the provider-specific client with the API key and configuration
251 | 2. Setting up any required HTTP headers, authentication, or session management
252 | 3. Verifying API connectivity with a lightweight request when possible
253 | 4. Setting up provider-specific rate limiting or retry mechanisms
254 | 5. Loading any required provider-specific resources or configurations
255 |
256 | This method is called:
257 | - When a provider is first instantiated via the get_provider factory
258 | - When a provider connection needs to be refreshed or re-established
259 | - Before any operations that require an active client connection
260 |
261 | Returns:
262 | bool: True if initialization was successful and the provider is ready for use,
263 | False if initialization failed for any reason. A False return will
264 | typically prevent the provider from being used by the calling code.
265 |
266 | Raises:
267 | No exceptions should be raised directly. All errors should be handled
268 | internally, logged appropriately, and reflected in the return value.
269 | If initialization fails, detailed error information should be logged
270 | to help diagnose the issue.
271 |
272 | Implementation guidelines:
273 | - Handle API keys securely, avoiding logging them even in error messages
274 | - Implement retries with exponential backoff for transient errors
275 | - Set reasonable timeouts on API connection attempts
276 | - Log detailed diagnostics on initialization failures
277 | - Cache expensive resources to improve subsequent initialization times
278 | """
279 | pass
280 |
281 | @abc.abstractmethod
282 | async def generate_completion(
283 | self,
284 | prompt: str,
285 | model: Optional[str] = None,
286 | max_tokens: Optional[int] = None,
287 | temperature: float = 0.7,
288 | **kwargs
289 | ) -> ModelResponse:
290 | """
291 | Generate a text completion from the provider (non-streaming).
292 |
293 | This abstract method defines the standard interface for generating text completions
294 | from any LLM provider. All provider implementations must override this method with
295 | their provider-specific implementation while maintaining this signature.
296 |
297 | The method handles sending a prompt to the LLM, processing the response, and
298 | converting it to the standardized ModelResponse format. It is responsible for
299 | handling provider-specific API calls, error handling, and token counting.
300 |
301 | Args:
302 | prompt: The text prompt to send to the model. This is the primary input that
303 | the model will generate a completion for.
304 | model: The specific model identifier to use (e.g., "gpt-4o", "claude-3-opus").
305 | If None, the provider's default model will be used.
306 | max_tokens: Maximum number of tokens to generate in the response. If None,
307 | the provider's default or maximum limit will be used.
308 | temperature: Controls randomness in the output. Lower values (e.g., 0.1) make
309 | the output more deterministic, while higher values (e.g., 1.0)
310 | make it more random and creative. Range is typically 0.0-1.0
311 | but may vary by provider.
312 | **kwargs: Additional provider-specific parameters such as:
313 | - top_p: Nucleus sampling parameter (alternative to temperature)
314 | - stop_sequences: Tokens/strings that will stop generation when encountered
315 | - frequency_penalty: Penalty for using frequent tokens
316 | - presence_penalty: Penalty for repeated tokens
317 | - system_prompt: System instructions for providers that support it
318 | - response_format: Structured format request (e.g., JSON)
319 |
320 | Returns:
321 | ModelResponse: A standardized response object containing:
322 | - The generated text
323 | - Token usage statistics (input, output, total)
324 | - Cost estimation
325 | - Processing time
326 | - Provider and model information
327 | - Any provider-specific metadata
328 |
329 | Raises:
330 | ValueError: For invalid parameter combinations or values
331 | ConnectionError: For network or API connectivity issues
332 | AuthenticationError: For API key or authentication problems
333 | RateLimitError: When provider rate limits are exceeded
334 | ProviderError: For other provider-specific errors
335 |
336 | Implementation guidelines:
337 | - Use the provider's official client library when available
338 | - Handle error conditions gracefully with meaningful error messages
339 | - Track token usage precisely for accurate cost estimation
340 | - Measure processing time with the process_with_timer utility
341 | - Include relevant provider-specific metadata in the response
342 | """
343 | pass
344 |
345 | @abc.abstractmethod
346 | async def generate_completion_stream(
347 | self,
348 | prompt: str,
349 | model: Optional[str] = None,
350 | max_tokens: Optional[int] = None,
351 | temperature: float = 0.7,
352 | **kwargs
353 | ) -> AsyncGenerator[Tuple[str, Dict[str, Any]], None]:
354 | """
355 | Generate a streaming text completion with real-time token delivery.
356 |
357 | This abstract method defines the standard interface for streaming text completions
358 | from any LLM provider. All provider implementations must override this method with
359 | their provider-specific streaming implementation while maintaining this signature.
360 |
361 | Unlike the non-streaming generate_completion method, this method:
362 | - Returns content incrementally as it's generated
363 | - Uses an async generator that yields content chunks
364 | - Provides metadata with each chunk to track generation progress
365 | - Enables real-time display and processing of partial responses
366 |
367 | Args:
368 | prompt: The text prompt to send to the model. This is the primary input that
369 | the model will generate a completion for.
370 | model: The specific model identifier to use (e.g., "gpt-4o", "claude-3-opus").
371 | If None, the provider's default model will be used.
372 | max_tokens: Maximum number of tokens to generate in the response. If None,
373 | the provider's default or maximum limit will be used.
374 | temperature: Controls randomness in the output. Lower values (e.g., 0.1) make
375 | the output more deterministic, while higher values (e.g., 1.0)
376 | make it more random and creative. Range is typically 0.0-1.0
377 | but may vary by provider.
378 | **kwargs: Additional provider-specific parameters, identical to those
379 | supported by generate_completion.
380 |
381 | Yields:
382 | Tuple[str, Dict[str, Any]]: Each yield returns:
383 | - str: The next chunk of generated text
384 | - Dict: Metadata about the generation process, including at minimum:
385 | - done: Boolean indicating if this is the final chunk
386 | - chunk_index: Integer index of the current chunk (0-based)
387 |
388 | The metadata may also include provider-specific information such as:
389 | - finish_reason: Why the generation stopped (e.g., "stop", "length")
390 | - token_count: Running count of tokens generated
391 | - model: Model information if it changed during generation
392 |
393 | Raises:
394 | ValueError: For invalid parameter combinations or values
395 | ConnectionError: For network or API connectivity issues
396 | AuthenticationError: For API key or authentication problems
397 | RateLimitError: When provider rate limits are exceeded
398 | ProviderError: For other provider-specific errors
399 |
400 | Implementation guidelines:
401 | - Use the provider's official streaming endpoints when available
402 | - Ensure chunks represent logical breaks where possible (e.g., words, not partial UTF-8)
403 | - Handle connection interruptions gracefully
404 | - Set the 'done' flag to True only in the final yielded chunk
405 | - Provide consistent metadata structure across all yielded chunks
406 | """
407 | pass
408 |
409 | async def list_models(self) -> List[Dict[str, Any]]:
410 | """
411 | List available models from this provider with their capabilities and metadata.
412 |
413 | This method retrieves information about all available models from the provider,
414 | including their identifiers, capabilities, and contextual metadata. Providers
415 | typically override this method to query their API's model list endpoint and
416 | normalize the responses into a consistent format.
417 |
418 | The base implementation returns a minimal default model entry, but provider-specific
419 | implementations should:
420 | 1. Query the provider's models API or endpoint
421 | 2. Transform provider-specific model data into the standard format
422 | 3. Enrich the models with useful metadata like token limits and capabilities
423 | 4. Filter models based on access permissions if applicable
424 |
425 | Returns:
426 | A list of dictionaries, each representing a model with at least these keys:
427 | - id (str): The model identifier (e.g., "gpt-4o", "claude-3-opus")
428 | - provider (str): The provider name (e.g., "openai", "anthropic")
429 | - description (str): A human-readable description of the model
430 |
431 | Models may also include additional metadata such as:
432 | - max_tokens (int): Maximum combined tokens (prompt + completion)
433 | - created (str): Creation/version date of the model
434 | - pricing (dict): Cost information for input/output tokens
435 | - capabilities (list): Features the model supports (e.g., "vision", "function_calling")
436 | - deprecated (bool): Whether the model is deprecated or scheduled for retirement
437 |
438 | Raises:
439 | ConnectionError: If the provider API cannot be reached
440 | AuthenticationError: If authentication fails during the request
441 | ProviderError: For other provider-specific errors
442 |
443 | Note:
444 | Model data may be cached internally to reduce API calls. Providers should
445 | implement appropriate caching strategies to balance freshness with performance.
446 | """
447 | # Default implementation - override in provider-specific classes
448 | return [
449 | {
450 | "id": "default-model",
451 | "provider": self.provider_name,
452 | "description": "Default model",
453 | }
454 | ]
455 |
456 | def get_default_model(self) -> str:
457 | """
458 | Get the default model identifier for this provider.
459 |
460 | This method returns the standard or recommended model identifier to use when
461 | no specific model is requested. Each provider implementation must override this
462 | method to specify its default model.
463 |
464 | The default model should be:
465 | - Generally available to all users of the provider
466 | - Well-balanced between capabilities and cost
467 | - Appropriate for general-purpose text generation tasks
468 | - Stable and reliable for production use
469 |
470 | The implementation should consider:
471 | 1. Provider-specific configuration settings
472 | 2. Environment variables or system settings
473 | 3. User access level and permissions
474 | 4. Regional availability of models
475 | 5. Current model deprecation status
476 |
477 | Returns:
478 | str: The model identifier string (e.g., "gpt-4o", "claude-3-haiku")
479 | that will be used when no model is explicitly specified.
480 | This identifier should be valid and usable without additional prefixing.
481 |
482 | Raises:
483 | NotImplementedError: In the base class implementation, signaling that
484 | subclasses must override this method.
485 |
486 | Note:
487 | Provider implementations should periodically review and update their default
488 | model selections as newer, more capable models become available or as pricing
489 | structures change.
490 | """
491 | raise NotImplementedError("Provider must implement get_default_model")
492 |
493 | async def check_api_key(self) -> bool:
494 | """
495 | Check if the API key for this provider is valid and functional.
496 |
497 | This method verifies that the configured API key is valid and can be used
498 | to authenticate with the provider's API. The default implementation simply
499 | checks if an API key is present, but provider-specific implementations
500 | should override this to perform actual validation against the provider's API.
501 |
502 | A proper implementation should:
503 | 1. Make a lightweight API call to an endpoint that requires authentication
504 | 2. Handle authentication errors specifically to differentiate from other failures
505 | 3. Cache results when appropriate to avoid excessive validation calls
506 | 4. Respect rate limits during validation
507 |
508 | This method is typically called during:
509 | - Server startup to verify all configured providers
510 | - Before first use of a provider to ensure it's properly configured
511 | - Periodically as a health check to detect expired or revoked keys
512 | - After configuration changes that might affect authentication
513 |
514 | Returns:
515 | bool: True if the API key is valid and usable, False otherwise.
516 | The default implementation returns True if an API key is present,
517 | which does not guarantee the key is actually valid or functional.
518 |
519 | Note:
520 | Provider implementations should log descriptive error messages when
521 | validation fails to help with troubleshooting, but should avoid logging
522 | the actual API key or other sensitive credentials.
523 | """
524 | # Default implementation just checks if key exists
525 | return bool(self.api_key)
526 |
527 | async def process_with_timer(
528 | self,
529 | func: callable,
530 | *args,
531 | **kwargs
532 | ) -> Tuple[Any, float]:
533 | """
534 | Process an async function call with precise timing measurement.
535 |
536 | This utility method provides a standardized way to execute any async function
537 | while measuring its execution time with high precision. It's particularly useful for:
538 |
539 | - Tracking LLM API call latency for performance monitoring
540 | - Measuring request-response round trip times
541 | - Providing accurate timing data for usage reports and optimizations
542 | - Including processing time in log messages and response metadata
543 |
544 | The method handles the time measurement boilerplate, ensuring consistent
545 | timing practices across all provider implementations. The measured processing
546 | time is returned alongside the function's result, allowing both to be used
547 | in subsequent operations.
548 |
549 | Args:
550 | func: The async function (callable) to execute and time. This should be
551 | an awaitable function that performs the actual operation.
552 | *args: Positional arguments to pass to the function.
553 | **kwargs: Keyword arguments to pass to the function.
554 |
555 | Returns:
556 | Tuple containing:
557 | - The result returned by the executed function (any type)
558 | - The processing time in seconds as a float, measured with
559 | high precision from just before the function call to just after
560 | it completes.
561 |
562 | Example usage:
563 | ```python
564 | # Timing an API call
565 | response, duration = await self.process_with_timer(
566 | self.client.create,
567 | model="gpt-4o",
568 | prompt="Hello, world!"
569 | )
570 |
571 | # Using the measured time in a response
572 | return ModelResponse(
573 | text=response.choices[0].text,
574 | model="gpt-4o",
575 | provider=self.provider_name,
576 | processing_time=duration
577 | )
578 | ```
579 | """
580 | start_time = time.time()
581 | result = await func(*args, **kwargs)
582 | processing_time = time.time() - start_time
583 |
584 | return result, processing_time
585 |
586 |
587 | def parse_model_string(model_string: str) -> Tuple[Optional[str], str]:
588 | """Parse a model string that might include a provider prefix.
589 |
590 | This function parses a model identifier string that may include a provider prefix
591 | (e.g., 'openai/gpt-4o' or 'anthropic:claude-3-sonnet'). It supports two separator
592 | formats: forward slash ('/') and colon (':'). If a valid provider prefix is found,
593 | the function returns the provider name and model name as separate strings.
594 |
595 | Provider validation is performed against the Provider enum values to ensure the
596 | prefix represents a supported provider. If no valid provider prefix is found, the
597 | provider component will be None, indicating the model should use the default provider.
598 |
599 | This function is particularly useful in contexts where users can specify models with optional
600 | provider prefixes, allowing the system to route requests to the appropriate provider
601 | even when the provider isn't explicitly specified elsewhere.
602 |
603 | Args:
604 | model_string: A model string, possibly including a provider prefix.
605 | Examples: "openai/gpt-4.1-mini", "anthropic/claude-3-opus",
606 | "gemini:gemini-pro", "gpt-4o" (no provider)
607 |
608 | Returns:
609 | Tuple of (provider_name, model_name):
610 | - provider_name (str or None): Lowercase provider name if a valid prefix was found,
611 | or None if no valid provider prefix was detected.
612 | - model_name (str): The model identifier without the provider prefix.
613 |
614 | Examples:
615 | >>> parse_model_string("openai/gpt-4o")
616 | ('openai', 'gpt-4o')
617 |
618 | >>> parse_model_string("anthropic:claude-3-opus")
619 | ('anthropic', 'claude-3-opus')
620 |
621 | >>> parse_model_string("gpt-4o") # No provider prefix
622 | (None, 'gpt-4o')
623 |
624 | >>> parse_model_string("unknown/model-name") # Invalid provider
625 | (None, 'unknown/model-name')
626 | """
627 | separator = None
628 | if '/' in model_string:
629 | separator = '/'
630 | elif ':' in model_string:
631 | separator = ':'
632 |
633 | if separator:
634 | # Try to extract provider prefix
635 | parts = model_string.split(separator, 1)
636 | if len(parts) == 2:
637 | provider_prefix, model_name = parts
638 |
639 | # Check if the prefix is a valid provider name
640 | # Use list comprehension for cleaner check against Provider enum values
641 | valid_providers = [p.value.lower() for p in Provider]
642 | if provider_prefix.lower() in valid_providers:
643 | return provider_prefix.lower(), model_name
644 |
645 | # No valid provider prefix found or no separator
646 | return None, model_string
647 |
648 |
649 | async def get_provider(provider_name: str, **kwargs) -> BaseProvider:
650 | """
651 | Factory function to dynamically create and initialize a provider instance by name.
652 |
653 | This function serves as the central provider instantiation mechanism in the Ultimate MCP Server,
654 | dynamically creating and initializing the appropriate provider implementation based on
655 | the requested provider name. It handles:
656 |
657 | 1. Provider name validation and normalization
658 | 2. Provider class selection based on the standardized Provider enum
659 | 3. Model string parsing to extract provider information from model identifiers
660 | 4. Configuration retrieval from the Ultimate MCP Server config system
661 | 5. Provider instance creation with appropriate parameters
662 | 6. Provider initialization and validation
663 |
664 | The function supports specifying provider names directly or extracting them from
665 | model identifiers that include provider prefixes (e.g., "openai/gpt-4o"). This flexibility
666 | allows for more intuitive access to providers when working with specific models.
667 |
668 | Args:
669 | provider_name: Provider identifier to instantiate. This should match one of the
670 | values in the Provider enum (case-insensitive). Examples include
671 | "openai", "anthropic", "gemini", etc.
672 | **kwargs: Additional provider-specific configuration options to pass to the
673 | provider's constructor. Common options include:
674 | - api_key: Override the API key from configuration
675 | - model: Model name to use (may include provider prefix)
676 | - base_url: Alternative API endpoint URL
677 | - organization: Organization ID for providers that support it
678 |
679 | Returns:
680 | An initialized provider instance ready for use. The specific return type will
681 | be a subclass of BaseProvider corresponding to the requested provider.
682 |
683 | Raises:
684 | ValueError: If the provider name is invalid or initialization fails. This ensures
685 | that only fully functional provider instances are returned.
686 |
687 | Example usage:
688 | ```python
689 | # Basic usage with direct provider name
690 | openai_provider = await get_provider("openai")
691 |
692 | # Using a model string with provider prefix
693 | provider = await get_provider("openai", model="anthropic/claude-3-opus")
694 | # The above actually returns an AnthropicProvider because the model string
695 | # overrides the provider_name parameter
696 |
697 | # With additional configuration
698 | custom_provider = await get_provider(
699 | "openai",
700 | api_key="custom-key",
701 | base_url="https://custom-endpoint.example.com/v1",
702 | model="gpt-4o"
703 | )
704 | ```
705 | """
706 | cfg = get_config()
707 | provider_name = provider_name.lower().strip()
708 |
709 | # If a model was provided, check if it has a provider prefix
710 | # This helps with models like "openai/gpt-4.1-mini" to ensure they go to the right provider
711 | if 'model' in kwargs and isinstance(kwargs['model'], str):
712 | extracted_provider, extracted_model = parse_model_string(kwargs['model'])
713 | if extracted_provider:
714 | # If we have a provider prefix in the model string, use that provider
715 | # and update the model name to remove the prefix
716 | provider_name = extracted_provider
717 | kwargs['model'] = extracted_model
718 | logger.debug(f"Extracted provider '{provider_name}' and model '{extracted_model}' from model string")
719 |
720 | from ultimate_mcp_server.core.providers.anthropic import AnthropicProvider
721 | from ultimate_mcp_server.core.providers.deepseek import DeepSeekProvider
722 | from ultimate_mcp_server.core.providers.gemini import GeminiProvider
723 | from ultimate_mcp_server.core.providers.grok import GrokProvider
724 | from ultimate_mcp_server.core.providers.ollama import OllamaProvider
725 | from ultimate_mcp_server.core.providers.openai import OpenAIProvider
726 | from ultimate_mcp_server.core.providers.openrouter import OpenRouterProvider
727 |
728 | providers = {
729 | Provider.OPENAI: OpenAIProvider,
730 | Provider.ANTHROPIC: AnthropicProvider,
731 | Provider.DEEPSEEK: DeepSeekProvider,
732 | Provider.GEMINI: GeminiProvider,
733 | Provider.OPENROUTER: OpenRouterProvider,
734 | Provider.GROK: GrokProvider,
735 | Provider.OLLAMA: OllamaProvider,
736 | }
737 |
738 | if provider_name not in providers:
739 | raise ValueError(f"Invalid provider name: {provider_name}")
740 |
741 | # Get the top-level 'providers' config object, default to None if it doesn't exist
742 | providers_config = getattr(cfg, 'providers', None)
743 |
744 | # Get the specific provider config (e.g., providers_config.openai) from the providers_config object
745 | # Default to None if providers_config is None or the specific provider attr doesn't exist
746 | provider_cfg = getattr(providers_config, provider_name, None) if providers_config else None
747 |
748 | # Now use provider_cfg to get the api_key if needed
749 | if 'api_key' not in kwargs and provider_cfg and hasattr(provider_cfg, 'api_key') and provider_cfg.api_key:
750 | kwargs['api_key'] = provider_cfg.api_key
751 |
752 | provider_class = providers[provider_name]
753 | instance = provider_class(**kwargs)
754 |
755 | # Initialize the provider immediately
756 | initialized = await instance.initialize()
757 | if not initialized:
758 | # Raise an error if initialization fails to prevent returning an unusable instance
759 | raise ValueError(f"Failed to initialize provider: {provider_name}")
760 |
761 | return instance
```
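
To make the cost bookkeeping in `ModelResponse` concrete, here is a small sketch of how the `cost` field is derived and how provider prefixes are parsed. The per-million-token rates in the comment are illustrative only; real rates come from `COST_PER_MILLION_TOKENS` in `ultimate_mcp_server.constants` and may differ:

```python
from ultimate_mcp_server.core.providers.base import ModelResponse, parse_model_string

# Provider prefix parsing: a valid prefix is split off, anything else passes through
assert parse_model_string("openai/gpt-4o") == ("openai", "gpt-4o")
assert parse_model_string("gpt-4o") == (None, "gpt-4o")

# Cost = input_tokens/1M * input_rate + output_tokens/1M * output_rate.
# With illustrative rates of $0.50 / $1.50 per million tokens:
#   200/1_000_000 * 0.50 + 100/1_000_000 * 1.50 = 0.00010 + 0.00015 = 0.00025 USD
response = ModelResponse(
    text="Hello!",
    model="gpt-4o",            # looked up in COST_PER_MILLION_TOKENS (real rates may differ)
    provider="openai",
    input_tokens=200,
    output_tokens=100,
    processing_time=0.8,
)
print(response.total_tokens)   # 300, computed from input + output when not supplied
print(response.to_dict()["usage"])
```

Because `_calculate_cost` falls back to default per-million estimates for unrecognized models, the `cost` field is always populated, but it should be treated as an estimate rather than a billing figure.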
--------------------------------------------------------------------------------
/ultimate_mcp_server/tools/sentiment_analysis.py:
--------------------------------------------------------------------------------
```python
1 | """Business-focused sentiment analysis tools for Ultimate MCP Server."""
2 | import json
3 | import time
4 | from typing import Any, Dict, List, Optional
5 |
6 | from ultimate_mcp_server.constants import Provider, TaskType
7 | from ultimate_mcp_server.exceptions import ProviderError, ToolError, ToolInputError
8 | from ultimate_mcp_server.tools.base import with_error_handling, with_tool_metrics
9 | from ultimate_mcp_server.tools.completion import generate_completion
10 | from ultimate_mcp_server.utils import get_logger
11 |
12 | logger = get_logger("ultimate_mcp_server.tools.business_sentiment")
13 |
14 | @with_tool_metrics
15 | @with_error_handling
16 | async def analyze_business_sentiment(
17 | text: str,
18 | industry: Optional[str] = None,
19 | analysis_mode: str = "standard",
20 | entity_extraction: bool = False,
21 | aspect_based: bool = False,
22 | competitive_analysis: bool = False,
23 | intent_detection: bool = False,
24 | risk_assessment: bool = False,
25 | language: str = "english",
26 | provider: str = Provider.OPENAI.value,
27 | model: Optional[str] = None,
28 | threshold_config: Optional[Dict[str, float]] = None
29 | ) -> Dict[str, Any]:
30 | """Performs comprehensive business-oriented sentiment analysis for commercial applications.
31 |
32 | This enterprise-grade tool analyzes customer feedback, reviews, support tickets, survey responses,
33 | social media mentions and other business text data to extract actionable insights. It provides
34 | customizable analysis for specific industries and use cases with options for deep-dive analysis.
35 |
36 | Args:
37 | text: The business text to analyze (feedback, review, survey response, etc.).
38 | industry: Optional industry context to tailor analysis (e.g., "retail", "financial_services",
39 | "healthcare", "hospitality", "technology", "telecommunications", "manufacturing").
40 | Improves accuracy by applying industry-specific terminology and benchmarks.
41 | analysis_mode: Type of analysis to perform:
42 | - "standard": Basic business sentiment with key indicators
43 | - "comprehensive": Detailed analysis with all available metrics
44 | - "customer_experience": Focus on satisfaction, loyalty, and effort
45 | - "product_feedback": Focus on feature sentiment and product improvement
46 | - "brand_perception": Focus on brand attributes and competitive positioning
47 | - "support_ticket": Optimized for support ticket prioritization and resolution
48 | - "sales_opportunity": Focus on purchase intent and sales readiness
49 | entity_extraction: Whether to identify and extract mentioned products, services, features,
50 | and business entities. Useful for pinpointing what customers are discussing.
51 | aspect_based: Whether to break down sentiment by specific aspects/features mentioned.
52 | Helps identify which specific product/service elements drive sentiment.
53 | competitive_analysis: Whether to identify competitor mentions and comparative sentiment.
54 | Useful for competitive intelligence and benchmarking.
55 | intent_detection: Whether to detect customer intents (e.g., purchase interest, cancellation
56 | risk, support request, recommendation intent, complaint, praise).
57 | risk_assessment: Whether to evaluate potential business risks (e.g., churn risk, PR risk,
58 | legal/compliance issues, potential escalation) based on the text.
59 | language: Language of the input text (supports multiple languages for global businesses).
60 | provider: The name of the LLM provider (e.g., "openai", "anthropic", "gemini").
61 | model: The specific model ID. If None, uses the provider's default model.
62 | threshold_config: Optional dictionary of threshold values for various metrics to customize
63 | sensitivity levels (e.g., {"churn_risk": 0.7, "urgency": 0.8}).
64 |
65 | Returns:
66 | A dictionary containing comprehensive business sentiment analysis:
67 | {
68 | "core_metrics": {
69 | "primary_sentiment": "positive", # Overall business sentiment
70 | "sentiment_score": 0.75, # Normalized score (-1.0 to 1.0)
71 | "confidence": 0.92, # Confidence in the assessment
72 | "satisfaction_score": 4.2, # Estimated satisfaction (1-5 scale)
73 | "nps_category": "promoter", # Predicted NPS category: detractor/passive/promoter
74 | "urgency": "low", # Action urgency assessment: low/medium/high/critical
75 | "actionability": 0.35 # How actionable the feedback is (0.0-1.0)
76 | },
77 | "business_dimensions": { # Business-specific metrics
78 | "customer_satisfaction": 0.82, # Satisfaction indicator (0.0-1.0)
79 | "product_sentiment": 0.75, # Product sentiment (0.0-1.0)
80 | "value_perception": 0.68, # Price-to-value perception (0.0-1.0)
81 | "ease_of_use": 0.90, # Usability perception when relevant (0.0-1.0)
82 | "customer_effort_score": 2.1, # Estimated CES (1-7 scale, lower is better)
83 | "loyalty_indicators": 0.85, # Loyalty/retention indicators (0.0-1.0)
84 | "recommendation_likelihood": 0.87 # Likelihood to recommend (0.0-1.0)
85 | },
86 | "intent_analysis": { # Only if intent_detection=True
87 | "purchase_intent": 0.15, # Purchase interest level (0.0-1.0)
88 | "churn_risk": 0.08, # Risk of customer churn (0.0-1.0)
89 | "support_needed": 0.75, # Likelihood customer needs support (0.0-1.0)
90 | "feedback_type": "suggestion", # Type: complaint/praise/question/suggestion
91 | "information_request": false # Whether customer is requesting information
92 | },
93 | "aspect_sentiment": { # Only if aspect_based=True
94 | "product_quality": 0.85,
95 | "customer_service": 0.92,
96 | "shipping_speed": 0.45,
97 | "return_process": 0.30,
98 | "website_usability": 0.78
99 | },
100 | "entity_extraction": { # Only if entity_extraction=True
101 | "products": ["Product X Pro", "Legacy Model"],
102 | "features": ["battery life", "touchscreen responsiveness"],
103 | "services": ["customer support", "technical assistance"],
104 | "mentioned_departments": ["billing", "technical support"]
105 | },
106 | "competitive_insights": { # Only if competitive_analysis=True
107 | "competitor_mentions": ["Competitor A", "Competitor B"],
108 | "comparative_sentiment": {
109 | "Competitor A": -0.2, # Negative comparison to competitor
110 | "Competitor B": 0.3 # Positive comparison to competitor
111 | },
112 | "perceived_advantages": ["price", "features"],
113 | "perceived_disadvantages": ["support response time"]
114 | },
115 | "risk_assessment": { # Only if risk_assessment=True
116 | "churn_probability": 0.32,
117 | "response_urgency": "medium",
118 | "pr_risk": "low",
119 | "legal_compliance_flags": ["data privacy concern"],
120 | "escalation_probability": 0.15
121 | },
122 | "message_characteristics": {
123 | "key_topics": ["product quality", "customer service", "pricing"],
124 | "key_phrases": ["extremely satisfied", "quick resolution"],
125 | "tone_indicators": ["appreciative", "constructive"],
126 | "clarity": 0.9, # How clear/specific the feedback is (0.0-1.0)
127 | "subjectivity": 0.4, # Subjective vs. objective content (0.0-1.0)
128 | "emotional_intensity": 0.65 # Intensity of emotion expressed (0.0-1.0)
129 | },
130 | "industry_specific_insights": {}, # Varies based on 'industry' parameter
131 | "recommended_actions": [ # Business action recommendations
132 | "Follow up regarding mentioned technical issue",
133 | "Highlight positive experience in success stories"
134 | ],
135 | "meta": { # Metadata about the analysis
136 | "provider": "anthropic",
137 | "model": "claude-3-5-sonnet-20241022",
138 | "analysis_mode": "comprehensive",
139 | "language_detected": "english",
140 | "tokens": { "input": 350, "output": 820, "total": 1170 },
141 | "cost": 0.000843,
142 | "processing_time": 1.25,
143 | "version": "2.4.0"
144 | },
145 | "success": true
146 | }
147 |
148 | Raises:
149 | ToolInputError: If parameters are invalid or incompatible.
150 | ProviderError: If the provider is unavailable or the LLM request fails.
151 | ToolError: For other errors during processing.
152 | """
153 | start_time = time.time()
154 |
155 | # Parameter validation
156 | if not text or not isinstance(text, str):
157 | raise ToolInputError(
158 | "Input text must be a non-empty string.",
159 | param_name="text",
160 | provided_value=text
161 | )
162 |
163 | valid_analysis_modes = [
164 | "standard", "comprehensive", "customer_experience", "product_feedback",
165 | "brand_perception", "support_ticket", "sales_opportunity"
166 | ]
167 | if analysis_mode not in valid_analysis_modes:
168 | raise ToolInputError(
169 | f"Invalid analysis_mode. Must be one of: {', '.join(valid_analysis_modes)}",
170 | param_name="analysis_mode",
171 | provided_value=analysis_mode
172 | )
173 |
174 | # Construct the analysis prompt based on parameters
175 | system_prompt = _build_sentiment_system_prompt(
176 | industry=industry,
177 | analysis_mode=analysis_mode,
178 | entity_extraction=entity_extraction,
179 | aspect_based=aspect_based,
180 | competitive_analysis=competitive_analysis,
181 | intent_detection=intent_detection,
182 | risk_assessment=risk_assessment,
183 | language=language,
184 | threshold_config=threshold_config
185 | )
186 |
187 | user_prompt = f"""
188 | Analyze the following business text according to the specified parameters:
189 |
190 | Text to analyze:
191 | ```
192 | {text}
193 | ```
194 |
195 | Provide a detailed JSON response according to the format specified in the system instructions.
196 | """
197 |
198 | # Combined prompt for all providers
199 | combined_prompt = f"{system_prompt}\n\n{user_prompt}"
200 |
201 | try:
202 | # Consistently use generate_completion for all providers
203 | completion_result = await generate_completion(
204 | prompt=combined_prompt,
205 | provider=provider,
206 | model=model,
207 | temperature=0.2,
208 | max_tokens=2000,
209 | additional_params={"response_format": {"type": "json_object"}} if provider.lower() == "openai" else None
210 | )
211 |
212 | # Extract response text from the completion result
213 | response_text = completion_result["text"].strip()
214 |
215 | # Extract JSON response
216 | try:
217 | # Try to extract JSON if wrapped in code blocks
218 | if "```json" in response_text:
219 | json_start = response_text.find("```json") + 7
220 | json_end = response_text.find("```", json_start)
221 | if json_end > json_start:
222 | response_text = response_text[json_start:json_end].strip()
223 | elif "```" in response_text:
224 | json_start = response_text.find("```") + 3
225 | json_end = response_text.find("```", json_start)
226 | if json_end > json_start:
227 | response_text = response_text[json_start:json_end].strip()
228 |
229 | # Parse and validate JSON
230 | analysis_data = json.loads(response_text)
231 |
232 | # Validate minimum required fields
233 | if "core_metrics" not in analysis_data:
234 | logger.warning("Missing 'core_metrics' in response, adding empty object")
235 | analysis_data["core_metrics"] = {}
236 |
237 | # Ensure core metrics contains primary sentiment
238 | core_metrics = analysis_data["core_metrics"]
239 | if "primary_sentiment" not in core_metrics:
240 | sentiment_score = core_metrics.get("sentiment_score", 0.0)
241 | if sentiment_score > 0.2:
242 | primary_sentiment = "positive"
243 | elif sentiment_score < -0.2:
244 | primary_sentiment = "negative"
245 | else:
246 | primary_sentiment = "neutral"
247 | core_metrics["primary_sentiment"] = primary_sentiment
248 | logger.debug(f"Added missing primary_sentiment: {primary_sentiment}")
249 |
250 | # Populate metadata
251 | processing_time = time.time() - start_time
252 |
253 | # Extract provider and model info from completion result
254 | result_provider = completion_result.get("provider", provider)
255 | result_model = completion_result.get("model", model)
256 | input_tokens = completion_result.get("tokens", {}).get("input", 0)
257 | output_tokens = completion_result.get("tokens", {}).get("output", 0)
258 | total_tokens = completion_result.get("tokens", {}).get("total", 0)
259 | cost = completion_result.get("cost", 0.0)
260 |
261 | meta = {
262 | "provider": result_provider,
263 | "model": result_model,
264 | "analysis_mode": analysis_mode,
265 | "language_detected": language, # Actual detection would need more logic
266 | "tokens": {
267 | "input": input_tokens,
268 | "output": output_tokens,
269 | "total": total_tokens,
270 | },
271 | "cost": cost,
272 | "processing_time": processing_time,
273 | "version": "2.4.0" # Tool version
274 | }
275 |
276 | # Include metadata in the final response
277 | analysis_data["meta"] = meta
278 | analysis_data["success"] = True
279 |
280 | # Log successful completion
281 | logger.success(
282 | f"Business sentiment analysis completed successfully with {result_provider}/{result_model}",
283 | emoji_key=TaskType.CLASSIFICATION.value,
284 | analysis_mode=analysis_mode,
285 | sentiment=core_metrics.get("primary_sentiment", "unknown"),
286 | tokens={
287 | "input": input_tokens,
288 | "output": output_tokens
289 | },
290 | cost=cost,
291 | time=processing_time
292 | )
293 |
294 | return analysis_data
295 |
296 | except json.JSONDecodeError as e:
297 | logger.error(
298 | f"Failed to parse JSON response: {e}",
299 | emoji_key="error",
300 | raw_response=response_text[:500] # Log partial response for debugging
301 | )
302 | raise ToolError(
303 | f"Failed to parse business sentiment analysis response: {e}",
304 | error_code="invalid_response_format",
305 | details={"raw_response": response_text[:500]}
306 | ) from e
307 |
308 | except Exception as e:
309 | raise ProviderError(
310 | f"Business sentiment analysis failed: {str(e)}",
311 | provider=provider,
312 | model=model,
313 | cause=e
314 | ) from e
315 |
316 |
317 | def _build_sentiment_system_prompt(
318 | industry: Optional[str],
319 | analysis_mode: str,
320 | entity_extraction: bool,
321 | aspect_based: bool,
322 | competitive_analysis: bool,
323 | intent_detection: bool,
324 | risk_assessment: bool,
325 | language: str,
326 | threshold_config: Optional[Dict[str, float]]
327 | ) -> str:
328 | """Builds a comprehensive system prompt for business sentiment analysis based on parameters."""
329 |
330 | # Base prompt with core instructions
331 | base_prompt = """
332 | You are an enterprise-grade business sentiment analysis system designed to extract actionable insights from customer and stakeholder feedback. Your analysis should be precise, nuanced, and tailored to business decision-making.
333 |
334 | Provide analysis in a structured JSON format with the following core sections:
335 | 1. core_metrics: Essential sentiment indicators
336 | 2. business_dimensions: Business-specific metrics like satisfaction and loyalty
337 | 3. message_characteristics: Content properties like topics and tone
338 |
339 | All numerical scores should be consistent (higher is better unless otherwise specified) and normalized within their specified ranges.
340 | """
341 |
342 | # Industry-specific tailoring
343 | industry_prompt = ""
344 | if industry:
345 | industry_mappings = {
346 | "retail": "Retail and e-commerce context: Focus on product quality, shopping experience, delivery, returns, and customer service. Include retail-specific metrics like purchase satisfaction and repeat purchase intent.",
347 |
348 | "financial_services": "Financial services context: Focus on trust, security, transparency, and service quality. Include financial-specific metrics like perceived financial benefit, trust indicator, and financial confidence impact.",
349 |
350 | "healthcare": "Healthcare context: Focus on care quality, staff interactions, facility experience, and outcomes. Include healthcare-specific metrics like perceived care quality, staff empathy, and outcome satisfaction.",
351 |
352 | "hospitality": "Hospitality context: Focus on accommodations, amenities, staff service, and overall experience. Include hospitality-specific metrics like comfort rating, staff attentiveness, and value perception.",
353 |
354 | "technology": "Technology/SaaS context: Focus on software/product functionality, reliability, ease of use, and technical support. Include tech-specific metrics like feature satisfaction, reliability perception, and technical resolution satisfaction.",
355 |
356 | "telecommunications": "Telecommunications context: Focus on service reliability, coverage, customer support, and value. Include telecom-specific metrics like service reliability rating, coverage satisfaction, and value perception.",
357 |
358 | "manufacturing": "Manufacturing context: Focus on product quality, durability, specifications adherence, and support. Include manufacturing-specific metrics like quality rating, durability perception, and technical specification satisfaction."
359 | }
360 |
361 | if industry.lower() in industry_mappings:
362 | industry_prompt = f"\nINDUSTRY CONTEXT: {industry_mappings[industry.lower()]}\n"
363 | industry_prompt += "\nInclude an 'industry_specific_insights' section with metrics and insights specific to this industry."
364 | else:
365 | industry_prompt = f"\nINDUSTRY CONTEXT: {industry} - Apply industry-specific terminology and standards.\n"
366 |
367 | # Analysis mode specification
368 | mode_prompt = "\nANALYSIS MODE: "
369 | if analysis_mode == "standard":
370 | mode_prompt += "Standard business sentiment with core metrics and key indicators."
371 | elif analysis_mode == "comprehensive":
372 | mode_prompt += "Comprehensive analysis with all available metrics and maximum detail."
373 | elif analysis_mode == "customer_experience":
374 | mode_prompt += "Customer experience focus: Emphasize satisfaction, loyalty, and effort metrics. Pay special attention to service interactions, pain points, and moments of delight."
375 | elif analysis_mode == "product_feedback":
376 | mode_prompt += "Product feedback focus: Emphasize feature sentiment, product quality, and improvement suggestions. Identify specific product components mentioned and their sentiment."
377 | elif analysis_mode == "brand_perception":
378 | mode_prompt += "Brand perception focus: Emphasize brand attributes, positioning, and emotional connections. Analyze brand promise fulfillment and competitive positioning."
379 | elif analysis_mode == "support_ticket":
380 | mode_prompt += "Support ticket focus: Emphasize issue categorization, severity, urgency, and resolution path. Detect technical terms and problem indicators."
381 | elif analysis_mode == "sales_opportunity":
382 | mode_prompt += "Sales opportunity focus: Emphasize purchase intent, objections, and decision factors. Analyze buying signals and sales readiness indicators."
383 |
384 | # Optional analysis components
385 | optional_components = []
386 |
387 | if entity_extraction:
388 | optional_components.append("""
389 | ENTITY EXTRACTION: Extract and categorize business entities mentioned in the text.
390 | - Include an 'entity_extraction' section with arrays of identified products, features, services, departments, locations, etc.
391 | - Normalize entity names when variations of the same entity are mentioned.
392 | - Exclude generic mentions and focus on specific named entities.
393 | """)
394 |
395 | if aspect_based:
396 | optional_components.append("""
397 | ASPECT-BASED SENTIMENT: Break down sentiment by specific aspects or features mentioned.
398 | - Include an 'aspect_sentiment' section with sentiment scores for each identified aspect.
399 | - Aspects should be specific (e.g., 'website_usability', 'checkout_process', 'product_quality').
400 | - Only include aspects explicitly mentioned or strongly implied in the text.
401 | - Score each aspect from -1.0 (extremely negative) to 1.0 (extremely positive).
402 | """)
403 |
404 | if competitive_analysis:
405 | optional_components.append("""
406 | COMPETITIVE ANALYSIS: Identify and analyze competitor mentions and comparisons.
407 | - Include a 'competitive_insights' section with competitor names and comparative sentiment.
408 | - Capture explicit and implicit comparisons to competitors.
409 | - Identify perceived advantages and disadvantages relative to competitors.
410 | - Score comparative sentiment from -1.0 (negative comparison) to 1.0 (positive comparison).
411 | """)
412 |
413 | if intent_detection:
414 | optional_components.append("""
415 | INTENT DETECTION: Identify customer intentions and likely next actions.
416 | - Include an 'intent_analysis' section with probabilities for purchase intent, churn risk, etc.
417 | - Classify the feedback type (complaint, praise, question, suggestion).
418 | - Detect specific intents like information requests, cancellation warnings, escalation threats.
419 | - Score intent probabilities from 0.0 (no indication) to 1.0 (strong indication).
420 | """)
421 |
422 | if risk_assessment:
423 | optional_components.append("""
424 | RISK ASSESSMENT: Evaluate potential business risks in the feedback.
425 | - Include a 'risk_assessment' section with probabilities and categories of identified risks.
426 | - Assess churn probability, PR/reputation risk, legal/compliance concerns, etc.
427 | - Provide an escalation probability and urgency level.
428 | - Flag sensitive content that may require special attention.
429 | """)
430 |
431 | # Language specification
432 | language_prompt = f"\nLANGUAGE: Analyze text in {language}. Ensure all scores and categorizations are correctly interpreted within cultural and linguistic context."
433 |
434 | # Threshold configurations if provided
435 | threshold_prompt = ""
436 | if threshold_config and isinstance(threshold_config, dict):
437 | threshold_prompt = "\nTHRESHOLD CONFIGURATION:"
438 | for metric, value in threshold_config.items():
439 | threshold_prompt += f"\n- {metric}: {value}"
440 |
441 | # Combine all prompt components
442 | full_prompt = base_prompt + industry_prompt + mode_prompt + language_prompt + threshold_prompt
443 |
444 | if optional_components:
445 | full_prompt += "\n\nADDITIONAL ANALYSIS COMPONENTS:"
446 | full_prompt += "\n".join(optional_components)
447 |
448 | # Output format specification
449 | output_format = """
450 | RESPONSE FORMAT: Respond only with a valid JSON object containing all applicable sections based on the analysis parameters.
451 |
452 | Always include these core sections:
453 | - core_metrics: Overall sentiment, scores, and primary indicators
454 | - business_dimensions: Business-specific satisfaction and perception metrics
455 | - message_characteristics: Content properties, topics, and expression styles
456 | - recommended_actions: 1-3 specific business actions based on the analysis
457 | - meta: Will be populated with metadata about the analysis
458 |
459 | Add optional sections as specified by the analysis parameters.
460 |
461 | Ensure all numerical values are normalized to their specified ranges and all categorical values use consistent terminology.
462 | """
463 |
464 | full_prompt += output_format
465 |
466 | return full_prompt
467 |
468 |
469 | @with_tool_metrics
470 | @with_error_handling
471 | async def analyze_business_text_batch(
472 | texts: List[str],
473 | analysis_config: Dict[str, Any],
474 | aggregate_results: bool = True,
475 | max_concurrency: int = 3,
476 | provider: str = Provider.OPENAI.value,
477 | model: Optional[str] = None
478 | ) -> Dict[str, Any]:
479 | """Processes a batch of business texts for sentiment analysis with aggregated insights.
480 |
481 | Designed for analyzing large volumes of business feedback (reviews, surveys, tickets)
482 | efficiently with detailed individual analyses and optional aggregated metrics. Ideal for
483 | business intelligence, customer experience programs, and trend identification.
484 |
485 | Args:
486 | texts: List of text items to analyze (reviews, feedback, etc.).
487 | analysis_config: Configuration dictionary for analyze_business_sentiment.
488 | Example: {"analysis_mode": "standard", "entity_extraction": True}
489 | All parameters from analyze_business_sentiment except text, provider, model.
490 | aggregate_results: Whether to generate aggregated insights across all analyzed texts.
491 | Includes trend detection, sentiment distribution, and pattern identification.
492 | max_concurrency: Maximum number of parallel analyses to run.
493 | provider: The name of the LLM provider to use.
494 | model: The specific model ID. If None, uses the provider's default.
495 |
496 | Returns:
497 | A dictionary containing individual and aggregated results:
498 | {
499 | "individual_results": [
500 | {
501 | "text_id": 0,
502 | "text_preview": "First 50 characters of text...",
503 | "analysis": { /* Complete analysis result for this text */ }
504 | },
505 | // Additional individual results...
506 | ],
507 | "aggregate_insights": { // Only if aggregate_results=True
508 | "sentiment_distribution": {
509 | "positive": 0.65, // 65% positive
510 | "neutral": 0.20, // 20% neutral
511 | "negative": 0.15 // 15% negative
512 | },
513 | "average_metrics": {
514 | "sentiment_score": 0.42,
515 | "satisfaction_score": 3.8,
516 | // Other averaged metrics...
517 | },
518 | "top_aspects": [
519 | {"name": "customer_service", "avg_sentiment": 0.75, "mention_count": 42},
520 | {"name": "product_quality", "avg_sentiment": 0.62, "mention_count": 38},
521 | // Additional aspects...
522 | ],
523 | "key_topics": [
524 | {"topic": "shipping delays", "mention_count": 35, "avg_sentiment": -0.3},
525 | {"topic": "easy checkout", "mention_count": 28, "avg_sentiment": 0.8},
526 | // Additional topics...
527 | ],
528 | "entity_mention_frequencies": {
529 | "products": {"Product X": 45, "Product Y": 23},
530 | "features": {"user interface": 38, "reliability": 27}
531 | },
532 | "emerging_patterns": [
533 | "Increasing mentions of mobile app usability",
534 | "Growing negative sentiment about recent policy change"
535 | ],
536 | "risk_indicators": [
537 | {"issue": "shipping delays", "severity": "medium", "trend": "increasing"},
538 | {"issue": "billing confusion", "severity": "low", "trend": "stable"}
539 | ]
540 | },
541 | "meta": {
542 | "batch_size": 250,
543 | "success_count": 248,
544 | "error_count": 2,
545 | "processing_time": 128.5,
546 | "total_cost": 4.87,
547 | "timestamp": "2025-04-21T14:30:00Z"
548 | },
549 | "success": true
550 | }
551 |
552 | Raises:
553 | ToolInputError: If input parameters are invalid.
554 | ProviderError: If the provider service fails.
555 | ToolError: For other processing errors.
556 | """
557 | start_time = time.time()
558 | total_cost = 0.0
559 | success_count = 0
560 | error_count = 0
561 |
562 | # Validate inputs
563 | if not texts or not isinstance(texts, list):
564 | raise ToolInputError(
565 | "The 'texts' parameter must be a non-empty list of strings.",
566 | param_name="texts",
567 | provided_value=texts
568 | )
569 |
570 | if not analysis_config or not isinstance(analysis_config, dict):
571 | raise ToolInputError(
572 | "The 'analysis_config' parameter must be a dictionary of configuration options.",
573 | param_name="analysis_config",
574 | provided_value=analysis_config
575 | )
576 |
577 | # Process texts with concurrency control
578 | import asyncio
579 | semaphore = asyncio.Semaphore(max_concurrency)
580 | individual_results = []
581 | all_analyses = []
582 |
583 | async def process_text(idx: int, text: str):
584 | nonlocal total_cost, success_count, error_count
585 |
586 | async with semaphore:
587 | text_preview = text[:50] + ("..." if len(text) > 50 else "")
588 | logger.debug(f"Processing text {idx+1}/{len(texts)}: {text_preview}")
589 |
590 | try:
591 | # Create a copy of analysis_config to avoid modifying the original
592 | config = analysis_config.copy()
593 |
594 | # Add provider and model to config
595 | config["provider"] = provider
596 | config["model"] = model
597 |
598 | # Process the individual text using our refactored analyze_business_sentiment
599 | result = await analyze_business_sentiment(
600 | text=text,
601 | **config
602 | )
603 |
604 | # Update metrics
605 | total_cost += result.get("meta", {}).get("cost", 0.0)
606 | success_count += 1
607 |
608 | # Record result
609 | individual_results.append({
610 | "text_id": idx,
611 | "text_preview": text_preview,
612 | "analysis": result
613 | })
614 |
615 | # Store for aggregation
616 | all_analyses.append(result)
617 |
618 | return result
619 |
620 | except Exception as e:
621 | logger.error(f"Error analyzing text {idx}: {str(e)}", exc_info=True)
622 | error_count += 1
623 |
624 | # Record error
625 | individual_results.append({
626 | "text_id": idx,
627 | "text_preview": text_preview,
628 | "error": str(e)
629 | })
630 |
631 | return None
632 |
633 | # Create and run tasks
634 | tasks = [process_text(i, text) for i, text in enumerate(texts)]
635 | await asyncio.gather(*tasks)
636 |
637 | # Sort results by text_id to maintain original order
638 | individual_results.sort(key=lambda x: x["text_id"])
639 |
640 | # Build response
641 | result = {
642 | "individual_results": individual_results,
643 | "meta": {
644 | "batch_size": len(texts),
645 | "success_count": success_count,
646 | "error_count": error_count,
647 | "processing_time": time.time() - start_time,
648 | "total_cost": total_cost,
649 | "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
650 | },
651 | "success": True
652 | }
653 |
654 | # Calculate aggregate insights if requested and we have successful analyses
655 | if aggregate_results and all_analyses:
656 | try:
657 | aggregate_insights = _calculate_aggregate_insights(all_analyses)
658 | result["aggregate_insights"] = aggregate_insights
659 | except Exception as e:
660 | logger.error(f"Error calculating aggregate insights: {str(e)}", exc_info=True)
661 | result["aggregate_insights_error"] = str(e)
662 |
663 | return result
664 |
665 |
666 | def _calculate_aggregate_insights(analyses: List[Dict[str, Any]]) -> Dict[str, Any]:
667 | """Calculates aggregate insights across multiple business sentiment analyses."""
668 |
669 | # Initialize aggregation containers
670 | sentiment_counts = {"positive": 0, "neutral": 0, "negative": 0}
671 | sentiment_scores = []
672 | satisfaction_scores = []
673 | loyalty_indicators = []
674 | aspect_sentiments = {}
675 | topics = {}
676 | mentioned_entities = {
677 | "products": {},
678 | "features": {},
679 | "services": {}
680 | }
681 |
682 | # Process each analysis
683 | for analysis in analyses:
684 | # Skip any analyses without core_metrics
685 | if "core_metrics" not in analysis:
686 | continue
687 |
688 | core = analysis.get("core_metrics", {})
689 | business = analysis.get("business_dimensions", {})
690 |
691 | # Sentiment distribution
692 | sentiment = core.get("primary_sentiment", "neutral").lower()
693 | if sentiment in sentiment_counts:
694 | sentiment_counts[sentiment] += 1
695 |
696 | # Collect numerical metrics
697 | if "sentiment_score" in core:
698 | sentiment_scores.append(core["sentiment_score"])
699 |
700 | if "satisfaction_score" in business:
701 | satisfaction_scores.append(business["satisfaction_score"])
702 |
703 | if "loyalty_indicators" in business:
704 | loyalty_indicators.append(business["loyalty_indicators"])
705 |
706 | # Aspect sentiments
707 | for aspect, score in analysis.get("aspect_sentiment", {}).items():
708 | if aspect not in aspect_sentiments:
709 | aspect_sentiments[aspect] = {"scores": [], "count": 0}
710 |
711 | aspect_sentiments[aspect]["scores"].append(score)
712 | aspect_sentiments[aspect]["count"] += 1
713 |
714 | # Topics
715 | for topic in analysis.get("message_characteristics", {}).get("key_topics", []):
716 | if topic not in topics:
717 | topics[topic] = 0
718 | topics[topic] += 1
719 |
720 | # Entity mentions
721 | for entity_type, entities in analysis.get("entity_extraction", {}).items():
722 | if entity_type in mentioned_entities and isinstance(entities, list):
723 | for entity in entities:
724 | if entity not in mentioned_entities[entity_type]:
725 | mentioned_entities[entity_type][entity] = 0
726 | mentioned_entities[entity_type][entity] += 1
727 |
728 | # Calculate distributions as percentages
729 | total_sentiments = sum(sentiment_counts.values())
730 | sentiment_distribution = {
731 | k: round(v / total_sentiments, 2) if total_sentiments else 0
732 | for k, v in sentiment_counts.items()
733 | }
734 |
735 | # Calculate average metrics
736 | average_metrics = {}
737 | if sentiment_scores:
738 | average_metrics["sentiment_score"] = sum(sentiment_scores) / len(sentiment_scores)
739 |
740 | if satisfaction_scores:
741 | average_metrics["satisfaction_score"] = sum(satisfaction_scores) / len(satisfaction_scores)
742 |
743 | if loyalty_indicators:
744 | average_metrics["loyalty_indicators"] = sum(loyalty_indicators) / len(loyalty_indicators)
745 |
746 | # Process aspect sentiments
747 | top_aspects = []
748 | for aspect, data in aspect_sentiments.items():
749 | avg_sentiment = sum(data["scores"]) / len(data["scores"]) if data["scores"] else 0
750 | top_aspects.append({
751 | "name": aspect,
752 | "avg_sentiment": round(avg_sentiment, 2),
753 | "mention_count": data["count"]
754 | })
755 |
756 | # Sort aspects by mention count
757 | top_aspects.sort(key=lambda x: x["mention_count"], reverse=True)
758 |
759 | # Process topics
760 | key_topics = [{"topic": k, "mention_count": v} for k, v in topics.items()]
761 | key_topics.sort(key=lambda x: x["mention_count"], reverse=True)
762 |
763 | # Build aggregated insights
764 | aggregate_insights = {
765 | "sentiment_distribution": sentiment_distribution,
766 | "average_metrics": average_metrics,
767 | "top_aspects": top_aspects[:10], # Limit to top 10
768 | "key_topics": key_topics[:10], # Limit to top 10
769 | "entity_mention_frequencies": mentioned_entities
770 | }
771 |
772 | return aggregate_insights
```
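For orientation, here is a minimal, hypothetical usage sketch of the two tools above. It assumes the functions can be awaited directly (as `analyze_business_text_batch` itself does internally) and that an importable module exposes them; the import path shown is an assumption, not the repository's confirmed layout, and parameter names follow the docstrings above.

```python
# Hypothetical usage sketch -- not part of the repository source.
# Assumptions: the import path below, and a configured API key for the chosen provider.
import asyncio

from ultimate_mcp_server.tools.sentiment_analysis import (  # assumed module path
    analyze_business_sentiment,
    analyze_business_text_batch,
)


async def main() -> None:
    # Single-text analysis; parameter names mirror the analyze_business_sentiment docstring.
    single = await analyze_business_sentiment(
        text="The new dashboard is great, but support took three days to reply.",
        analysis_mode="customer_experience",
        aspect_based=True,
        intent_detection=True,
        provider="openai",  # model=None falls back to the provider's default model
    )
    print(single["core_metrics"]["primary_sentiment"], single["meta"]["cost"])

    # Batch analysis; analysis_config carries every analyze_business_sentiment
    # parameter except text, provider, and model.
    batch = await analyze_business_text_batch(
        texts=["Love the product!", "Billing overcharged me twice."],
        analysis_config={"analysis_mode": "standard", "risk_assessment": True},
        aggregate_results=True,
        max_concurrency=2,
        provider="openai",
    )
    print(batch["aggregate_insights"]["sentiment_distribution"])


if __name__ == "__main__":
    asyncio.run(main())
```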
--------------------------------------------------------------------------------
/ultimate_mcp_server/core/tournaments/tasks.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Tournament task implementations for asynchronous tournament execution.
3 | """
4 | # Standard Library Imports
5 | import asyncio
6 | import random
7 | import re
8 | import time
9 | from datetime import datetime, timezone
10 | from pathlib import Path
11 | from typing import Any, Dict, Optional
12 |
13 | from ultimate_mcp_server.core.evaluation.base import EvaluationScore
14 | from ultimate_mcp_server.core.models.tournament import (
15 | ModelConfig,
16 | ModelResponseData,
17 | TournamentData,
18 | TournamentRoundResult,
19 | TournamentStatus,
20 | )
21 | from ultimate_mcp_server.core.tournaments.manager import tournament_manager
22 | from ultimate_mcp_server.core.tournaments.utils import (
23 | calculate_weighted_score,
24 | create_round_prompt,
25 | extract_thinking,
26 | generate_comparison_file_content,
27 | generate_leaderboard_file_content,
28 | save_model_response_content,
29 | update_overall_best_response,
30 | )
31 | from ultimate_mcp_server.tools.completion import generate_completion
32 | from ultimate_mcp_server.tools.extraction import extract_code_from_response
33 | from ultimate_mcp_server.utils.logging import get_logger
34 |
35 | logger = get_logger("ultimate_mcp_server.tournaments.tasks")
36 |
37 | # --- Global semaphore for concurrent model calls ---
38 | MODEL_CALL_SEMAPHORE: Optional[asyncio.Semaphore] = None
39 |
40 | def initialize_semaphore(max_concurrent_calls: int):
41 | global MODEL_CALL_SEMAPHORE
42 | MODEL_CALL_SEMAPHORE = asyncio.Semaphore(max_concurrent_calls)
43 | logger.info(f"Tournament task semaphore initialized with concurrency: {max_concurrent_calls}")
44 |
45 | async def run_tournament_async(tournament_id: str):
46 | """Main async task to orchestrate the entire tournament."""
47 | await asyncio.sleep(0.1) # Small delay for state propagation
48 |
49 | tournament = tournament_manager.get_tournament(tournament_id, force_reload=True)
50 | if not tournament:
51 | logger.error(f"[TASK_ERROR] Tournament {tournament_id} not found for execution.")
52 | return
53 |
54 | if tournament.status != TournamentStatus.RUNNING: # Check if it was set to RUNNING
55 | logger.warning(f"[TASK_WARN] Tournament {tournament_id} not in RUNNING state ({tournament.status}). Aborting task.")
56 | return
57 |
58 | # --- Initialize semaphore based on tournament config ---
59 | if MODEL_CALL_SEMAPHORE is None or MODEL_CALL_SEMAPHORE._value != tournament.config.max_concurrent_model_calls:
60 | initialize_semaphore(tournament.config.max_concurrent_model_calls)
61 |
62 | logger.info(f"[TASK_START] Starting execution for tournament '{tournament.name}' (ID: {tournament_id})")
63 |
64 | try:
65 | if tournament.current_round < 0: # If just started
66 | tournament.current_round = 0
67 |
68 | while tournament.current_round < tournament.config.rounds:
69 | # --- Check for cancellation before starting a round ---
70 | current_tournament_state = tournament_manager.get_tournament(tournament_id, force_reload=True)
71 | if not current_tournament_state or current_tournament_state.status == TournamentStatus.CANCELLED:
72 | logger.info(f"[TASK_CANCEL] Tournament {tournament_id} cancelled. Halting execution.")
73 | if current_tournament_state and current_tournament_state.status != TournamentStatus.CANCELLED: # Ensure it's marked
74 | tournament_manager.update_tournament_status(tournament_id, TournamentStatus.CANCELLED, "Cancelled during execution.")
75 | return
76 |
77 | round_num = tournament.current_round
78 |             logger.info(f"[ROUND_START] Processing Round {round_num}/{tournament.config.rounds - 1} for '{tournament.name}'")
79 |
80 | round_result_obj = tournament.rounds_results[round_num] # Assumes initialized by manager
81 | round_result_obj.status = TournamentStatus.RUNNING
82 | round_result_obj.start_time = datetime.now(timezone.utc)
83 | tournament_manager._save_tournament_state(tournament)
84 |
85 | await process_single_round(tournament, round_num, round_result_obj)
86 |
87 | round_result_obj.status = TournamentStatus.COMPLETED # Mark round as completed
88 | round_result_obj.end_time = datetime.now(timezone.utc)
89 | tournament_manager._save_tournament_state(tournament)
90 | logger.info(f"[ROUND_END] Round {round_num} for '{tournament.name}' completed.")
91 |
92 | # --- Update overall best response after each round ---
93 | update_overall_best_response(tournament) # Utility function to find and set best
94 | tournament_manager._save_tournament_state(tournament)
95 |
96 | tournament.current_round += 1
97 | tournament_manager._save_tournament_state(tournament) # Save progress
98 |
99 | tournament.status = TournamentStatus.COMPLETED
100 | logger.info(f"[TASK_COMPLETE] Tournament '{tournament.name}' (ID: {tournament_id}) completed successfully.")
101 |
102 | except Exception as e:
103 | logger.error(f"[TASK_FAILURE] Tournament '{tournament.name}' failed: {e}", exc_info=True)
104 | tournament.status = TournamentStatus.FAILED
105 | tournament.error_message = str(e)
106 | finally:
107 | tournament.end_time = datetime.now(timezone.utc)
108 | tournament_manager._save_tournament_state(tournament)
109 | logger.info(f"Final state saved for tournament {tournament_id}. Status: {tournament.status}")
110 |
111 |
112 | async def process_single_round(tournament: TournamentData, round_num: int, round_result_obj: TournamentRoundResult):
113 | """Processes all model variants for a single round."""
114 |
115 | # Determine previous responses for synthesis rounds > 0
116 | previous_round_variant_responses: Dict[str, ModelResponseData] = {}
117 | if round_num > 0:
118 | prev_round_idx = round_num - 1
119 | if prev_round_idx < len(tournament.rounds_results):
120 | previous_round_result = tournament.rounds_results[prev_round_idx]
121 | previous_round_variant_responses = previous_round_result.responses # These are already ModelResponseData objects
122 | else:
123 | logger.warning(f"Could not find previous round {prev_round_idx} data for round {round_num}. Proceeding without it.")
124 |
125 | tasks = []
126 | for model_cfg in tournament.config.models:
127 | for i in range(model_cfg.diversity_count):
128 | variant_id = f"{model_cfg.model_id}/v{i}"
129 |
130 | # --- Check for cancellation before each model task ---
131 | current_tournament_state = tournament_manager.get_tournament(tournament.tournament_id, force_reload=True)
132 | if not current_tournament_state or current_tournament_state.status == TournamentStatus.CANCELLED:
133 | logger.info(f"[MODEL_TASK_CANCEL] Cancellation detected for tournament {tournament.tournament_id}. Skipping variant {variant_id}.")
134 |                     continue # Skip this variant; the same cancellation check skips the remaining variants
135 |
136 | # Skip if already processed (e.g., resuming a failed round)
137 | if variant_id in round_result_obj.responses and round_result_obj.responses[variant_id].response_text:
138 | logger.info(f"Variant {variant_id} for round {round_num} already processed. Skipping.")
139 | continue
140 |
141 | tasks.append(
142 | process_single_model_variant(
143 | tournament,
144 | model_cfg,
145 | variant_id, # Pass the unique variant ID
146 | round_num,
147 | round_result_obj,
148 | previous_round_variant_responses # Pass full ModelResponseData dict
149 | )
150 | )
151 |
152 | if not tasks:
153 | logger.info(f"No new model variants to process for round {round_num}.")
154 | round_result_obj.status = TournamentStatus.COMPLETED
155 | return
156 |
157 | logger.info(f"Gathering {len(tasks)} model variant tasks for round {round_num}.")
158 | # Await all tasks and catch any unhandled exceptions
159 | try:
160 | await asyncio.gather(*tasks)
161 | except Exception as e:
162 | logger.error(f"An error occurred during asyncio.gather in round {round_num}: {e}", exc_info=True)
163 | round_result_obj.error_message = (getattr(round_result_obj, 'error_message', '') or "") + f"; Error during task gathering: {str(e)}"
164 | round_result_obj.status = TournamentStatus.FAILED # Mark round as failed if gather fails
165 | # Individual task errors are handled within process_single_model_variant
166 | return
167 |
168 | # --- Generate comparison and leaderboard files ---
169 | # (Ensure these utils exist and are updated)
170 | comparison_md = generate_comparison_file_content(tournament, round_num)
171 | leaderboard_md = generate_leaderboard_file_content(tournament, round_num) # New utility
172 |
173 | round_storage_path = Path(tournament.storage_path) / f"round_{round_num}"
174 | round_storage_path.mkdir(parents=True, exist_ok=True)
175 |
176 | if comparison_md:
177 | comp_path = round_storage_path / "round_comparison_report.md"
178 | comp_path.write_text(comparison_md, encoding='utf-8')
179 | round_result_obj.comparison_file_path = str(comp_path)
180 | if leaderboard_md:
181 | lead_path = round_storage_path / "round_leaderboard.md"
182 | lead_path.write_text(leaderboard_md, encoding='utf-8')
183 | round_result_obj.leaderboard_file_path = str(lead_path)
184 |
185 | # Save state after generating reports
186 | tournament_manager._save_tournament_state(tournament)
187 |
188 |
189 | async def process_single_model_variant(
190 | tournament: TournamentData,
191 | model_config: "ModelConfig", # Forward ref as string, ModelConfig is imported
192 | variant_id: str, # e.g., "openai/gpt-4o/v0"
193 | round_num: int,
194 | round_result_obj: TournamentRoundResult,
195 | previous_round_variant_responses: Dict[str, ModelResponseData]
196 | ):
197 | """Processes a single model variant (handles diversity), including retries and evaluation."""
198 |
199 | # --- Acquire semaphore ---
200 | if MODEL_CALL_SEMAPHORE: # Should always be initialized
201 | await MODEL_CALL_SEMAPHORE.acquire()
202 |
203 | response_data = ModelResponseData(
204 | model_id_original=model_config.model_id,
205 | model_id_variant=variant_id,
206 | round_num=round_num
207 | )
208 | task_start_time = time.monotonic()
209 |
210 | # --- Prepare storage paths if needed (handled in save_model_response_content) ---
211 |
212 | try:
213 | # --- Check for cancellation ---
214 | current_tournament_state = tournament_manager.get_tournament(tournament.tournament_id, force_reload=True)
215 | if not current_tournament_state or current_tournament_state.status == TournamentStatus.CANCELLED:
216 | response_data.error = "Tournament cancelled before model execution."
217 | logger.info(f"Model task {variant_id} skipped due to tournament cancellation.")
218 | raise asyncio.CancelledError("Tournament cancelled")
219 |
220 |
221 | prompt = create_round_prompt(
222 | tournament,
223 | round_num,
224 | previous_round_variant_responses,
225 | target_model_variant_id=variant_id # For personalized prompts if needed
226 | )
227 |
228 | # --- LLM Call with Retries ---
229 | current_attempt = 0
230 | llm_response_dict = None
231 | while current_attempt <= tournament.config.max_retries_per_model_call:
232 | try:
233 | logger.info(f"[MODEL_CALL_START] Attempt {current_attempt+1}/{tournament.config.max_retries_per_model_call+1} for {variant_id}, Round {round_num}")
234 |
235 | provider_id = model_config.model_id.split('/')[0] if '/' in model_config.model_id else None
236 |
237 | # Parameters that are direct arguments to the generate_completion tool
238 | tool_direct_params = {
239 | "prompt": prompt,
240 | "model": model_config.model_id, # Use original model_id for API call
241 | "provider": provider_id,
242 | "temperature": model_config.temperature,
243 | # max_tokens is added conditionally below
244 | }
245 | if model_config.max_tokens is not None:
246 | tool_direct_params["max_tokens"] = model_config.max_tokens
247 |
248 | # Parameters that should be passed via the 'additional_params' argument of the tool
249 | tool_additional_params = {}
250 | if model_config.system_prompt is not None:
251 | tool_additional_params["system_prompt"] = model_config.system_prompt
252 | if model_config.seed is not None:
253 | tool_additional_params["seed"] = model_config.seed
254 | # Example: if model_config had top_p, it would be added here too:
255 | # if hasattr(model_config, 'top_p') and model_config.top_p is not None:
256 | # tool_additional_params["top_p"] = model_config.top_p
257 |
258 | llm_response_dict = await generate_completion(
259 | **tool_direct_params,
260 | additional_params=tool_additional_params
261 | )
262 |
263 | if llm_response_dict.get("success"):
264 | logger.info(f"[MODEL_CALL_SUCCESS] {variant_id} successful on attempt {current_attempt+1}")
265 | break # Success, exit retry loop
266 | else:
267 | error_msg = llm_response_dict.get("error", "Unknown LLM error")
268 | logger.warning(f"Attempt {current_attempt+1} for {variant_id} failed: {error_msg}")
269 | if current_attempt == tournament.config.max_retries_per_model_call:
270 | raise RuntimeError(f"LLM call failed after max retries: {error_msg}")
271 |
272 | except Exception as e: # Catch exceptions from generate_completion itself
273 | logger.warning(f"Exception on attempt {current_attempt+1} for {variant_id}: {e}")
274 | if current_attempt == tournament.config.max_retries_per_model_call:
275 | raise RuntimeError(f"LLM call failed after max retries (exception): {e}") from e
276 |
277 |             current_attempt += 1
278 |             # Exponential backoff with random jitter (applies to both the failed-call and exception paths)
279 |             sleep_time = random.uniform(
280 |                 tournament.config.retry_backoff_base_seconds,
281 |                 tournament.config.retry_backoff_base_seconds * 1.5 * (2 ** (current_attempt - 1))
282 |             )
283 |             # Max sleep to prevent overly long waits
284 |             sleep_time = min(sleep_time, 30.0) # e.g., max 30s backoff
285 |             logger.info(f"Retrying {variant_id} in {sleep_time:.2f} seconds...")
286 |             await asyncio.sleep(sleep_time)
287 |
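        # Illustrative note (added for clarity, not executable logic): with a hypothetical
        # retry_backoff_base_seconds of 1.0, the retry loop above sleeps roughly
        #   attempt 1: uniform(1.0, 1.5)s, attempt 2: uniform(1.0, 3.0)s,
        #   attempt 3: uniform(1.0, 6.0)s, attempt 4: uniform(1.0, 12.0)s,
        # capped at 30s, so concurrent variants back off without synchronizing.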
288 | # --- Process Successful LLM Response ---
289 | response_data.response_text = llm_response_dict.get("text", "")
290 | response_data.metrics.update({
291 | "input_tokens": llm_response_dict.get("tokens", {}).get("input"),
292 | "output_tokens": llm_response_dict.get("tokens", {}).get("output"),
293 | "cost": llm_response_dict.get("cost", 0.0),
294 | "latency_ms": int(llm_response_dict.get("processing_time", 0) * 1000),
295 | "api_model_id_used": llm_response_dict.get("model", model_config.model_id)
296 | })
297 |
298 | response_data.thinking_process = await extract_thinking(response_data.response_text)
299 |
300 | if tournament.config.tournament_type == "code":
301 | # Use the tool function for extraction
302 | extracted_code_string = await extract_code_from_response(
303 | response_text=response_data.response_text,
304 | model=tournament.config.extraction_model_id # Pass extraction_model_id as the model for extraction
305 | # timeout parameter uses its default from extract_code_from_response
306 | )
307 | if extracted_code_string: # Check if a non-empty string was returned
308 | response_data.extracted_code = extracted_code_string.strip()
309 | else:
310 | logger.warning(f"Code extraction returned empty or failed for {variant_id}. Original response length: {len(response_data.response_text or '')}")
311 | response_data.extracted_code = None # Explicitly set to None on failure or empty string
312 |
313 | # --- Save response content ---
314 | # (This util saves the main readable MD and potentially the raw code file)
315 | saved_paths = await save_model_response_content(
316 | tournament_storage_path=Path(tournament.storage_path),
317 | round_num=round_num,
318 | variant_id=variant_id, # Use variant_id for unique filenames
319 | response_text=response_data.response_text,
320 | extracted_code=response_data.extracted_code,
321 | thinking_process=response_data.thinking_process,
322 | metrics=response_data.metrics,
323 | tournament_type=tournament.config.tournament_type
324 | )
325 | response_data.response_file_path = saved_paths.get("markdown_file")
326 | response_data.extracted_code_file_path = saved_paths.get("code_file")
327 |
328 | # --- Run Evaluations ---
329 | evaluators = tournament_manager.get_evaluators_for_tournament(tournament.tournament_id)
330 | if evaluators:
331 | logger.info(f"Running {len(evaluators)} evaluators for {variant_id}...")
332 | for evaluator_instance in evaluators:
333 | eval_config = next((e for e in tournament.config.evaluators if e.evaluator_id == evaluator_instance.config.get("evaluator_id_ref", evaluator_instance.evaluator_type)), None) # Find original config for ID
334 |
335 | eval_id_for_scores = eval_config.evaluator_id if eval_config else evaluator_instance.evaluator_type
336 |
337 | try:
338 | eval_score_obj = await evaluator_instance.score(
339 | response_data, # Pass the full ModelResponseData
340 | tournament.config.prompt,
341 | tournament.config.tournament_type
342 | )
343 | response_data.scores[eval_id_for_scores] = eval_score_obj.model_dump() # Store full score object
344 | logger.debug(f"Evaluator '{eval_id_for_scores}' score for {variant_id}: {eval_score_obj.score}")
345 | except Exception as eval_e:
346 | logger.error(f"Evaluator '{eval_id_for_scores}' failed for {variant_id}: {eval_e}", exc_info=True)
347 | response_data.scores[eval_id_for_scores] = EvaluationScore(score=0.0, details=f"Evaluation error: {str(eval_e)}").model_dump()
348 |
349 | # Calculate overall weighted score
350 | response_data.overall_score = calculate_weighted_score(response_data.scores, tournament.config.evaluators)
351 |
352 |
353 | except asyncio.CancelledError: # Handle task cancellation gracefully
354 | logger.info(f"Task for {variant_id} in round {round_num} was cancelled.")
355 | response_data.error = "Task cancelled."
356 | response_data.metrics["final_status"] = "cancelled"
357 | except Exception as e:
358 | logger.error(f"[MODEL_TASK_FAILURE] Error processing {variant_id}: {e}", exc_info=True)
359 | response_data.error = str(e)
360 | response_data.metrics["final_status"] = "failed"
361 | finally:
362 | response_data.metrics["total_task_time_ms"] = int((time.monotonic() - task_start_time) * 1000)
363 | # --- Add response to the round_result_obj (which is part of tournament state) ---
364 | # This needs to be thread-safe if multiple tasks could update this concurrently,
365 | # but asyncio tasks run on a single thread, so direct assignment is fine here.
366 | # The `tournament` object itself is shared, so saving it needs care.
367 | round_result_obj.responses[variant_id] = response_data
368 |
369 | # Defer saving the full tournament state to the calling round processor
370 | # to batch saves, but log that this variant is done.
371 | logger.info(f"Finished processing variant {variant_id}. Error: {response_data.error is not None}")
372 |
373 | # --- Release semaphore ---
374 | if MODEL_CALL_SEMAPHORE:
375 | MODEL_CALL_SEMAPHORE.release()
376 |
377 | async def process_single_model(
378 | model_id: str,
379 | prompt: str,
380 | tournament_id: str,
381 | round_num: int,
382 | is_code_tournament: bool,
383 | extraction_model_id: Optional[str] = None
384 | ) -> ModelResponseData:
385 | """
386 | Handles the logic for calling a single model provider using the generate_completion tool.
387 | """
388 | start_time = time.monotonic()
389 | logger.info(f"[MODEL TASK] Processing model {model_id} for round {round_num}")
390 |
391 | # Get tournament to access storage path
392 | tournament = tournament_manager.get_tournament(tournament_id)
393 | if not tournament:
394 | raise ValueError(f"Tournament {tournament_id} not found")
395 |
396 | # Setup storage paths
397 | round_storage_path = Path(tournament.storage_path) / f"round_{round_num}"
398 | round_storage_path.mkdir(exist_ok=True, parents=True)
399 |
400 | response_data = ModelResponseData(
401 | model_id_original=model_id,
402 | model_id_variant=model_id, # In this context, variant is the same as original
403 | round_num=round_num
404 | )
405 | extracted_code: Optional[str] = None # noqa: F841
406 | file_extension = ".py" if is_code_tournament else ".md"
407 |
408 | provider_name = model_id.split('/')[0] if '/' in model_id else None # Infer provider from model_id if possible
409 | if not provider_name:
410 | logger.warning(f"[MODEL TASK] Could not infer provider from model_id: {model_id}. Attempting call without explicit provider.")
411 | # Note: generate_completion might fail if provider isn't specified and cannot be inferred
412 |
413 | try:
414 | # Use generate_completion tool
415 | logger.info(f"[MODEL TASK] Calling generate_completion for model {model_id} with prompt length {len(prompt)}")
416 | # Log prompt preview
417 | preview_length = 100
418 | prompt_preview = prompt[:preview_length] + "..." if len(prompt) > preview_length else prompt
419 | logger.info(f"[MODEL TASK] Prompt preview: {prompt_preview}")
420 |
421 | # Call the tool function directly
422 | completion_result_dict = await generate_completion(
423 | prompt=prompt,
424 | model=model_id, # Pass the full model ID
425 | provider=provider_name # Pass inferred provider
426 | # Add other params like max_tokens, temperature if needed/available in TournamentConfig
427 | )
428 |
429 | # Check for success
430 | if not completion_result_dict.get("success"):
431 | error_msg = completion_result_dict.get("error", "generate_completion tool indicated failure")
432 | raise RuntimeError(f"Completion failed for {model_id}: {error_msg}")
433 |
434 | # Extract data from the dictionary returned by the tool
435 | response_text = completion_result_dict.get("text", "")
436 | actual_model_used = completion_result_dict.get("model", model_id) # Use actual model if returned
437 | token_info = completion_result_dict.get("tokens", {})
438 | cost = completion_result_dict.get("cost", 0.0)
439 | processing_time_sec = completion_result_dict.get("processing_time", 0.0)
440 | latency_ms = int(processing_time_sec * 1000)
441 |
442 | # Log response preview
443 | response_preview = response_text[:preview_length] + "..." if len(response_text) > preview_length else response_text
444 | logger.info(f"[MODEL TASK] Response preview for {actual_model_used}: {response_preview}")
445 |
446 | # Extract metrics from the tool result
447 | completion_metrics = {
448 | "input_tokens": token_info.get("input"),
449 | "output_tokens": token_info.get("output"),
450 | "cost": cost,
451 | "latency_ms": latency_ms, # Use processing_time from tool
452 | "api_model_id_used": actual_model_used # Store the actual model ID used by the API
453 | }
454 |
455 | # Process response - use async extract_thinking
456 | thinking = await extract_thinking(response_text)
457 | code_metrics = {} # Placeholder for potential future code analysis metrics
458 |
459 | # Save response to file with better naming pattern
460 | timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
461 | safe_model_id = re.sub(r'[^a-zA-Z0-9_\-.]', '_', actual_model_used) # Use actual model name
462 | safe_tournament_id = re.sub(r'[^a-zA-Z0-9_\-.]', '_', tournament_id)
463 |
464 | filename_base = f"tournament_{safe_tournament_id}_round-{round_num}_model-{safe_model_id}_{timestamp}"
465 | raw_response_path = round_storage_path / f"{filename_base}{file_extension}"
466 |
467 | raw_response_path.write_text(response_text or "", encoding="utf-8")
468 |
469 | # Create a more user-friendly version with added context
470 | readable_content = f"""# Tournament Response
471 | **Tournament ID:** {tournament_id}
472 | **Round:** {round_num}
473 | **Model (Configured):** {model_id}
474 | **Model (Actual API):** {actual_model_used}
475 | **Timestamp:** {datetime.now().isoformat()}
476 | **Tokens:** {completion_metrics.get('input_tokens', 'N/A')} in, {completion_metrics.get('output_tokens', 'N/A')} out
477 | **Cost:** ${completion_metrics.get('cost', 0.0):.6f}
478 | **Latency:** {completion_metrics.get('latency_ms', 'N/A')}ms
479 |
480 | ## Prompt
481 | ```
482 | {prompt}
483 | ```
484 |
485 | ## Response
486 | ```
487 | {response_text}
488 | ```
489 | """
490 | readable_path = round_storage_path / f"{filename_base}_readable{file_extension}"
491 | readable_path.write_text(readable_content, encoding="utf-8")
492 |
493 | logger.info(f"[MODEL TASK] Saved response to: {readable_path}")
494 |
495 | # Populate response data
496 | # model_id_original and model_id_variant are already set
497 | response_data.response_text = response_text
498 | response_data.thinking_process = thinking
499 | response_data.metrics = {**completion_metrics, **code_metrics}
500 | response_data.timestamp = datetime.now(timezone.utc)
501 | response_data.response_file_path = str(raw_response_path) # Store path to raw response
502 | response_data.metrics["total_processing_time_ms"] = int((time.monotonic() - start_time) * 1000) # Keep overall task time
503 |
504 | logger.info(f"[MODEL TASK] Finished processing model {actual_model_used} for round {round_num} in {response_data.metrics['total_processing_time_ms']}ms")
505 |
506 | except Exception as e:
507 | logger.error(f"[MODEL TASK] Error processing model {model_id}: {e}", exc_info=True)
508 | response_data.error = str(e)
509 |
510 | return response_data
511 |
512 | async def run_single_round_task(tournament_id: str, round_num: int):
513 | """
514 | Task that runs a single round of the tournament, including LLM calls.
515 | """
516 | logger.info(f"[ROUND TASK START] Running round {round_num} for tournament {tournament_id}")
517 | tournament = tournament_manager.get_tournament(tournament_id, force_reload=True)
518 |
519 | # --- Check if tournament exists or was cancelled before proceeding ---
520 | if not tournament:
521 | logger.error(f"[ROUND TASK FAIL] Tournament {tournament_id} not found at start of round {round_num}.")
522 | return
523 | if tournament.status == TournamentStatus.CANCELLED:
524 | logger.info(f"[ROUND TASK ABORT] Tournament {tournament_id} was cancelled. Stopping round {round_num}.")
525 | # Ensure round status reflects cancellation if it was running
526 | if round_num < len(tournament.rounds_results):
527 | round_result = tournament.rounds_results[round_num]
528 | if round_result.status == TournamentStatus.RUNNING:
529 | round_result.status = TournamentStatus.CANCELLED
530 | round_result.error = "Cancelled by user request during execution."
531 | round_result.end_time = datetime.now(timezone.utc)
532 | tournament_manager._save_tournament_state(tournament)
533 | return
534 | # -------------------------------------------------------------------
535 |
536 | if round_num >= len(tournament.rounds_results):
537 | logger.error(f"[ROUND TASK FAIL] Invalid round number {round_num} for tournament {tournament_id} state.")
538 | return
539 |
540 | round_result = tournament.rounds_results[round_num]
541 |
542 | try:
543 | # Mark round as running
544 | round_result.status = TournamentStatus.RUNNING
545 | round_result.start_time = datetime.now(timezone.utc)
546 | tournament_manager._save_tournament_state(tournament)
547 | logger.info(f"[ROUND TASK] Round {round_num} marked as running")
548 |
549 | # Get tournament config
550 | is_code_tournament = tournament.config.tournament_type == "code"
551 | extraction_model_id = tournament.config.extraction_model_id
552 |
553 | # Create prompt for this round
554 | prompt = create_round_prompt(tournament, round_num)
555 |
556 | # Create tasks for all configured models
557 | model_tasks = []
558 | for model_config in tournament.config.models:
559 | model_id = model_config.model_id
560 |
561 | # Skip if already processed
562 | if model_id in round_result.responses:
563 | logger.info(f"[ROUND TASK] Skipping already processed model {model_id}")
564 | continue
565 |
566 | # Add task for this model
567 | task = process_single_model(
568 | model_id=model_id,
569 | prompt=prompt,
570 | tournament_id=tournament_id,
571 | round_num=round_num,
572 | is_code_tournament=is_code_tournament,
573 | extraction_model_id=extraction_model_id
574 | )
575 | model_tasks.append(task)
576 | logger.info(f"[ROUND TASK] Added task for model {model_id}")
577 |
578 | # Exit if no tasks to run
579 | if not model_tasks:
580 | logger.info(f"[ROUND TASK] No models to process for round {round_num}")
581 | round_result.status = TournamentStatus.COMPLETED
582 | round_result.end_time = datetime.now(timezone.utc)
583 | tournament_manager._save_tournament_state(tournament)
584 | return
585 |
586 | # Run all model tasks in parallel
587 | logger.info(f"[ROUND TASK] Running {len(model_tasks)} model tasks in parallel")
588 | results = await asyncio.gather(*model_tasks, return_exceptions=True)
589 |
590 |         # Process results, mapping them back to the models that were actually queued
591 |         # (models already present in round_result.responses were skipped above, so enumerate() indices would misalign)
592 |         task_model_ids = [mc.model_id for mc in tournament.config.models if mc.model_id not in round_result.responses]
593 |         for model_id, result in zip(task_model_ids, results):
593 |
594 | # Handle exceptions
595 | if isinstance(result, Exception):
596 | logger.error(f"[ROUND TASK] Error processing model {model_id}: {result}", exc_info=True)
597 | continue
598 |
599 | # Store result
600 | round_result.responses[model_id] = result
601 | tournament_manager._save_tournament_state(tournament)
602 |
603 | # Create comparison file
604 | comparison_content = generate_comparison_file_content(tournament, round_num)
605 | if comparison_content:
606 | round_dir = Path(tournament.storage_path) / f"round_{round_num}"
607 | round_dir.mkdir(exist_ok=True)
608 | comparison_file = round_dir / "model_comparison.md"
609 |
610 | with open(comparison_file, 'w', encoding='utf-8') as f:
611 | f.write(comparison_content)
612 |
613 | # Store the path in round results
614 | round_result.comparison_file_path = str(comparison_file)
615 | tournament_manager._save_tournament_state(tournament)
616 |
617 | # Mark round as completed
618 | round_result.status = TournamentStatus.COMPLETED
619 | round_result.end_time = datetime.now(timezone.utc)
620 | tournament_manager._save_tournament_state(tournament)
621 | logger.info(f"[ROUND TASK COMPLETE] Round {round_num} for tournament {tournament_id} completed successfully")
622 |
623 | # If this was the last round, mark the tournament as completed
624 | if round_num == tournament.config.rounds - 1:
625 | tournament.status = TournamentStatus.COMPLETED
626 | tournament.end_time = datetime.now(timezone.utc)
627 | tournament_manager._save_tournament_state(tournament)
628 | logger.info(f"[ROUND TASK] Tournament {tournament_id} marked as completed after final round")
629 |
630 | except Exception as e:
631 | logger.error(f"[ROUND TASK ERROR] Error processing round {round_num}: {e}", exc_info=True)
632 | round_result.status = TournamentStatus.FAILED
633 | round_result.error = str(e)
634 | round_result.end_time = datetime.now(timezone.utc)
635 | tournament_manager._save_tournament_state(tournament)
636 |
637 | # Mark tournament as failed
638 | tournament.status = TournamentStatus.FAILED
639 | tournament.error_message = f"Failed during round {round_num}: {str(e)}"
640 | tournament.end_time = datetime.now(timezone.utc)
641 | tournament_manager._save_tournament_state(tournament)
642 |
643 | async def process_model_task(
644 | tournament: TournamentData,
645 | model_id: str,
646 | round_num: int,
647 | previous_round_responses: Optional[Dict[str, str]] = None
648 | ) -> Dict[str, Any]:
649 | """Process a single model task for the tournament using the generate_completion tool.
650 |
651 | Args:
652 | tournament: Tournament data
653 | model_id: Model to use (e.g., 'openai/gpt-4o')
654 | round_num: Current round number
655 | previous_round_responses: Previous round responses (for rounds > 0)
656 |
657 | Returns:
658 | Model task result with response text and metrics
659 | """
660 | start_task_time = time.monotonic()
661 | # Infer provider from model_id format 'provider:model_name' or 'provider/model_name'
662 | provider_id = None
663 | if ':' in model_id:
664 | provider_id = model_id.split(':')[0]
665 | elif '/' in model_id: # Keep backward compatibility if '/' is used
666 | provider_id = model_id.split('/')[0]
667 |
668 | if not provider_id:
669 | logger.warning(f"[MODEL TASK] Could not infer provider from model_id: {model_id}. Attempting call without explicit provider.")
670 |
671 | try:
672 | logger.info(f"[MODEL TASK] Processing model {model_id} for round {round_num} (Provider: {provider_id})")
673 |
674 | # Generate prompt based on tournament type and round
675 | if round_num == 0:
676 | prompt = tournament.config.prompt
677 | else:
678 | prompt = create_round_prompt(tournament, round_num, previous_round_responses)
679 |
680 | # Generate completion using the tool
681 | logger.info(f"[MODEL TASK] Calling generate_completion for model {model_id} with prompt length {len(prompt)}")
682 | preview_length = 100
683 | prompt_preview = prompt[:preview_length] + "..." if len(prompt) > preview_length else prompt
684 | logger.info(f"[MODEL TASK] Prompt preview: {prompt_preview}")
685 |
686 | completion_result_dict = await generate_completion(
687 | prompt=prompt,
688 | model=model_id,
689 | provider=provider_id # Pass the inferred provider
690 | # Add other params like max_tokens, temperature if needed/available
691 | )
692 |
693 | # Check for success
694 | if not completion_result_dict.get("success"):
695 | error_msg = completion_result_dict.get("error", "generate_completion tool indicated failure")
696 | raise RuntimeError(f"Completion failed for {model_id}: {error_msg}")
697 |
698 | # Extract data from the result dictionary
699 | response_text = completion_result_dict.get("text", "")
700 | actual_model_used = completion_result_dict.get("model", model_id)
701 | token_info = completion_result_dict.get("tokens", {})
702 | cost = completion_result_dict.get("cost", 0.0)
703 | processing_time_sec = completion_result_dict.get("processing_time", 0.0)
704 |
705 | # Log response preview
706 | response_preview = response_text[:preview_length] + "..." if len(response_text) > preview_length else response_text
707 | logger.info(f"[MODEL TASK] Response preview for {actual_model_used}: {response_preview}")
708 |
709 | # Extract metrics from the tool result
710 | completion_metrics = {
711 | "input_tokens": token_info.get("input"),
712 | "output_tokens": token_info.get("output"),
713 | "cost": cost,
714 | "processing_time_ms": int(processing_time_sec * 1000) # Use tool's processing time
715 | }
716 |
717 | # Extract thinking/reasoning if present - use async extract_thinking
718 | thinking = await extract_thinking(response_text)
719 |
720 | # Save response to a file with timestamp - use async save_model_response
721 | response_file = await save_model_response_content(
722 | tournament_storage_path=Path(tournament.storage_path),
723 | round_num=round_num,
724 | variant_id=model_id, # Use model_id for unique filenames
725 | response_text=response_text,
726 | extracted_code=None, # No extracted code for this task
727 | thinking_process=thinking,
728 | metrics=completion_metrics,
729 | tournament_type=tournament.config.tournament_type
730 | )
731 |
732 | total_task_time_ms = int((time.monotonic() - start_task_time) * 1000)
733 | completion_metrics["total_task_time_ms"] = total_task_time_ms # Add overall task time
734 |
735 | logger.info(f"[MODEL TASK] Finished processing model {actual_model_used} for round {round_num} in {total_task_time_ms}ms (LLM time: {completion_metrics['processing_time_ms']}ms)")
736 |
737 | return {
738 | "model_id": actual_model_used, # Return actual model used
739 | "response_text": response_text,
740 | "thinking": thinking,
741 | "metrics": completion_metrics,
742 | "response_file": str(response_file.get("markdown_file")) if isinstance(response_file, dict) else str(response_file) # Ensure path is string
743 | }
744 | except Exception as e:
745 | logger.error(f"[MODEL TASK] Error processing model {model_id}: {str(e)}", exc_info=True)
746 | total_task_time_ms = int((time.monotonic() - start_task_time) * 1000)
747 | return {
748 | "model_id": model_id,
749 | "error": str(e),
750 | "response_text": f"Error generating response: {str(e)}",
751 | "thinking": None,
752 | "metrics": {
753 | "error": str(e),
754 | "total_task_time_ms": total_task_time_ms,
755 | "processing_time_ms": None # LLM call failed
756 | },
757 | "response_file": None
758 | }
```
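
The provider-inference step in `process_model_task` (splitting the model id on `:` or, for backward compatibility, `/`) is small enough to check in isolation. The sketch below is illustrative only and not part of the repository: the `infer_provider` helper name is ours, and the example model ids are placeholders.

```python
# Minimal sketch of the provider-prefix inference used in process_model_task.
# The function name `infer_provider` is hypothetical; only the split logic mirrors the source.
from typing import Optional

def infer_provider(model_id: str) -> Optional[str]:
    """Return the provider prefix from 'provider:model' or 'provider/model', else None."""
    if ":" in model_id:
        return model_id.split(":", 1)[0]
    if "/" in model_id:
        return model_id.split("/", 1)[0]
    return None

assert infer_provider("openai/gpt-4o") == "openai"
assert infer_provider("anthropic:claude-3-7-sonnet") == "anthropic"
assert infer_provider("gpt-4o") is None  # caller falls back to no explicit provider
```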
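Likewise, the pair-and-zip pattern used when gathering per-model tasks in the round runner can be exercised with stand-in coroutines. This is a minimal sketch under our own assumptions: `fake_model_call` and the example model ids are hypothetical and not taken from the codebase; only the skip-then-zip structure mirrors the source.

```python
# Illustrative only: keeping gather() results aligned with the model that produced
# them, even when some models are skipped (e.g. already present in round_result.responses).
import asyncio

async def fake_model_call(model_id: str) -> str:
    # Stand-in for the per-model processing coroutine; returns a dummy response.
    await asyncio.sleep(0)
    return f"response from {model_id}"

async def main() -> None:
    skipped = {"openai/gpt-4o"}  # pretend this model was already processed
    model_ids = ["openai/gpt-4o", "anthropic:claude-3-7-sonnet", "openai/gpt-4o-mini"]

    # Only create tasks for models that were not skipped, pairing each with its id.
    model_tasks = [
        (model_id, fake_model_call(model_id))
        for model_id in model_ids
        if model_id not in skipped
    ]
    results = await asyncio.gather(*(task for _, task in model_tasks), return_exceptions=True)

    # zip() keeps ids and results aligned regardless of how many models were skipped.
    for (model_id, _), result in zip(model_tasks, results):
        if isinstance(result, Exception):
            print(f"{model_id} failed: {result}")
        else:
            print(f"{model_id} -> {result}")

asyncio.run(main())
```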