This is page 13 of 45. Use http://codebase.md/dicklesworthstone/llm_gateway_mcp_server?lines=true&page={x} to view the full context.
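As a convenience, here is a minimal sketch of pulling down the full paginated dump referenced above. It assumes each page is served as plain markdown and that the `requests` package is available; it is illustrative only and not part of the repository:

```python
# Illustrative helper: fetch all 45 pages of the codebase dump mentioned above.
import requests

BASE_URL = "http://codebase.md/dicklesworthstone/llm_gateway_mcp_server"

pages = []
for page in range(1, 46):  # pages are 1-indexed; 45 pages total
    resp = requests.get(BASE_URL, params={"lines": "true", "page": page}, timeout=30)
    resp.raise_for_status()
    pages.append(resp.text)

full_dump = "\n".join(pages)
print(f"Fetched {len(pages)} pages ({len(full_dump)} characters)")
```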
# Directory Structure
```
├── .cursorignore
├── .env.example
├── .envrc
├── .gitignore
├── additional_features.md
├── check_api_keys.py
├── completion_support.py
├── comprehensive_test.py
├── docker-compose.yml
├── Dockerfile
├── empirically_measured_model_speeds.json
├── error_handling.py
├── example_structured_tool.py
├── examples
│ ├── __init__.py
│ ├── advanced_agent_flows_using_unified_memory_system_demo.py
│ ├── advanced_extraction_demo.py
│ ├── advanced_unified_memory_system_demo.py
│ ├── advanced_vector_search_demo.py
│ ├── analytics_reporting_demo.py
│ ├── audio_transcription_demo.py
│ ├── basic_completion_demo.py
│ ├── cache_demo.py
│ ├── claude_integration_demo.py
│ ├── compare_synthesize_demo.py
│ ├── cost_optimization.py
│ ├── data
│ │ ├── sample_event.txt
│ │ ├── Steve_Jobs_Introducing_The_iPhone_compressed.md
│ │ └── Steve_Jobs_Introducing_The_iPhone_compressed.mp3
│ ├── docstring_refiner_demo.py
│ ├── document_conversion_and_processing_demo.py
│ ├── entity_relation_graph_demo.py
│ ├── filesystem_operations_demo.py
│ ├── grok_integration_demo.py
│ ├── local_text_tools_demo.py
│ ├── marqo_fused_search_demo.py
│ ├── measure_model_speeds.py
│ ├── meta_api_demo.py
│ ├── multi_provider_demo.py
│ ├── ollama_integration_demo.py
│ ├── prompt_templates_demo.py
│ ├── python_sandbox_demo.py
│ ├── rag_example.py
│ ├── research_workflow_demo.py
│ ├── sample
│ │ ├── article.txt
│ │ ├── backprop_paper.pdf
│ │ ├── buffett.pdf
│ │ ├── contract_link.txt
│ │ ├── legal_contract.txt
│ │ ├── medical_case.txt
│ │ ├── northwind.db
│ │ ├── research_paper.txt
│ │ ├── sample_data.json
│ │ └── text_classification_samples
│ │ ├── email_classification.txt
│ │ ├── news_samples.txt
│ │ ├── product_reviews.txt
│ │ └── support_tickets.txt
│ ├── sample_docs
│ │ └── downloaded
│ │ └── attention_is_all_you_need.pdf
│ ├── sentiment_analysis_demo.py
│ ├── simple_completion_demo.py
│ ├── single_shot_synthesis_demo.py
│ ├── smart_browser_demo.py
│ ├── sql_database_demo.py
│ ├── sse_client_demo.py
│ ├── test_code_extraction.py
│ ├── test_content_detection.py
│ ├── test_ollama.py
│ ├── text_classification_demo.py
│ ├── text_redline_demo.py
│ ├── tool_composition_examples.py
│ ├── tournament_code_demo.py
│ ├── tournament_text_demo.py
│ ├── unified_memory_system_demo.py
│ ├── vector_search_demo.py
│ ├── web_automation_instruction_packs.py
│ └── workflow_delegation_demo.py
├── LICENSE
├── list_models.py
├── marqo_index_config.json.example
├── mcp_protocol_schema_2025-03-25_version.json
├── mcp_python_lib_docs.md
├── mcp_tool_context_estimator.py
├── model_preferences.py
├── pyproject.toml
├── quick_test.py
├── README.md
├── resource_annotations.py
├── run_all_demo_scripts_and_check_for_errors.py
├── storage
│ └── smart_browser_internal
│ ├── locator_cache.db
│ ├── readability.js
│ └── storage_state.enc
├── test_client.py
├── test_connection.py
├── TEST_README.md
├── test_sse_client.py
├── test_stdio_client.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── integration
│ │ ├── __init__.py
│ │ └── test_server.py
│ ├── manual
│ │ ├── test_extraction_advanced.py
│ │ └── test_extraction.py
│ └── unit
│ ├── __init__.py
│ ├── test_cache.py
│ ├── test_providers.py
│ └── test_tools.py
├── TODO.md
├── tool_annotations.py
├── tools_list.json
├── ultimate_mcp_banner.webp
├── ultimate_mcp_logo.webp
├── ultimate_mcp_server
│ ├── __init__.py
│ ├── __main__.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── commands.py
│ │ ├── helpers.py
│ │ └── typer_cli.py
│ ├── clients
│ │ ├── __init__.py
│ │ ├── completion_client.py
│ │ └── rag_client.py
│ ├── config
│ │ └── examples
│ │ └── filesystem_config.yaml
│ ├── config.py
│ ├── constants.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── evaluation
│ │ │ ├── base.py
│ │ │ └── evaluators.py
│ │ ├── providers
│ │ │ ├── __init__.py
│ │ │ ├── anthropic.py
│ │ │ ├── base.py
│ │ │ ├── deepseek.py
│ │ │ ├── gemini.py
│ │ │ ├── grok.py
│ │ │ ├── ollama.py
│ │ │ ├── openai.py
│ │ │ └── openrouter.py
│ │ ├── server.py
│ │ ├── state_store.py
│ │ ├── tournaments
│ │ │ ├── manager.py
│ │ │ ├── tasks.py
│ │ │ └── utils.py
│ │ └── ums_api
│ │ ├── __init__.py
│ │ ├── ums_database.py
│ │ ├── ums_endpoints.py
│ │ ├── ums_models.py
│ │ └── ums_services.py
│ ├── exceptions.py
│ ├── graceful_shutdown.py
│ ├── services
│ │ ├── __init__.py
│ │ ├── analytics
│ │ │ ├── __init__.py
│ │ │ ├── metrics.py
│ │ │ └── reporting.py
│ │ ├── cache
│ │ │ ├── __init__.py
│ │ │ ├── cache_service.py
│ │ │ ├── persistence.py
│ │ │ ├── strategies.py
│ │ │ └── utils.py
│ │ ├── cache.py
│ │ ├── document.py
│ │ ├── knowledge_base
│ │ │ ├── __init__.py
│ │ │ ├── feedback.py
│ │ │ ├── manager.py
│ │ │ ├── rag_engine.py
│ │ │ ├── retriever.py
│ │ │ └── utils.py
│ │ ├── prompts
│ │ │ ├── __init__.py
│ │ │ ├── repository.py
│ │ │ └── templates.py
│ │ ├── prompts.py
│ │ └── vector
│ │ ├── __init__.py
│ │ ├── embeddings.py
│ │ └── vector_service.py
│ ├── tool_token_counter.py
│ ├── tools
│ │ ├── __init__.py
│ │ ├── audio_transcription.py
│ │ ├── base.py
│ │ ├── completion.py
│ │ ├── docstring_refiner.py
│ │ ├── document_conversion_and_processing.py
│ │ ├── enhanced-ums-lookbook.html
│ │ ├── entity_relation_graph.py
│ │ ├── excel_spreadsheet_automation.py
│ │ ├── extraction.py
│ │ ├── filesystem.py
│ │ ├── html_to_markdown.py
│ │ ├── local_text_tools.py
│ │ ├── marqo_fused_search.py
│ │ ├── meta_api_tool.py
│ │ ├── ocr_tools.py
│ │ ├── optimization.py
│ │ ├── provider.py
│ │ ├── pyodide_boot_template.html
│ │ ├── python_sandbox.py
│ │ ├── rag.py
│ │ ├── redline-compiled.css
│ │ ├── sentiment_analysis.py
│ │ ├── single_shot_synthesis.py
│ │ ├── smart_browser.py
│ │ ├── sql_databases.py
│ │ ├── text_classification.py
│ │ ├── text_redline_tools.py
│ │ ├── tournament.py
│ │ ├── ums_explorer.html
│ │ └── unified_memory_system.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── async_utils.py
│ │ ├── display.py
│ │ ├── logging
│ │ │ ├── __init__.py
│ │ │ ├── console.py
│ │ │ ├── emojis.py
│ │ │ ├── formatter.py
│ │ │ ├── logger.py
│ │ │ ├── panels.py
│ │ │ ├── progress.py
│ │ │ └── themes.py
│ │ ├── parse_yaml.py
│ │ ├── parsing.py
│ │ ├── security.py
│ │ └── text.py
│ └── working_memory_api.py
├── unified_memory_system_technical_analysis.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/ultimate_mcp_server/cli/commands.py:
--------------------------------------------------------------------------------
```python
1 | """Command implementations for the Ultimate MCP Server CLI."""
2 | import asyncio
3 | import inspect
4 | import os
5 | import sys
6 | import time
7 | from typing import List, Optional
8 |
9 | from rich.console import Console
10 | from rich.progress import (
11 | BarColumn,
12 | Progress,
13 | SpinnerColumn,
14 | TaskProgressColumn,
15 | TextColumn,
16 | TimeElapsedColumn,
17 | )
18 | from rich.rule import Rule
19 | from rich.table import Table
20 |
21 | from ultimate_mcp_server.config import get_config
22 | from ultimate_mcp_server.constants import BASE_TOOLSET_CATEGORIES, Provider
23 | from ultimate_mcp_server.core.providers.base import get_provider
24 | from ultimate_mcp_server.core.server import Gateway, start_server
25 | from ultimate_mcp_server.graceful_shutdown import enable_quiet_shutdown
26 | from ultimate_mcp_server.services.cache import get_cache_service
27 | from ultimate_mcp_server.utils import get_logger
28 |
29 | logger = get_logger(__name__)
30 | console = Console(file=sys.stderr) # Use stderr to avoid interfering with MCP protocol
31 |
32 |
33 | def run_server(
34 | host: Optional[str] = None,
35 | port: Optional[int] = None,
36 | workers: Optional[int] = None,
37 | log_level: Optional[str] = None,
38 | transport_mode: str = "sse",
39 | include_tools: Optional[List[str]] = None,
40 | exclude_tools: Optional[List[str]] = None,
41 | load_all_tools: bool = False,
42 | ) -> None:
43 | """Start the Ultimate MCP Server with specified configuration.
44 |
45 | This function initializes and launches the MCP-compliant server that provides
46 | an API for AI agents to use various tools and capabilities. The server uses
47 | FastAPI under the hood and can be customized through configuration overrides.
48 | Server settings from command-line arguments take precedence over .env and config file values.
49 |
50 | Args:
51 | host: Network interface to bind to (e.g., '127.0.0.1' for local, '0.0.0.0' for all)
52 | port: TCP port to listen on (default: 8013 from config)
53 | workers: Number of parallel worker processes (more workers = higher concurrency)
54 | log_level: Logging verbosity (debug, info, warning, error, critical)
55 | transport_mode: Communication protocol mode ('sse' for Server-Sent Events streaming
56 | or 'stdio' for standard input/output in certain environments)
57 | include_tools: Allowlist of specific tool names to register (if not specified, all
58 | available tools are registered)
59 | exclude_tools: Blocklist of specific tool names to exclude (takes precedence over
60 | include_tools if both are specified)
61 | load_all_tools: If True, load all available tools. If False (default), load only
62 | the Base Toolset (completion, filesystem, optimization, provider,
63 | local_text, meta, search).
64 | """
65 | # Set up graceful shutdown handling
66 | enable_quiet_shutdown()
67 |
68 | # Get the current config
69 | cfg = get_config()
70 |
71 | # Override config with provided values
72 | if host:
73 | cfg.server.host = host
74 | if port:
75 | cfg.server.port = port
76 | if workers:
77 | cfg.server.workers = workers
78 |
79 | # Update tool registration config
80 | if include_tools or exclude_tools:
81 | cfg.tool_registration.filter_enabled = True
82 |
83 | if include_tools:
84 | cfg.tool_registration.included_tools = include_tools
85 |
86 | if exclude_tools:
87 | cfg.tool_registration.excluded_tools = exclude_tools
88 |
89 | # Determine effective log level
90 | effective_log_level = log_level or getattr(cfg, 'log_level', 'info')
91 |
92 | # Print server info
 93 |     console.print("[bold blue]Starting Ultimate MCP Server[/bold blue]")
94 | console.print(f"Host: [cyan]{cfg.server.host}[/cyan]")
95 | console.print(f"Port: [cyan]{cfg.server.port}[/cyan]")
96 | console.print(f"Workers: [cyan]{cfg.server.workers}[/cyan]")
97 | console.print(f"Log level: [cyan]{effective_log_level.upper()}[/cyan]")
98 | console.print(f"Transport mode: [cyan]{transport_mode}[/cyan]")
99 |
100 | # Tool Loading Status
101 | if load_all_tools:
102 | console.print("Tool Loading: [yellow]All Available Tools[/yellow]")
103 | else:
104 | console.print("Tool Loading: [yellow]Base Toolset Only[/yellow] (Use --load-all-tools to load all)")
105 | # Format the categories for display
106 | console.print(" [bold]Includes:[/bold]")
107 | for category, tools in BASE_TOOLSET_CATEGORIES.items():
108 | console.print(f" [cyan]{category}[/cyan]: {', '.join(tools)}")
109 |
110 | # Print tool filtering info if enabled
111 | if cfg.tool_registration.filter_enabled:
112 | if cfg.tool_registration.included_tools:
113 | console.print(f"Including tools: [cyan]{', '.join(cfg.tool_registration.included_tools)}[/cyan]")
114 | if cfg.tool_registration.excluded_tools:
115 | console.print(f"Excluding tools: [red]{', '.join(cfg.tool_registration.excluded_tools)}[/red]")
116 |
117 | console.print()
118 |
119 | # Set environment variables for the tool context estimator to use
120 | os.environ["MCP_SERVER_HOST"] = cfg.server.host
121 | os.environ["MCP_SERVER_PORT"] = str(cfg.server.port)
122 |
123 | # Just run the server directly - no threading, no token analysis
124 | start_server(
125 | host=cfg.server.host,
126 | port=cfg.server.port,
127 | workers=cfg.server.workers,
128 | log_level=effective_log_level,
129 | transport_mode=transport_mode,
130 | include_tools=cfg.tool_registration.included_tools if cfg.tool_registration.filter_enabled else None,
131 | exclude_tools=cfg.tool_registration.excluded_tools if cfg.tool_registration.filter_enabled else None,
132 | load_all_tools=load_all_tools,
133 | )
134 |
135 |
136 | async def list_providers(check_keys: bool = False, list_models: bool = False) -> None:
137 | """Display information about configured LLM providers and their status.
138 |
139 | This function queries all supported LLM providers (OpenAI, Anthropic, Gemini, etc.),
140 | displays their configuration status, and can optionally verify API key validity
141 | and list available models. This is useful for diagnosing configuration issues
142 | or exploring what models are accessible through each provider.
143 |
144 | The basic output shows provider names, enabled status, API key presence,
145 | and default model configuration. Additional information is available through
146 | the optional parameters.
147 |
148 | Args:
149 | check_keys: When True, tests each configured API key against the respective
150 | provider's authentication endpoint to verify it's valid and active
151 | list_models: When True, queries each provider's model list endpoint and displays
152 | all models available for use with your current credentials
153 | """
154 | # Get the current config
155 | cfg = get_config()
156 |
157 | # Create provider table
158 | table = Table(title="Available LLM Providers")
159 | table.add_column("Provider", style="cyan")
160 | table.add_column("Enabled", style="green")
161 | table.add_column("API Key", style="yellow")
162 | table.add_column("Default Model", style="blue")
163 |
164 | # Add spinner during provider initialization
165 | with Progress(
166 | SpinnerColumn(),
167 | TextColumn("[bold blue]Initializing providers...[/bold blue]"),
168 | transient=True
169 | ) as progress:
170 | progress.add_task("init", total=None)
171 |
172 | # Create Gateway instance (initializes all providers)
173 | gateway = Gateway()
174 | while not hasattr(gateway, 'provider_status') or not gateway.provider_status:
175 | await asyncio.sleep(0.1)
176 |
177 | # Get provider status
178 | provider_status = gateway.provider_status
179 |
180 | # Add rows to table
181 | for provider_name in [p.value for p in Provider]:
182 | status = provider_status.get(provider_name, None)
183 |
184 | if status:
185 | enabled = "✅" if status.enabled else "❌"
186 | api_key = "✅" if status.api_key_configured else "❌"
187 |
188 | # Get default model
189 | provider_cfg = getattr(cfg, 'providers', {}).get(provider_name, None)
190 | default_model = provider_cfg.default_model if provider_cfg else "N/A"
191 |
192 | table.add_row(provider_name, enabled, api_key, default_model)
193 |
194 | # Print table
195 | console.print(table)
196 |
197 | # Check API keys if requested
198 | if check_keys:
199 | console.print("\n[bold]API Key Check:[/bold]")
200 |
201 | with Progress(
202 | SpinnerColumn(),
203 | TextColumn("[bold blue]Checking API keys...[/bold blue]"),
204 | transient=True
205 | ) as progress:
206 | progress.add_task("check", total=None)
207 |
208 | for provider_name in [p.value for p in Provider]:
209 | console.print(Rule(f"[bold cyan]{provider_name}[/bold cyan]", align="center"))
210 | status = provider_status.get(provider_name, None)
211 |
212 | if status and status.api_key_configured:
213 | try:
214 | # Get provider instance
215 | provider = get_provider(provider_name)
216 |
217 | # Check API key
218 | valid = await provider.check_api_key()
219 |
220 | status_text = "[green]valid[/green]" if valid else "[red]invalid[/red]"
221 | console.print(f"API Key Status: {status_text}")
222 |
223 | except Exception as e:
224 | console.print(f"[red]Error checking API key: {str(e)}[/red]")
225 | else:
226 | if status:
227 | console.print("API Key Status: [yellow]not configured[/yellow]")
228 | else:
229 | console.print("API Key Status: [dim]Provider not configured/enabled[/dim]")
230 | console.print() # Add spacing after each provider's check
231 |
232 | # List models if requested
233 | if list_models:
234 | console.print("\n[bold]Available Models:[/bold]")
235 |
236 | with Progress(
237 | SpinnerColumn(),
238 | TextColumn("[bold blue]Loading models...[/bold blue]"),
239 | transient=True
240 | ) as progress:
241 | progress.add_task("load", total=None)
242 |
243 | for provider_name in [p.value for p in Provider]:
244 | status = provider_status.get(provider_name, None)
245 |
246 | if status and status.available:
247 | provider_instance = gateway.providers.get(provider_name)
248 |
249 | if provider_instance:
250 | console.print(Rule(f"[bold cyan]{provider_name} Models[/bold cyan]", align="center"))
251 | try:
252 | # Get models
253 | models = await provider_instance.list_models()
254 |
255 | # Create model table
256 | model_table = Table(box=None, show_header=True, header_style="bold blue")
257 | model_table.add_column("Model ID", style="cyan")
258 | model_table.add_column("Description", style="green")
259 |
260 | if models:
261 | for model in models:
262 | model_table.add_row(
263 | model["id"],
264 | model.get("description", "[dim]N/A[/dim]")
265 | )
266 | console.print(model_table)
267 | else:
268 | console.print("[yellow]No models found or returned by provider.[/yellow]")
269 |
270 | except Exception as e:
271 | console.print(f"[red]Error listing models for {provider_name}: {str(e)}[/red]")
272 | else:
273 | if status:
274 | console.print(Rule(f"[bold dim]{provider_name}[/bold dim]", align="center"))
275 | console.print("[yellow]Provider not available or configured.[/yellow]")
276 | console.print() # Add spacing after each provider's models
277 |
278 |
279 | async def test_provider(provider: str, model: Optional[str] = None, prompt: str = "Hello, world!") -> None:
280 | """Test an LLM provider's functionality with a complete request-response cycle.
281 |
282 | This function conducts an end-to-end test of a specified LLM provider by
283 | sending a sample prompt and displaying the resulting completion. It measures
284 | performance metrics such as response time, token usage, and estimated cost.
285 |
286 | The test verifies:
287 | - API key validity and configuration
288 | - Connection to the provider's endpoint
289 | - Model availability and functionality
290 | - Response generation capabilities
291 | - Cost estimation correctness
292 |
293 | Results include the generated text, model used, token counts (input/output),
294 | estimated cost, and response time, providing a comprehensive health check.
295 |
296 | Args:
297 | provider: Provider identifier (e.g., 'openai', 'anthropic', 'gemini')
298 | model: Specific model to use for the test (if None, uses the provider's default model)
299 | prompt: Text prompt to send to the model (defaults to a simple greeting)
300 | """
301 | console.print(f"[bold]Testing provider:[/bold] [cyan]{provider}[/cyan]")
302 | console.print(f"[bold]Model:[/bold] [cyan]{model or 'default'}[/cyan]")
303 | console.print(f'[bold]Prompt:[/bold] [green]"{prompt}"[/green]')
304 | console.print()
305 |
306 | try:
307 | # Get provider instance
308 | provider_instance = get_provider(provider)
309 |
310 | # Initialize provider
311 | await provider_instance.initialize()
312 |
313 | # Show spinner during generation
314 | with Progress(
315 | SpinnerColumn(),
316 | TextColumn("[bold blue]Generating completion...[/bold blue]"),
317 | transient=True
318 | ) as progress:
319 | progress.add_task("generate", total=None)
320 |
321 | # Generate completion
322 | start_time = time.time()
323 | result = await provider_instance.generate_completion(
324 | prompt=prompt,
325 | model=model,
326 | temperature=0.7
327 | )
328 | elapsed_time = time.time() - start_time
329 |
330 | # Print result
331 | console.print("[bold cyan]Generated text:[/bold cyan]")
332 | console.print(result.text)
333 | console.print()
334 |
335 | # Print metrics
336 | console.print(f"[bold]Model used:[/bold] [cyan]{result.model}[/cyan]")
337 | console.print(f"[bold]Tokens:[/bold] [yellow]{result.input_tokens}[/yellow] input, [yellow]{result.output_tokens}[/yellow] output, [yellow]{result.total_tokens}[/yellow] total")
338 | console.print(f"[bold]Cost:[/bold] [green]${result.cost:.6f}[/green]")
339 | console.print(f"[bold]Time:[/bold] [blue]{elapsed_time:.2f}s[/blue]")
340 |
341 | except Exception as e:
342 | console.print(f"[bold red]Error testing provider:[/bold red] {str(e)}")
343 |
344 |
345 | async def generate_completion(
346 | provider: str,
347 | model: Optional[str] = None,
348 | prompt: str = "",
349 | temperature: float = 0.7,
350 | max_tokens: Optional[int] = None,
351 | system: Optional[str] = None,
352 | stream: bool = False
353 | ) -> None:
354 | """Generate text from an LLM provider directly through the CLI.
355 |
356 | This function provides direct access to LLM text generation capabilities
357 | without requiring an MCP server or client. It supports both synchronous
358 | (wait for full response) and streaming (token-by-token) output modes.
359 |
360 | For providers that natively support system prompts (like Anthropic),
361 | the system parameter is passed directly to the API. For other providers,
362 | the system message is prepended to the prompt with appropriate formatting.
363 |
364 | The function displays:
365 | - Generated text (with real-time streaming if requested)
366 | - Model information
367 | - Token usage statistics (input/output)
368 | - Cost estimate
369 | - Response time
370 |
371 | Args:
372 | provider: LLM provider identifier (e.g., 'openai', 'anthropic', 'gemini')
373 | model: Specific model ID to use (if None, uses provider's default model)
374 | prompt: The text prompt to send to the model
375 | temperature: Sampling temperature (0.0-2.0) controlling output randomness
376 | (lower = more deterministic, higher = more creative)
377 | max_tokens: Maximum number of tokens to generate in the response
378 | (if None, uses provider's default maximum)
379 | system: Optional system message for setting context/behavior
380 | (handled differently depending on provider capabilities)
381 | stream: When True, displays generated tokens as they arrive rather than
382 | waiting for the complete response (may affect metric reporting)
383 | """
384 | try:
385 | # Get provider instance
386 | provider_instance = get_provider(provider)
387 |
388 | # Initialize provider
389 | await provider_instance.initialize()
390 |
391 | # Set extra parameters based on provider
392 | kwargs = {}
393 | if system:
394 | if provider == Provider.ANTHROPIC.value:
395 | kwargs["system"] = system
396 | else:
397 | # For other providers, prepend system message to prompt
398 | prompt = f"System: {system}\n\nUser: {prompt}"
399 |
400 | # Show progress for non-streaming generation
401 | if not stream:
402 | with Progress(
403 | SpinnerColumn(),
404 | TextColumn("[bold blue]Generating completion...[/bold blue]"),
405 | transient=True
406 | ) as progress:
407 | progress.add_task("generate", total=None)
408 |
409 | # Generate completion
410 | start_time = time.time()
411 | result = await provider_instance.generate_completion(
412 | prompt=prompt,
413 | model=model,
414 | temperature=temperature,
415 | max_tokens=max_tokens,
416 | **kwargs
417 | )
418 | elapsed_time = time.time() - start_time
419 |
420 | # Print result
421 | console.print(f"[cyan]{result.text}[/cyan]")
422 | console.print()
423 |
424 | # Print metrics
425 | console.print(f"[bold]Model:[/bold] [blue]{result.model}[/blue]")
426 | console.print(f"[bold]Tokens:[/bold] [yellow]{result.input_tokens}[/yellow] input, [yellow]{result.output_tokens}[/yellow] output")
427 | console.print(f"[bold]Cost:[/bold] [green]${result.cost:.6f}[/green]")
428 | console.print(f"[bold]Time:[/bold] [blue]{elapsed_time:.2f}s[/blue]")
429 |
430 | else:
431 | # Streaming generation
432 | console.print("[bold blue]Generating completion (streaming)...[/bold blue]")
433 |
434 | # Generate streaming completion
435 | start_time = time.time()
436 | stream = provider_instance.generate_completion_stream(
437 | prompt=prompt,
438 | model=model,
439 | temperature=temperature,
440 | max_tokens=max_tokens,
441 | **kwargs
442 | )
443 |
444 | # Process stream
445 | full_text = ""
446 | async for chunk, metadata in stream: # noqa: B007
447 | console.print(chunk, end="")
448 | sys.stderr.flush() # Use stderr instead of stdout
449 | full_text += chunk
450 |
451 | elapsed_time = time.time() - start_time
452 |
453 | # Print metrics
454 | console.print("\n")
455 | console.print(f"[bold]Model:[/bold] [blue]{metadata.get('model', model or 'unknown')}[/blue]")
456 | console.print(f"[bold]Time:[/bold] [blue]{elapsed_time:.2f}s[/blue]")
457 |
458 | except Exception as e:
459 | console.print(f"[bold red]Error generating completion:[/bold red] {str(e)}")
460 |
461 |
462 | async def check_cache(show_status: bool = True, clear: bool = False) -> None:
463 | """View cache statistics and optionally clear the LLM response cache.
464 |
465 | The server employs a caching system that stores LLM responses to avoid
466 | redundant API calls when the same or similar requests are made repeatedly.
467 | This significantly reduces API costs and improves response times.
468 |
469 | This function displays comprehensive cache information including:
470 | - Basic configuration (enabled status, TTL, max size)
471 | - Storage details (persistence, directory location)
472 | - Performance metrics (hit/miss counts, hit ratio percentage)
473 | - Efficiency data (estimated cost savings, total tokens saved)
474 |
475 | Clearing the cache removes all stored responses, which might be useful
476 | when testing changes, addressing potential staleness, or reclaiming disk space.
477 |
478 | Args:
479 | show_status: When True, displays detailed cache statistics and configuration
480 | clear: When True, purges all entries from the cache (requires confirmation)
481 | """
482 | # Get cache service
483 | cache_service = get_cache_service()
484 |
485 | if clear:
486 | # Clear cache
487 | console.print("[bold]Clearing cache...[/bold]")
488 | cache_service.clear()
489 | console.print("[green]Cache cleared successfully[/green]")
490 |
491 | if show_status:
492 | # Get cache stats
493 | stats = cache_service.get_stats()
494 |
495 | # Create status table
496 | table = Table(title="Cache Status")
497 | table.add_column("Setting", style="cyan")
498 | table.add_column("Value", style="green")
499 |
500 | # Add rows
501 | table.add_row("Enabled", "✅" if stats["enabled"] else "❌")
502 | table.add_row("Size", f"{stats['size']} / {stats['max_size']} entries")
503 | table.add_row("TTL", f"{stats['ttl']} seconds")
504 | table.add_row("Persistence", "✅" if stats["persistence"]["enabled"] else "❌")
505 | table.add_row("Cache Directory", stats["persistence"]["cache_dir"])
506 | table.add_row("Fuzzy Matching", "✅" if stats["fuzzy_matching"] else "❌")
507 |
508 | # Print table
509 | console.print(table)
510 |
511 | # Create stats table
512 | stats_table = Table(title="Cache Statistics")
513 | stats_table.add_column("Metric", style="cyan")
514 | stats_table.add_column("Value", style="green")
515 |
516 | # Add rows
517 | cache_stats = stats["stats"]
518 | stats_table.add_row("Hits", str(cache_stats["hits"]))
519 | stats_table.add_row("Misses", str(cache_stats["misses"]))
520 | stats_table.add_row("Hit Ratio", f"{cache_stats['hit_ratio']:.2%}")
521 | stats_table.add_row("Stores", str(cache_stats["stores"]))
522 | stats_table.add_row("Evictions", str(cache_stats["evictions"]))
523 | stats_table.add_row("Total Saved Tokens", f"{cache_stats['total_saved_tokens']:,}")
524 | stats_table.add_row("Estimated Cost Savings", f"${cache_stats['estimated_cost_savings']:.6f}")
525 |
526 | # Print table
527 | console.print(stats_table)
528 |
529 |
530 | async def benchmark_providers(
531 |     providers: Optional[List[str]] = None,
532 |     models: Optional[List[str]] = None,
533 | prompt: Optional[str] = None,
534 | runs: int = 3
535 | ) -> None:
536 | """Compare performance metrics across different LLM providers and models.
537 |
538 | This benchmark utility sends identical prompts to multiple provider/model
539 | combinations and measures various performance characteristics. It runs
540 | each test multiple times to establish average metrics for more reliable
541 | comparison.
542 |
543 | Benchmarks measure:
544 | - Response time: How long it takes to receive a complete response
545 | - Processing speed: Tokens per second throughput
546 | - Token efficiency: Input/output token ratio
547 | - Cost: Estimated price per request
548 |
549 | Results are presented in a table format for easy comparison. This helps
550 | identify the optimal provider/model for specific needs based on speed,
551 | cost, or quality considerations.
552 |
553 | Args:
554 | providers: List of provider identifiers to benchmark (e.g., ['openai', 'anthropic'])
555 | If None, benchmarks all available and configured providers
556 | models: List of specific model IDs to test (e.g., ['gpt-4o', 'claude-3-5-haiku'])
557 | If None, uses each provider's default model
558 | prompt: Text prompt to use for testing (should be identical across all providers)
559 | If None, uses a default explanation prompt
560 | runs: Number of test iterations to run for each provider/model combination
561 | (higher values produce more reliable averages but take longer)
562 | """
563 | # Use default providers if not specified
564 | if not providers:
565 | providers = [p.value for p in Provider]
566 |
567 | # Set default prompt if not provided
568 | if not prompt:
569 | prompt = "Explain the concept of quantum computing in simple terms that a high school student would understand."
570 |
571 | console.print(f"[bold]Running benchmark with {runs} runs per provider/model[/bold]")
572 | console.print(f'[bold]Prompt:[/bold] [green]"{prompt}"[/green]')
573 | console.print()
574 |
575 | # Create results table
576 | table = Table(title="Benchmark Results")
577 | table.add_column("Provider", style="cyan")
578 | table.add_column("Model", style="blue")
579 | table.add_column("Avg. Time (s)", style="green")
580 | table.add_column("Tokens/Sec", style="yellow")
581 | table.add_column("Avg. Cost ($)", style="magenta")
582 | table.add_column("Input Tokens", style="dim")
583 | table.add_column("Output Tokens", style="dim")
584 |
585 | # Track benchmarks for progress bar
586 | total_benchmarks = 0
587 | for provider_name in providers:
588 | try:
589 | provider_instance = get_provider(provider_name)
590 | await provider_instance.initialize()
591 |
592 | # Get available models
593 | available_models = await provider_instance.list_models()
594 |
595 | # Filter models if specified
596 | if models:
597 | available_models = [m for m in available_models if m["id"] in models]
598 | else:
599 | # Use default model if no models specified
600 | default_model = provider_instance.get_default_model()
601 | available_models = [m for m in available_models if m["id"] == default_model]
602 |
603 | total_benchmarks += len(available_models)
604 |
605 | except Exception:
606 | # Skip providers that can't be initialized
607 | pass
608 |
609 | # Run benchmarks with progress bar
610 | with Progress(
611 | TextColumn("[progress.description]{task.description}"),
612 | BarColumn(),
613 | TaskProgressColumn(),
614 | TimeElapsedColumn()
615 | ) as progress:
616 | benchmark_task = progress.add_task("[bold blue]Running benchmarks...", total=total_benchmarks * runs)
617 |
618 | for provider_name in providers:
619 | try:
620 | # Get provider instance
621 | provider_instance = get_provider(provider_name)
622 | await provider_instance.initialize()
623 |
624 | # Get available models
625 | available_models = await provider_instance.list_models()
626 |
627 | # Filter models if specified
628 | if models:
629 | available_models = [m for m in available_models if m["id"] in models]
630 | else:
631 | # Use default model if no models specified
632 | default_model = provider_instance.get_default_model()
633 | available_models = [m for m in available_models if m["id"] == default_model]
634 |
635 | for model_info in available_models:
636 | model_id = model_info["id"]
637 |
638 | # Run benchmark for this model
639 | total_time = 0.0
640 | total_cost = 0.0
641 | total_input_tokens = 0
642 | total_output_tokens = 0
643 | total_tokens = 0
644 |
645 | for run in range(runs):
646 | try:
647 | # Update progress description
648 | progress.update(
649 | benchmark_task,
650 | description=f"[bold blue]Benchmarking {provider_name}/{model_id} (Run {run+1}/{runs})"
651 | )
652 |
653 | # Run benchmark
654 | start_time = time.time()
655 | result = await provider_instance.generate_completion(
656 | prompt=prompt,
657 | model=model_id,
658 | temperature=0.7
659 | )
660 | run_time = time.time() - start_time
661 |
662 | # Record metrics
663 | total_time += run_time
664 | total_cost += result.cost
665 | total_input_tokens += result.input_tokens
666 | total_output_tokens += result.output_tokens
667 | total_tokens += result.total_tokens
668 |
669 | # Update progress
670 | progress.advance(benchmark_task)
671 |
672 | except Exception as e:
673 | console.print(f"[red]Error in run {run+1} for {provider_name}/{model_id}: {str(e)}[/red]")
674 | # Still advance progress
675 | progress.advance(benchmark_task)
676 |
677 | # Calculate averages
678 | avg_time = total_time / max(1, runs)
679 | avg_cost = total_cost / max(1, runs)
680 | avg_input_tokens = total_input_tokens // max(1, runs)
681 | avg_output_tokens = total_output_tokens // max(1, runs)
682 |
683 | # Calculate tokens per second
684 | tokens_per_second = total_tokens / total_time if total_time > 0 else 0
685 |
686 | # Add to results table
687 | table.add_row(
688 | provider_name,
689 | model_id,
690 | f"{avg_time:.2f}",
691 | f"{tokens_per_second:.1f}",
692 | f"{avg_cost:.6f}",
693 | str(avg_input_tokens),
694 | str(avg_output_tokens)
695 | )
696 |
697 | except Exception as e:
698 | console.print(f"[red]Error benchmarking provider {provider_name}: {str(e)}[/red]")
699 |
700 | # Print results
701 | console.print(table)
702 |
703 |
704 | async def list_tools(category: Optional[str] = None) -> None:
705 | """Display all available MCP tools registered in the server.
706 |
707 | This function provides a comprehensive listing of tools that can be called
708 | through the Model Context Protocol (MCP) interface. Each tool represents
709 | a specific capability that AI agents can access, from text generation to
710 | filesystem operations, browser automation, database access, and more.
711 |
712 | Tools are organized into functional categories such as:
713 | - completion: Text generation capabilities
714 | - document: Document processing and analysis
715 | - extraction: Structured data extraction from text
716 | - filesystem: File and directory operations
717 | - browser: Web browsing and automation
718 | - rag: Retrieval-augmented generation
719 | - database: SQL database interactions
720 | - meta: Self-reflection and tool discovery
721 |
722 | The listing includes tool names, categories, and brief descriptions.
723 | The output also provides usage hints for filtering which tools are
724 | enabled when starting the server.
725 |
726 | Args:
727 | category: When specified, only shows tools belonging to the given
728 | category (e.g., 'filesystem', 'document', 'browser')
729 | """
730 | # Import tools module to get the list of available tools
731 | from ultimate_mcp_server.tools import STANDALONE_TOOL_FUNCTIONS
732 |
733 | # Create tools table
734 | table = Table(title="Available Ultimate MCP Server Tools")
735 | table.add_column("Tool Name", style="cyan")
736 | table.add_column("Category", style="green")
737 | table.add_column("Description", style="yellow")
738 |
739 | # Define tool categories
740 | categories = {
741 | "completion": ["generate_completion", "stream_completion", "chat_completion", "multi_completion"],
742 | "provider": ["get_provider_status", "list_models"],
743 | "tournament": ["create_tournament", "get_tournament_status", "list_tournaments", "get_tournament_results", "cancel_tournament"],
744 | "document": ["chunk_document", "summarize_document", "extract_entities", "generate_qa_pairs", "process_document_batch"],
745 | "extraction": ["extract_json", "extract_table", "extract_key_value_pairs", "extract_semantic_schema", "extract_entity_graph", "extract_code_from_response"],
746 | "filesystem": ["read_file", "read_multiple_files", "write_file", "edit_file", "create_directory", "list_directory", "directory_tree", "move_file", "search_files", "get_file_info", "list_allowed_directories"],
747 | "rag": ["create_knowledge_base", "list_knowledge_bases", "delete_knowledge_base", "add_documents", "retrieve_context", "generate_with_rag"],
748 | "meta": ["get_tool_info", "get_llm_instructions", "get_tool_recommendations", "register_api_meta_tools"],
749 | "search": ["marqo_fused_search"],
750 | "ocr": ["extract_text_from_pdf", "process_image_ocr", "enhance_ocr_text", "analyze_pdf_structure", "batch_process_documents"],
751 | "optimization": ["estimate_cost", "compare_models", "recommend_model", "execute_optimized_workflow"],
752 | "database": ["connect_to_database", "disconnect_from_database", "discover_database_schema", "execute_query", "generate_database_documentation", "get_table_details", "find_related_tables", "analyze_column_statistics", "execute_parameterized_query", "create_database_view", "create_database_index", "test_connection", "execute_transaction", "execute_query_with_pagination", "get_database_status"],
753 | "audio": ["transcribe_audio", "extract_audio_transcript_key_points", "chat_with_transcript"],
754 | "browser": ["browser_init", "browser_navigate", "browser_click", "browser_type", "browser_screenshot", "browser_close", "browser_select", "browser_checkbox", "browser_get_text", "browser_get_attributes", "browser_execute_javascript", "browser_wait", "execute_web_workflow", "extract_structured_data_from_pages", "find_and_download_pdfs", "multi_engine_search_summary"],
755 | "classification": ["text_classification"],
756 | }
757 |
758 | # Find category for each tool
759 | tool_categories = {}
760 | for cat_name, tools in categories.items():
761 | for tool in tools:
762 | tool_categories[tool] = cat_name
763 |
764 | # Add rows to table
765 | for tool_func in STANDALONE_TOOL_FUNCTIONS:
766 | if callable(tool_func):
767 | tool_name = getattr(tool_func, "__name__", str(tool_func))
768 | tool_category = tool_categories.get(tool_name, "other")
769 |
770 | # Skip if category filter is provided and doesn't match
771 | if category and category.lower() != tool_category.lower():
772 | continue
773 |
774 | # Get docstring (first line only for description)
775 | docstring = inspect.getdoc(tool_func) or ""
776 | description = docstring.split("\n")[0] if docstring else ""
777 |
778 | table.add_row(tool_name, tool_category, description)
779 |
780 | # Add the special meta tool registrars
781 | if not category or category.lower() in ["meta", "other"]:
782 | if not category or category.lower() == "meta":
783 | table.add_row("register_api_meta_tools", "meta", "Register Meta API tools")
784 |
785 |     # Print the table (rows are added in registration order, not sorted)
786 | console.print(table)
787 |
788 | # Print usage hint
789 | console.print("\n[bold]Usage with tool filtering:[/bold]")
790 | console.print("To include only specific tools:")
791 | console.print(" umcp run --include-tools tool1 tool2 tool3")
792 | console.print("\nTo exclude specific tools:")
793 | console.print(" umcp run --exclude-tools tool1 tool2 tool3")
794 | console.print("\nTo include tools by category:")
795 | console.print(" umcp tools --category filesystem # List filesystem tools")
796 | console.print(" umcp run --include-tools read_file write_file edit_file # Include only these filesystem tools")
```
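
The command functions above are plain `async def` coroutines (apart from `run_server`, which blocks inside `start_server`), so the Typer entry point in `cli/typer_cli.py` presumably bridges them with `asyncio.run`. Below is a minimal, hypothetical wiring sketch under that assumption; the option names and help text are illustrative, not the project's actual CLI surface:

```python
# Hypothetical wiring sketch -- the real entry point lives in cli/typer_cli.py and may differ.
import asyncio
from typing import Optional

import typer

from ultimate_mcp_server.cli.commands import list_providers, run_server

app = typer.Typer(help="Ultimate MCP Server CLI (illustrative sketch)")


@app.command("run")
def run(
    host: Optional[str] = typer.Option(None, help="Interface to bind to, e.g. 127.0.0.1"),
    port: Optional[int] = typer.Option(None, help="TCP port to listen on"),
    load_all_tools: bool = typer.Option(False, "--load-all-tools", help="Load every tool, not just the base set"),
) -> None:
    # run_server() is synchronous and blocks until the server exits.
    run_server(host=host, port=port, load_all_tools=load_all_tools)


@app.command("providers")
def providers(check_keys: bool = False, list_models: bool = False) -> None:
    # list_providers() is a coroutine, so bridge it from the sync Typer callback.
    asyncio.run(list_providers(check_keys=check_keys, list_models=list_models))


if __name__ == "__main__":
    app()
```

With a wrapper along these lines, invocations such as `umcp run --load-all-tools` or `umcp providers --check-keys` would map directly onto the functions defined above.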
--------------------------------------------------------------------------------
/ultimate_mcp_server/tools/extraction.py:
--------------------------------------------------------------------------------
```python
1 | """Advanced extraction tools for Ultimate MCP Server.
2 |
3 | This module provides tools for extracting structured data (JSON, tables, key-value pairs, code)
4 | from unstructured or semi-structured text using LLMs.
5 | """
6 |
7 | import asyncio
8 | import json
9 | import re # Added for code extraction
10 | import time
11 | from typing import Any, Dict, List, Optional, Tuple
12 |
13 | import jsonschema
14 |
15 | from ultimate_mcp_server.constants import Provider
16 |
17 | # Removed CompletionRequest import as not directly used by standalone functions
18 | from ultimate_mcp_server.exceptions import ProviderError, ToolInputError
19 | from ultimate_mcp_server.tools.base import BaseTool, with_error_handling, with_tool_metrics
20 |
21 | # Import the standardized completion tool
22 | from ultimate_mcp_server.tools.completion import generate_completion
23 | from ultimate_mcp_server.utils import get_logger
24 |
25 | logger = get_logger("ultimate_mcp_server.tools.extraction")
26 |
27 | def _extract_and_parse_json(text: str) -> Tuple[Any, bool, Optional[str]]:
28 | """
29 | Robust utility to extract and parse JSON from text, handling various formats and edge cases.
30 |
31 | Args:
32 | text: The text that may contain JSON.
33 |
34 | Returns:
35 | Tuple of (parsed_data, success_flag, error_message)
36 | """
37 | # Start with a clean slate
38 | text = text.strip()
39 | error_message = None
40 |
41 | # Try a series of increasingly aggressive extraction techniques
42 | extraction_methods = [
43 | # Method 1: Direct parsing if it's already valid JSON
44 | lambda t: json.loads(t),
45 |
46 | # Method 2: Extract JSON using regex for common patterns
47 | lambda t: json.loads(re.search(r'(?s)(?:```(?:json)?\s*)?({[\s\S]*?}|\[[\s\S]*?\])(?:\s*```)?', t).group(1).strip()),
48 |
49 | # Method 3: Remove markdown fences and try again
50 | lambda t: json.loads(re.sub(r'```(?:json)?\s*|\s*```', '', t).strip()),
51 |
52 | # Method 4: Fix common JSON syntax errors and try again
53 | lambda t: json.loads(_fix_common_json_errors(t)),
54 |
55 | # Method 5: Use ast.literal_eval as a fallback for Python literals
56 | lambda t: _safe_literal_eval(t)
57 | ]
58 |
59 | # Try each method in sequence until one works
60 | for i, method in enumerate(extraction_methods):
61 | try:
62 | result = method(text)
63 | logger.debug(f"Successfully parsed JSON using method {i+1}")
64 | return result, True, None
65 | except Exception as e:
66 | # Continue to next method on failure
67 | if i == len(extraction_methods) - 1: # Last method
68 | error_message = f"All JSON parsing methods failed. Last error: {str(e)}"
69 |
70 | return None, False, error_message
71 |
72 | def _fix_common_json_errors(json_str: str) -> str:
73 | """
74 | Fix common JSON syntax errors found in LLM outputs.
75 |
76 | Args:
77 | json_str: The JSON string to fix
78 |
79 | Returns:
80 | Corrected JSON string
81 | """
82 | # Remove any text before the first '{' or '['
83 | json_str = re.sub(r'^.*?([{\[])', r'\1', json_str, flags=re.DOTALL)
84 |
85 | # Remove any text after the last '}' or ']'
86 | json_str = re.sub(r'([}\]])[^}\]]*$', r'\1', json_str, flags=re.DOTALL)
87 |
88 | # Fix missing quotes around keys
89 | json_str = re.sub(r'(\s*)(\w+)(\s*):', r'\1"\2"\3:', json_str)
90 |
91 | # Fix trailing commas in arrays
92 | json_str = re.sub(r',(\s*[\]}])', r'\1', json_str)
93 |
94 | # Fix missing commas between elements
95 | json_str = re.sub(r'(["}\]])(\s*)(["{\[])', r'\1,\2\3', json_str)
96 |
97 | return json_str
98 |
99 | def _safe_literal_eval(text: str) -> Any:
100 | """
101 | Safely evaluate a string containing a Python literal.
102 |
103 | Args:
104 | text: The text containing a Python literal
105 |
106 | Returns:
107 | The evaluated Python object
108 |
109 | Raises:
110 | SyntaxError: If the text cannot be parsed as a Python literal
111 | """
112 | import ast
113 |
114 | # Remove any text before the first '{' or '['
115 | text = re.sub(r'^.*?([{\[])', r'\1', text, flags=re.DOTALL)
116 |
117 | # Remove any text after the last '}' or ']'
118 | text = re.sub(r'([}\]])[^}\]]*$', r'\1', text, flags=re.DOTALL)
119 |
120 | return ast.literal_eval(text)
121 |
122 | @with_tool_metrics
123 | @with_error_handling
124 | async def extract_json(
125 | text: str,
126 | json_schema: Optional[Dict] = None,
127 | provider: str = Provider.OPENAI.value,
128 | model: Optional[str] = None,
129 | validate_output: bool = True
130 | # Removed ctx=None
131 | ) -> Dict[str, Any]:
132 | """Extracts structured data formatted as JSON from within a larger text body.
133 |
134 | Use this tool when the input text contains a JSON object or list (potentially embedded
135 | within other text or markdown code fences) that needs to be isolated and parsed.
136 | Optionally validates the extracted JSON against a provided schema.
137 |
138 | Args:
139 | text: The input text potentially containing an embedded JSON object or list.
140 | json_schema: (Optional) A JSON schema (as a Python dictionary) to validate the extracted
141 | JSON against. If validation fails, the error is included in the result.
142 | provider: The name of the LLM provider (e.g., "openai"). Defaults to "openai".
143 | Providers supporting JSON mode (like OpenAI) are recommended for reliability.
144 | model: The specific model ID (e.g., "openai/gpt-4.1-mini"). Uses provider default if None.
145 | validate_output: (Optional) If True (default) and `json_schema` is provided, validates
146 | the extracted data against the schema.
147 |
148 | Returns:
149 | A dictionary containing the extraction results:
150 | {
151 | "data": { ... } | [ ... ] | null, # The extracted JSON data (or null if extraction/parsing failed).
152 | "validation_result": { # Included if json_schema provided & validate_output=True
153 | "valid": true | false,
154 | "errors": [ "Validation error message..." ] # List of errors if not valid
155 | } | null,
156 | "raw_text": "...", # Included if JSON parsing failed
157 | "model": "provider/model-used",
158 | "provider": "provider-name",
159 | "tokens": { ... },
160 | "cost": 0.000045,
161 | "processing_time": 1.8,
162 | "success": true | false,
163 | "error": "Error message if success is false"
164 | }
165 |
166 | Raises:
167 | ProviderError: If the provider/LLM fails.
168 | ToolError: For other internal errors.
169 | """
170 | start_time = time.time()
171 |
172 | if not text or not isinstance(text, str):
173 | raise ToolInputError("Input 'text' must be a non-empty string.", param_name="text", provided_value=text)
174 |
175 | try:
176 | # Check if there's already valid JSON in the input text
177 | extracted_data, success, error_message = _extract_and_parse_json(text)
178 |
179 | # If we found valid JSON in the input, return it right away
180 | if success:
181 | logger.info("Found and extracted valid JSON directly from input text")
182 | return {
183 | "data": extracted_data,
184 | "validation_result": None, # No validation done for direct extraction
185 | "raw_text": None,
186 | "model": "direct-extraction", # No model used
187 | "provider": "direct-extraction", # No provider used
188 | "tokens": {"input": len(text), "output": 0, "total": len(text)},
189 | "cost": 0.0, # No cost for direct extraction
190 | "processing_time": time.time() - start_time,
191 | "success": True,
192 | "error": None
193 | }
194 |
195 | # Prepare model ID based on provider format
196 | effective_model = model
197 | # Ensure model ID includes provider prefix if not already present
198 | if model and provider not in model:
199 | effective_model = f"{provider}/{model}"
200 |
201 | schema_description = f"The extracted JSON should conform to this JSON schema:\n```json\n{json.dumps(json_schema, indent=2)}\n```\n" if json_schema else ""
202 | # Improved prompt asking the LLM to identify and extract the JSON
203 | prompt = f"Identify and extract the primary JSON object or list embedded within the following text. " \
204 | f"{schema_description}Focus on extracting only the JSON data structure itself, removing any surrounding text or markdown fences. " \
205 | f"Text:\n```\n{text}\n```\nExtracted JSON:"
206 |
207 | # Use JSON mode if supported by the provider (e.g., OpenAI)
208 | additional_params = {}
209 | if provider == Provider.OPENAI.value:
210 | additional_params["response_format"] = {"type": "json_object"}
211 |
212 | # Use the standardized completion tool instead of direct provider call
213 | completion_result = await generate_completion(
214 | prompt=prompt,
215 | model=effective_model,
216 | provider=provider,
217 | temperature=0.0, # Low temp for precise extraction
218 | max_tokens=4000, # Allow for large JSON objects
219 | additional_params=additional_params
220 | )
221 |
222 | # Extract data from the standardized result format
223 | processing_time = completion_result.get("processing_time", time.time() - start_time)
224 | actual_model_used = completion_result.get("model", effective_model)
225 | raw_text_output = completion_result.get("text", "").strip()
226 | token_info = completion_result.get("tokens", {})
227 | cost = completion_result.get("cost", 0.0)
228 | tool_success = completion_result.get("success", False)
229 |
230 | # If the tool call failed, propagate the error
231 | if not tool_success:
232 | error_message = completion_result.get("error", "Unknown error during completion")
233 | raise ProviderError(
234 | f"JSON extraction failed: {error_message}",
235 | provider=provider,
236 | model=actual_model_used
237 | )
238 |
239 | # Use our robust parsing function
240 | extracted_data, success, error_message = _extract_and_parse_json(raw_text_output)
241 | validation_result = None
242 |
243 | # Validate against schema if requested and extraction succeeded
244 | if success and json_schema and validate_output:
245 | validation_result = {"valid": True, "errors": []}
246 | try:
247 | jsonschema.validate(instance=extracted_data, schema=json_schema)
248 | logger.debug("JSON validated successfully against schema.")
249 | except jsonschema.exceptions.ValidationError as e:
250 | validation_result = {"valid": False, "errors": [str(e)]}
251 | logger.warning(f"JSON validation failed: {e}")
252 | # Keep success=True as extraction worked, but validation failed
253 |
254 | logger.info(f"JSON extraction attempt complete. Success: {success}, Validated: {validation_result.get('valid') if validation_result else 'N/A'}. Time: {processing_time:.2f}s")
255 | return {
256 | "data": extracted_data,
257 | "validation_result": validation_result,
258 | "raw_text": raw_text_output if not success else None, # Include raw only on parse failure
259 | "model": actual_model_used,
260 | "provider": provider,
261 | "tokens": token_info,
262 | "cost": cost,
263 | "processing_time": processing_time,
264 | "success": success,
265 | "error": error_message
266 | }
267 |
268 | except Exception as e:
269 | error_model = model or f"{provider}/default"
270 | if isinstance(e, ProviderError):
271 | raise # Re-raise
272 | else:
273 | raise ProviderError(f"JSON extraction failed: {str(e)}", provider=provider, model=error_model, cause=e) from e
274 |
275 | @with_tool_metrics
276 | @with_error_handling
277 | async def extract_table(
278 | text: str,
279 | headers: Optional[List[str]] = None,
280 | return_formats: Optional[List[str]] = None, # Renamed from 'formats'
281 | extract_metadata: bool = False,
282 | provider: str = Provider.OPENAI.value,
283 | model: Optional[str] = None
284 | # Removed ctx=None
285 | ) -> Dict[str, Any]:
286 | """Extracts tabular data found within text content.
287 |
288 | Identifies table structures in the input text and extracts the data, attempting
289 | to return it in specified formats (e.g., JSON list of objects, Markdown table).
290 |
291 | Args:
292 | text: The input text potentially containing one or more tables.
293 | headers: (Optional) A list of expected header strings. Providing headers helps the LLM
294 | identify the correct table and map columns accurately.
295 | return_formats: (Optional) List of desired output formats. Supported: "json", "markdown".
296 | Defaults to ["json"]. The result dictionary will contain keys matching these formats.
297 | extract_metadata: (Optional) If True, attempts to extract contextual metadata about the table,
298 | such as a title, surrounding notes, or source information. Default False.
299 | provider: The name of the LLM provider (e.g., "openai"). Defaults to "openai".
300 | model: The specific model ID (e.g., "openai/gpt-4.1-mini"). Uses provider default if None.
301 |
302 | Returns:
303 | A dictionary containing the extracted table data and metadata:
304 | {
305 | "data": { # Dictionary containing requested formats
306 | "json": [ { "Header1": "Row1Val1", "Header2": "Row1Val2" }, ... ],
307 | "markdown": "| Header1 | Header2 |\n|---|---|\n| Row1Val1 | Row1Val2 |\n...",
308 | "metadata": { "title": "Table Title...", "notes": "..." } # If extract_metadata=True
309 | } | null, # Null if extraction fails
310 | "model": "provider/model-used",
311 | "provider": "provider-name",
312 | "tokens": { ... },
313 | "cost": 0.000180,
314 | "processing_time": 3.5,
315 | "success": true | false,
316 | "error": "Error message if success is false"
317 | }
318 |
319 | Raises:
320 | ProviderError: If the provider/LLM fails.
321 | ToolError: For other internal errors, including failure to parse the LLM response.
322 | """
323 | return_formats = return_formats or ["json"]
324 | start_time = time.time()
325 |
326 | if not text or not isinstance(text, str):
327 | raise ToolInputError("Input 'text' must be a non-empty string.", param_name="text", provided_value=text)
328 |
329 | try:
330 | # Prepare model ID based on provider format
331 | effective_model = model
332 | # Ensure model ID includes provider prefix if not already present
333 | if model and provider not in model:
334 | effective_model = f"{provider}/{model}"
335 |
336 | headers_guidance = f"The table likely has headers similar to: {', '.join(headers)}.\n" if headers else "Attempt to identify table headers automatically.\n"
337 | metadata_guidance = "Also extract any surrounding metadata like a table title, caption, or source notes.\n" if extract_metadata else ""
338 | formats_guidance = f"Return the extracted table data in these formats: {', '.join(return_formats)}."
339 |
340 | # Improved prompt asking for specific formats in a JSON structure
341 | prompt = f"Identify and extract the primary data table from the following text. " \
342 | f"{headers_guidance}{metadata_guidance}{formats_guidance}" \
343 | f"Format the output as a single JSON object containing keys for each requested format ({', '.join(return_formats)}) " \
344 | f"and optionally a 'metadata' key if requested. Ensure the values are the table represented in that format." \
345 | f"\n\nText:\n```\n{text}\n```\nResult JSON:"
346 |
347 | # Use JSON mode if supported by the provider
348 | additional_params = {}
349 | if provider == Provider.OPENAI.value:
350 | additional_params["response_format"] = {"type": "json_object"}
351 |
352 | # Use the standardized completion tool instead of direct provider call
353 | completion_result = await generate_completion(
354 | prompt=prompt,
355 | model=effective_model,
356 | provider=provider,
357 | temperature=0.0, # Low temp for precise extraction
358 | max_tokens=4000,
359 | additional_params=additional_params
360 | )
361 |
362 | # Extract data from the standardized result format
363 | processing_time = completion_result.get("processing_time", time.time() - start_time)
364 | actual_model_used = completion_result.get("model", effective_model)
365 | raw_text_output = completion_result.get("text", "").strip()
366 | token_info = completion_result.get("tokens", {})
367 | cost = completion_result.get("cost", 0.0)
368 | tool_success = completion_result.get("success", False)
369 |
370 | # If the tool call failed, propagate the error
371 | if not tool_success:
372 | error_message = completion_result.get("error", "Unknown error during completion")
373 | raise ProviderError(
374 | f"Table extraction failed: {error_message}",
375 | provider=provider,
376 | model=actual_model_used
377 | )
378 |
379 | # Use our robust parsing function
380 | extraction_result, success, error_message = _extract_and_parse_json(raw_text_output)
381 |
382 | # Basic validation if extraction succeeded
383 | if success and (not isinstance(extraction_result, dict) or not any(fmt in extraction_result for fmt in return_formats)):
384 | logger.warning(f"Table extraction JSON result missing expected structure or formats ({return_formats}). Result: {extraction_result}")
385 | # Allow partial success if it's a dict, but log warning
386 | if isinstance(extraction_result, dict):
387 | error_message = f"Warning: LLM output did not contain all requested formats ({return_formats})."
388 | else:
389 | error_message = "Expected a JSON object with format keys"
390 | success = False
391 | extraction_result = None
392 |
393 | logger.info(f"Table extraction attempt complete. Success: {success}. Time: {processing_time:.2f}s")
394 | return {
395 | "data": extraction_result,
396 | "raw_text": raw_text_output if not success else None, # Include raw only on parse failure
397 | "model": actual_model_used,
398 | "provider": provider,
399 | "tokens": token_info,
400 | "cost": cost,
401 | "processing_time": processing_time,
402 | "success": success,
403 | "error": error_message
404 | }
405 |
406 | except Exception as e:
407 | error_model = model or f"{provider}/default"
408 | if isinstance(e, ProviderError):
409 | raise
410 | else:
411 | raise ProviderError(f"Table extraction failed: {str(e)}", provider=provider, model=error_model, cause=e) from e
412 |
413 | @with_tool_metrics
414 | @with_error_handling
415 | async def extract_key_value_pairs(
416 | text: str,
417 | keys: Optional[List[str]] = None,
418 | provider: str = Provider.OPENAI.value,
419 | model: Optional[str] = None
420 | # Removed ctx=None
421 | ) -> Dict[str, Any]:
422 | """Extracts key-value pairs from text, optionally targeting specific keys.
423 |
424 | Use this tool to pull out data points that appear in a "Key: Value" or similar format
425 | within unstructured text (e.g., fields from a form, details from a description).
426 |
427 | Args:
428 | text: The input text containing key-value pairs.
429 | keys: (Optional) A list of specific key names to look for and extract. If omitted,
430 | the tool attempts to extract all identifiable key-value pairs.
431 | provider: The name of the LLM provider (e.g., "openai"). Defaults to "openai".
432 | model: The specific model ID (e.g., "openai/gpt-4.1-mini"). Uses provider default if None.
433 |
434 | Returns:
435 | A dictionary containing the extracted key-value data and metadata:
436 | {
437 | "data": { # Dictionary of extracted key-value pairs
438 | "Name": "Alice",
439 | "Role": "Engineer",
440 | "Location": "Remote", ...
441 | } | null, # Null if extraction fails
442 | "model": "provider/model-used",
443 | "provider": "provider-name",
444 | "tokens": { ... },
445 | "cost": 0.000070,
446 | "processing_time": 2.1,
447 | "success": true | false,
448 | "error": "Error message if success is false"
449 | }
450 |
451 | Raises:
452 | ProviderError: If the provider/LLM fails.
453 | ToolError: For other internal errors, including failure to parse the LLM JSON response.
454 | """
455 | start_time = time.time()
456 |
457 | if not text or not isinstance(text, str):
458 | raise ToolInputError("Input 'text' must be a non-empty string.", param_name="text", provided_value=text)
459 |
460 | try:
461 | # Prepare model ID based on provider format
462 | effective_model = model
463 | # Ensure model ID includes provider prefix if not already present
464 | if model and provider not in model:
465 | effective_model = f"{provider}/{model}"
466 |
467 | keys_guidance = f"Extract the values for these specific keys: {', '.join(keys)}.\n" if keys else "Identify and extract all distinct key-value pairs present in the text.\n"
468 | prompt = f"Analyze the following text and extract key-value pairs. {keys_guidance}" \
469 | f"Format the output as a single, flat JSON object mapping the extracted keys (as strings) to their corresponding values (as strings or appropriate simple types). " \
470 | f"Infer the value associated with each key from the text context. Ignore keys not present in the text.\n\n" \
471 | f"Text:\n```\n{text}\n```\nResult JSON object:"
472 |
473 | # Use JSON mode if supported by the provider
474 | additional_params = {}
475 | if provider == Provider.OPENAI.value:
476 | additional_params["response_format"] = {"type": "json_object"}
477 |
478 | # Use the standardized completion tool instead of direct provider call
479 | completion_result = await generate_completion(
480 | prompt=prompt,
481 | model=effective_model,
482 | provider=provider,
483 | temperature=0.0, # Low temp for precise extraction
484 | max_tokens=2000,
485 | additional_params=additional_params
486 | )
487 |
488 | # Extract data from the standardized result format
489 | processing_time = completion_result.get("processing_time", time.time() - start_time)
490 | actual_model_used = completion_result.get("model", effective_model)
491 | raw_text_output = completion_result.get("text", "").strip()
492 | token_info = completion_result.get("tokens", {})
493 | cost = completion_result.get("cost", 0.0)
494 | tool_success = completion_result.get("success", False)
495 |
496 | # If the tool call failed, propagate the error
497 | if not tool_success:
498 | error_message = completion_result.get("error", "Unknown error during completion")
499 | raise ProviderError(
500 | f"Key-value pair extraction failed: {error_message}",
501 | provider=provider,
502 | model=actual_model_used
503 | )
504 |
505 | # Use our robust parsing function
506 | kv_data, success, error_message = _extract_and_parse_json(raw_text_output)
507 |
508 | # Validate it's a dictionary if extraction succeeded
509 | if success and not isinstance(kv_data, dict):
510 | error_message = "Extracted data is not a valid key-value dictionary"
511 | logger.warning(error_message)
512 | success = False
513 | kv_data = None
514 |
515 | logger.info(f"Key-Value pair extraction attempt complete. Success: {success}. Time: {processing_time:.2f}s")
516 | return {
517 | "data": kv_data,
518 | "raw_text": raw_text_output if not success else None,
519 | "model": actual_model_used,
520 | "provider": provider,
521 | "tokens": token_info,
522 | "cost": cost,
523 | "processing_time": processing_time,
524 | "success": success,
525 | "error": error_message
526 | }
527 |
528 | except Exception as e:
529 | error_model = model or f"{provider}/default"
530 | if isinstance(e, ProviderError):
531 | raise
532 | else:
533 | raise ProviderError(f"Key-value pair extraction failed: {str(e)}", provider=provider, model=error_model, cause=e) from e
534 |
535 | @with_tool_metrics
536 | @with_error_handling
537 | async def extract_semantic_schema(
538 | text: str,
539 | # Schema should ideally be passed as a structured dict, not within the prompt
540 | semantic_schema: Dict[str, Any], # Changed from embedding prompt
541 | provider: str = Provider.OPENAI.value,
542 | model: Optional[str] = None
543 | # Removed ctx=None
544 | ) -> Dict[str, Any]:
545 | """Extracts information from text matching a specified semantic structure (schema).
546 |
547 | Use this tool when you need to populate a predefined JSON structure with information
548 | found or inferred from the input text. Unlike `extract_json`, the target JSON structure
549 | is *defined by you* (via `semantic_schema`), not expected to be present in the input text.
550 |
551 | Args:
552 | text: The input text containing information to extract.
553 | semantic_schema: A Python dictionary representing the desired JSON schema for the output.
554 | Use JSON Schema conventions (e.g., {"type": "object", "properties": { ... }}).
555 | This guides the LLM on what fields to extract and their expected types.
556 | provider: The name of the LLM provider (e.g., "openai"). Defaults to "openai".
557 | Providers supporting JSON mode or strong instruction following are recommended.
558 | model: The specific model ID (e.g., "openai/gpt-4o"). Uses provider default if None.
559 |
560 | Returns:
561 | A dictionary containing the extracted data conforming to the schema and metadata:
562 | {
563 | "data": { ... }, # The extracted data, structured according to semantic_schema
564 | "model": "provider/model-used",
565 | "provider": "provider-name",
566 | "tokens": { ... },
567 | "cost": 0.000210,
568 | "processing_time": 4.1,
569 | "success": true | false,
570 | "error": "Error message if success is false"
571 | }
572 |
573 | Raises:
574 | ToolInputError: If `semantic_schema` is not a valid dictionary.
575 | ProviderError: If the provider/LLM fails.
576 | ToolError: For other internal errors, including failure to parse the LLM JSON response.
577 | """
578 | start_time = time.time()
579 |
580 | if not text or not isinstance(text, str):
581 | raise ToolInputError("Input 'text' must be a non-empty string.", param_name="text", provided_value=text)
582 | if not semantic_schema or not isinstance(semantic_schema, dict):
583 | raise ToolInputError("Input 'semantic_schema' must be a non-empty dictionary representing a JSON schema.", param_name="semantic_schema", provided_value=semantic_schema)
584 |
585 | try:
586 | # Prepare model ID based on provider format
587 | effective_model = model
588 | # Ensure model ID includes provider prefix if not already present
589 | if model and provider not in model:
590 | effective_model = f"{provider}/{model}"
591 |
592 | # Create a clear prompt explaining the task and providing the schema
593 | schema_str = json.dumps(semantic_schema, indent=2)
594 | prompt = f"Analyze the following text and extract information that conforms to the provided JSON schema. " \
595 | f"Populate the fields in the schema based *only* on information present in the text. " \
596 | f"If information for a field is not found, omit the field or use a null value as appropriate according to the schema. " \
597 | f"Return ONLY the populated JSON object conforming to the schema.\n\n" \
598 | f"JSON Schema:\n```json\n{schema_str}\n```\n\n" \
599 | f"Text:\n```\n{text}\n```\nPopulated JSON object:"
600 |
601 | # Use JSON mode if supported by the provider
602 | additional_params = {}
603 | if provider == Provider.OPENAI.value:
604 | additional_params["response_format"] = {"type": "json_object"}
605 |
606 | # Use the standardized completion tool instead of direct provider call
607 | completion_result = await generate_completion(
608 | prompt=prompt,
609 | model=effective_model,
610 | provider=provider,
611 | temperature=0.0, # Low temp for precise extraction
612 | max_tokens=4000,
613 | additional_params=additional_params
614 | )
615 |
616 | # Extract data from the standardized result format
617 | processing_time = completion_result.get("processing_time", time.time() - start_time)
618 | actual_model_used = completion_result.get("model", effective_model)
619 | raw_text_output = completion_result.get("text", "").strip()
620 | token_info = completion_result.get("tokens", {})
621 | cost = completion_result.get("cost", 0.0)
622 | tool_success = completion_result.get("success", False)
623 |
624 | # If the tool call failed, propagate the error
625 | if not tool_success:
626 | error_message = completion_result.get("error", "Unknown error during completion")
627 | raise ProviderError(
628 | f"Semantic schema extraction failed: {error_message}",
629 | provider=provider,
630 | model=actual_model_used
631 | )
632 |
633 | # Use our robust parsing function
634 | extracted_data, success, error_message = _extract_and_parse_json(raw_text_output)
635 |
636 | # Validate against the provided schema if extraction succeeded
637 | if success:
638 | try:
639 | jsonschema.validate(instance=extracted_data, schema=semantic_schema)
640 | logger.debug("Successfully parsed and validated semantic schema JSON.")
641 | except jsonschema.exceptions.ValidationError as e:
642 | error_message = f"Warning: LLM output did not strictly conform to schema: {str(e)}"
643 | logger.warning(f"{error_message}. Data: {extracted_data}")
644 | # Still consider extraction successful if parsable
645 |
646 | logger.info(f"Semantic schema extraction attempt complete. Success: {success}. Time: {processing_time:.2f}s")
647 | return {
648 | "data": extracted_data,
649 | "raw_text": raw_text_output if not success else None,
650 | "model": actual_model_used,
651 | "provider": provider,
652 | "tokens": token_info,
653 | "cost": cost,
654 | "processing_time": processing_time,
655 | "success": success,
656 | "error": error_message
657 | }
658 |
659 | except Exception as e:
660 | error_model = model or f"{provider}/default"
661 | if isinstance(e, ProviderError):
662 | raise
663 | else:
664 | raise ProviderError(f"Semantic schema extraction failed: {str(e)}", provider=provider, model=error_model, cause=e) from e
665 |
666 | # Note: This is a utility function, not typically exposed as a direct tool,
667 | # but kept here as it relates to extraction from LLM *responses*.
668 | # No standard decorators applied.
669 | async def extract_code_from_response(
670 | response_text: str,
671 | model: str = "openai/gpt-4.1-mini",
672 | timeout: int = 15,
673 | tracker: Optional[Any] = None # Add optional tracker (use Any for now to avoid circular import)
674 | ) -> str:
675 | """Extracts code blocks from LLM response text, using an LLM for complex cases.
676 |
677 | Primarily designed to clean up responses from code generation tasks.
678 | It first tries simple regex matching for markdown code fences. If that fails,
679 | it uses a specified LLM to identify and extract the code.
680 |
681 | Args:
682 | response_text: The text potentially containing code blocks.
683 | model: The specific model ID to use for LLM-based extraction if regex fails.
684 | Defaults to "openai/gpt-4.1-mini".
685 | timeout: Timeout in seconds for the LLM extraction call. Default 15.
686 | tracker: (Optional) An instance of a CostTracker for tracking cost and metrics.
687 |
688 | Returns:
689 | The extracted code block as a string, or the original text if no code is found or extraction fails.
690 | """
691 | if not response_text or not isinstance(response_text, str):
692 | return "" # Return empty if no input
693 |
694 | # Try simple regex extraction first (common markdown format)
695 | code_blocks = re.findall(r"```(?:[a-zA-Z0-9\-_]*\n)?(.*?)\n?```", response_text, re.DOTALL)
696 |
697 | if code_blocks:
698 | # Return the content of the first code block found
699 | logger.debug("Extracted code using regex.")
700 | return code_blocks[0].strip()
701 |
702 | # If regex fails, use LLM for more robust extraction
703 | logger.debug("Regex failed, attempting LLM-based code extraction.")
704 | try:
705 | # Parse provider from model string if it contains a slash
706 | provider_id = model.split('/')[0] if '/' in model else Provider.OPENAI.value
707 | effective_model = model # Use the full model string as provided
708 |
709 | prompt = f"Extract only the main code block from the following text. Return just the code itself, without any explanations or markdown fences.\n\nText:\n```\n{response_text}\n```\n\nCode:"
710 |
711 | # Set a timeout using asyncio.wait_for
712 | completion_task = generate_completion(
713 | prompt=prompt,
714 | model=effective_model,
715 | provider=provider_id,
716 | temperature=0.0,
717 | max_tokens=len(response_text) # Allow enough tokens, approx original length
718 | )
719 |
720 | # Use asyncio.wait_for to implement timeout
721 | completion_result = await asyncio.wait_for(completion_task, timeout=timeout)
722 |
723 | # Check if completion succeeded
724 | if not completion_result.get("success", False):
725 | logger.warning(f"LLM code extraction failed: {completion_result.get('error', 'Unknown error')}. Returning original text.")
726 | return response_text
727 |
728 | # Track cost if tracker is provided
729 | if tracker:
730 | try:
731 |                 # Pull values from the completion result dict with safe defaults.
732 |                 # Create a temporary object for tracking, since CostTracker expects attributes.
733 | class Trackable:
734 | pass
735 | trackable = Trackable()
736 | trackable.cost = completion_result.get('cost', 0.0)
737 | trackable.input_tokens = completion_result.get('tokens', {}).get('input', 0)
738 | trackable.output_tokens = completion_result.get('tokens', {}).get('output', 0)
739 | trackable.provider = provider_id
740 | trackable.model = completion_result.get('model', effective_model)
741 | trackable.processing_time = completion_result.get('processing_time', 0.0)
742 | tracker.add_call(trackable)
743 | except Exception as track_err:
744 | logger.warning(f"Could not track cost for LLM code extraction: {track_err}", exc_info=False)
745 |
746 | extracted_code = completion_result.get("text", "").strip()
747 | logger.info(f"Extracted code using LLM ({effective_model}).")
748 | return extracted_code
749 |
750 | except asyncio.TimeoutError:
751 | logger.warning(f"LLM code extraction timed out after {timeout}s. Returning original text.")
752 | return response_text # Fallback to original on timeout
753 | except Exception as e:
754 | logger.error(f"LLM code extraction failed: {str(e)}. Returning original text.", exc_info=False)
755 | return response_text # Fallback to original on error
756 |
757 | class ExtractionTools(BaseTool):
758 | """Tools for extracting structured data from unstructured text."""
759 |
760 | tool_name = "extraction"
761 | description = "Tools for extracting structured data from unstructured text, including JSON, tables, and key-value pairs."
762 |
763 | def __init__(self, gateway):
764 | """Initialize extraction tools.
765 |
766 | Args:
767 | gateway: Gateway or MCP server instance
768 | """
769 | super().__init__(gateway)
770 | self._register_tools()
771 |
772 | def _register_tools(self):
773 | """Register extraction tools with MCP server."""
774 | # Register the extraction functions as tools
775 | self.mcp.tool(name="extract_json")(extract_json)
776 | self.mcp.tool(name="extract_table")(extract_table)
777 | self.mcp.tool(name="extract_key_value_pairs")(extract_key_value_pairs)
778 | self.mcp.tool(name="extract_semantic_schema")(extract_semantic_schema)
779 | self.logger.info("Registered extraction tools", emoji_key="success")
```
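
The extraction tools above are plain async coroutines, so they can be exercised directly outside the MCP server. Below is a minimal usage sketch, assuming the file lives at `ultimate_mcp_server.tools.extraction` and that provider API keys are already configured through the server's config system (both assumptions, not shown in this file):

```python
import asyncio

from ultimate_mcp_server.tools.extraction import (  # assumed module path
    extract_key_value_pairs,
    extract_semantic_schema,
)


async def main() -> None:
    text = "Name: Alice\nRole: Engineer\nLocation: Remote"

    # Free-form key-value extraction; parsed pairs come back under "data"
    kv = await extract_key_value_pairs(text=text, provider="openai")
    print(kv["data"], kv["cost"])

    # Schema-guided extraction; the LLM populates only the fields we define
    schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "role": {"type": "string"},
        },
    }
    result = await extract_semantic_schema(text=text, semantic_schema=schema)
    print(result["data"], result["success"])


if __name__ == "__main__":
    asyncio.run(main())
```

Both calls route through `generate_completion`, so the returned dictionaries carry the same token, cost, and timing metadata as any other completion in the server.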
--------------------------------------------------------------------------------
/ultimate_mcp_server/core/providers/base.py:
--------------------------------------------------------------------------------
```python
1 | """Base LLM provider interface."""
2 | import abc
3 | import time
4 | from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
5 |
6 | from ultimate_mcp_server.config import get_config
7 | from ultimate_mcp_server.constants import COST_PER_MILLION_TOKENS, Provider
8 | from ultimate_mcp_server.utils import get_logger
9 |
10 | logger = get_logger(__name__)
11 |
12 |
13 | class ModelResponse:
14 | """
15 | Standard response format for all LLM provider completions in the Ultimate MCP Server.
16 |
17 | This class provides a unified representation of responses from different LLM providers,
18 | normalizing their various formats into a consistent structure. It handles common
19 | operations like:
20 |
21 | - Storing and accessing the generated text content
22 | - Tracking token usage statistics (input, output, total)
23 | - Computing cost estimates based on provider pricing
24 | - Preserving metadata and raw responses for debugging
25 | - Formatting response data for serialization
26 |
27 | The ModelResponse serves as a bridge between provider-specific response formats
28 | and the standardized interface presented by the Ultimate MCP Server. This allows
29 | client code to work with responses in a consistent way regardless of which
30 | provider generated them.
31 |
32 | All provider implementations in the system should return responses wrapped in
33 | this class to ensure consistent behavior across the application.
34 | """
35 |
36 | def __init__(
37 | self,
38 | text: str,
39 | model: str,
40 | provider: str,
41 | input_tokens: int = 0,
42 | output_tokens: int = 0,
43 | total_tokens: int = 0,
44 | processing_time: float = 0.0,
45 | raw_response: Any = None,
46 | metadata: Optional[Dict[str, Any]] = None,
47 | ):
48 | """
49 | Initialize a standardized model response object.
50 |
51 | This constructor creates a unified response object that normalizes the various
52 | response formats from different LLM providers into a consistent structure.
53 | It automatically calculates derived values like total token count and cost
54 | estimates based on the provided parameters.
55 |
56 | Args:
57 | text: The generated text content from the LLM. This is the primary output
58 | that would be presented to users.
59 | model: The specific model name that generated this response (e.g., "gpt-4o",
60 | "claude-3-5-haiku-20241022").
61 | provider: The provider name that served this response (e.g., "openai",
62 | "anthropic").
63 | input_tokens: Number of input/prompt tokens consumed in this request.
64 | Used for usage tracking and cost calculation.
65 | output_tokens: Number of output/completion tokens generated in this response.
66 | Used for usage tracking and cost calculation.
67 | total_tokens: Total token count for the request. If not explicitly provided,
68 | calculated as input_tokens + output_tokens.
69 | processing_time: Time taken to generate the response in seconds, measured
70 | from request initiation to response completion.
71 | raw_response: The original, unmodified response object from the provider's API.
72 | Useful for debugging and accessing provider-specific data.
73 | metadata: Additional response metadata as a dictionary. Can contain provider-specific
74 | information like finish_reason, logprobs, etc.
75 | """
76 | self.text = text
77 | self.model = model
78 | self.provider = provider
79 | self.input_tokens = input_tokens
80 | self.output_tokens = output_tokens
81 | self.total_tokens = total_tokens or (input_tokens + output_tokens)
82 | self.processing_time = processing_time
83 | self.raw_response = raw_response
84 | self.metadata = metadata or {}
85 |
86 | # Calculate cost based on token usage
87 | self.cost = self._calculate_cost()
88 |
89 | def _calculate_cost(self) -> float:
90 | """
91 | Calculate the estimated cost of the request based on token usage and current pricing.
92 |
93 | This internal method computes a cost estimate by:
94 | 1. Looking up the per-million-token costs for the specific model used
95 | 2. Applying different rates for input (prompt) and output (completion) tokens
96 | 3. Computing the final cost based on actual token counts
97 |
98 | If pricing data isn't available for the specific model, the method falls back
99 | to reasonable default estimations and logs a warning.
100 |
101 | Returns:
102 |             Estimated cost in USD as a floating-point value. Returns 0.0 if the model name
103 |             or token counts are unavailable; unrecognized models fall back to default estimates.
104 |
105 | Note:
106 | This is an estimation and may not precisely match actual billing from providers,
107 | especially as pricing changes over time or for custom deployment configurations.
108 | """
109 | if not self.model or not self.input_tokens or not self.output_tokens:
110 | return 0.0
111 |
112 | # Extract model name without provider prefix (e.g., strip "openai/" from "openai/gpt-4o")
113 | model_name = self.model
114 | if "/" in model_name:
115 | model_name = model_name.split("/", 1)[1]
116 |
117 | # Get cost per token for this model
118 | model_costs = COST_PER_MILLION_TOKENS.get(model_name, None)
119 | if not model_costs:
120 | # If model not found, use a default estimation
121 | model_costs = {"input": 0.5, "output": 1.5}
122 | logger.warning(
123 | f"Cost data not found for model {self.model}. Using estimates.",
124 | emoji_key="cost"
125 | )
126 |
127 | # Calculate cost
128 | input_cost = (self.input_tokens / 1_000_000) * model_costs["input"]
129 | output_cost = (self.output_tokens / 1_000_000) * model_costs["output"]
130 |
131 | return input_cost + output_cost
132 |
133 | def to_dict(self) -> Dict[str, Any]:
134 | """
135 | Convert the response object to a dictionary suitable for serialization.
136 |
137 | This method creates a structured dictionary representation of the response
138 | that can be easily serialized to JSON or other formats. The dictionary
139 | preserves all important fields while organizing them into a clean,
140 | hierarchical structure.
141 |
142 | The token usage statistics are grouped under a 'usage' key, making it
143 | easier to access and analyze metrics separately from the content.
144 |
145 | Returns:
146 | A dictionary containing all relevant response data with the following structure:
147 | {
148 | "text": str, # The generated text content
149 | "model": str, # Model name used
150 | "provider": str, # Provider name
151 | "usage": { # Token usage statistics
152 | "input_tokens": int,
153 | "output_tokens": int,
154 | "total_tokens": int
155 | },
156 | "processing_time": float, # Time taken in seconds
157 | "cost": float, # Estimated cost in USD
158 | "metadata": dict # Additional response metadata
159 | }
160 |
161 | Example:
162 | ```python
163 | response = await provider.generate_completion(prompt="Hello")
164 | response_dict = response.to_dict()
165 | json_response = json.dumps(response_dict)
166 | ```
167 | """
168 | return {
169 | "text": self.text,
170 | "model": self.model,
171 | "provider": self.provider,
172 | "usage": {
173 | "input_tokens": self.input_tokens,
174 | "output_tokens": self.output_tokens,
175 | "total_tokens": self.total_tokens,
176 | },
177 | "processing_time": self.processing_time,
178 | "cost": self.cost,
179 | "metadata": self.metadata,
180 | }
181 |
182 |
183 | class BaseProvider(abc.ABC):
184 | """
185 | Abstract base class that defines the interface for all LLM providers in Ultimate MCP Server.
186 |
187 | This class establishes the common API contract that all provider implementations must follow,
188 | ensuring consistent behavior regardless of the underlying LLM service (OpenAI, Anthropic, etc.).
189 | It standardizes key operations like:
190 |
191 | - Provider initialization and API key management
192 | - Text completion generation (both synchronous and streaming)
193 | - Model listing and default model selection
194 | - API key validation
195 | - Request timing and performance tracking
196 |
197 | By implementing this interface, each provider ensures compatibility with the broader
198 | Ultimate MCP Server framework. This abstraction layer allows the system to work with multiple
199 | LLM providers interchangeably, while hiding the provider-specific implementation details
200 | from the rest of the application.
201 |
202 | Provider implementations should inherit from this class and override all abstract methods.
203 | They may also extend the interface with provider-specific functionality as needed,
204 | though core components of the Ultimate MCP Server should rely only on the methods defined
205 | in this base class to ensure provider-agnostic operation.
206 |
207 | Usage example:
208 | ```python
209 | class OpenAIProvider(BaseProvider):
210 | provider_name = "openai"
211 |
212 | async def initialize(self) -> bool:
213 | # OpenAI-specific initialization...
214 |
215 | async def generate_completion(self, prompt: str, **kwargs) -> ModelResponse:
216 | # OpenAI-specific completion implementation...
217 |
218 | # Other required method implementations...
219 | ```
220 | """
221 |
222 | provider_name: str = "base"
223 |
224 | def __init__(self, api_key: Optional[str] = None, **kwargs):
225 | """Initialize the provider.
226 |
227 | Args:
228 | api_key: API key for the provider
229 | **kwargs: Additional provider-specific options
230 | """
231 |         # API keys are no longer read from the environment here; the config
232 |         # system supplies them, so a missing api_key simply remains None until
233 |         # the configuration layer provides one
234 |
235 | self.api_key = api_key
236 | self.options = kwargs
237 | self.client = None
238 | self.logger = get_logger(f"provider.{self.provider_name}")
239 |
240 | @abc.abstractmethod
241 | async def initialize(self) -> bool:
242 | """
243 | Initialize the provider client and verify API connectivity.
244 |
245 | This abstract method defines the standard interface for initializing a provider
246 | connection. All provider implementations must override this method with their
247 | provider-specific initialization logic while maintaining this signature.
248 |
249 | The initialization process typically includes:
250 | 1. Creating the provider-specific client with the API key and configuration
251 | 2. Setting up any required HTTP headers, authentication, or session management
252 | 3. Verifying API connectivity with a lightweight request when possible
253 | 4. Setting up provider-specific rate limiting or retry mechanisms
254 | 5. Loading any required provider-specific resources or configurations
255 |
256 | This method is called:
257 | - When a provider is first instantiated via the get_provider factory
258 | - When a provider connection needs to be refreshed or re-established
259 | - Before any operations that require an active client connection
260 |
261 | Returns:
262 | bool: True if initialization was successful and the provider is ready for use,
263 | False if initialization failed for any reason. A False return will
264 | typically prevent the provider from being used by the calling code.
265 |
266 | Raises:
267 | No exceptions should be raised directly. All errors should be handled
268 | internally, logged appropriately, and reflected in the return value.
269 | If initialization fails, detailed error information should be logged
270 | to help diagnose the issue.
271 |
272 | Implementation guidelines:
273 | - Handle API keys securely, avoiding logging them even in error messages
274 | - Implement retries with exponential backoff for transient errors
275 | - Set reasonable timeouts on API connection attempts
276 | - Log detailed diagnostics on initialization failures
277 | - Cache expensive resources to improve subsequent initialization times
278 | """
279 | pass
280 |
281 | @abc.abstractmethod
282 | async def generate_completion(
283 | self,
284 | prompt: str,
285 | model: Optional[str] = None,
286 | max_tokens: Optional[int] = None,
287 | temperature: float = 0.7,
288 | **kwargs
289 | ) -> ModelResponse:
290 | """
291 | Generate a text completion from the provider (non-streaming).
292 |
293 | This abstract method defines the standard interface for generating text completions
294 | from any LLM provider. All provider implementations must override this method with
295 | their provider-specific implementation while maintaining this signature.
296 |
297 | The method handles sending a prompt to the LLM, processing the response, and
298 | converting it to the standardized ModelResponse format. It is responsible for
299 | handling provider-specific API calls, error handling, and token counting.
300 |
301 | Args:
302 | prompt: The text prompt to send to the model. This is the primary input that
303 | the model will generate a completion for.
304 | model: The specific model identifier to use (e.g., "gpt-4o", "claude-3-opus").
305 | If None, the provider's default model will be used.
306 | max_tokens: Maximum number of tokens to generate in the response. If None,
307 | the provider's default or maximum limit will be used.
308 | temperature: Controls randomness in the output. Lower values (e.g., 0.1) make
309 | the output more deterministic, while higher values (e.g., 1.0)
310 | make it more random and creative. Range is typically 0.0-1.0
311 | but may vary by provider.
312 | **kwargs: Additional provider-specific parameters such as:
313 | - top_p: Nucleus sampling parameter (alternative to temperature)
314 | - stop_sequences: Tokens/strings that will stop generation when encountered
315 | - frequency_penalty: Penalty for using frequent tokens
316 | - presence_penalty: Penalty for repeated tokens
317 | - system_prompt: System instructions for providers that support it
318 | - response_format: Structured format request (e.g., JSON)
319 |
320 | Returns:
321 | ModelResponse: A standardized response object containing:
322 | - The generated text
323 | - Token usage statistics (input, output, total)
324 | - Cost estimation
325 | - Processing time
326 | - Provider and model information
327 | - Any provider-specific metadata
328 |
329 | Raises:
330 | ValueError: For invalid parameter combinations or values
331 | ConnectionError: For network or API connectivity issues
332 | AuthenticationError: For API key or authentication problems
333 | RateLimitError: When provider rate limits are exceeded
334 | ProviderError: For other provider-specific errors
335 |
336 | Implementation guidelines:
337 | - Use the provider's official client library when available
338 | - Handle error conditions gracefully with meaningful error messages
339 | - Track token usage precisely for accurate cost estimation
340 | - Measure processing time with the process_with_timer utility
341 | - Include relevant provider-specific metadata in the response
342 | """
343 | pass
344 |
345 | @abc.abstractmethod
346 | async def generate_completion_stream(
347 | self,
348 | prompt: str,
349 | model: Optional[str] = None,
350 | max_tokens: Optional[int] = None,
351 | temperature: float = 0.7,
352 | **kwargs
353 | ) -> AsyncGenerator[Tuple[str, Dict[str, Any]], None]:
354 | """
355 | Generate a streaming text completion with real-time token delivery.
356 |
357 | This abstract method defines the standard interface for streaming text completions
358 | from any LLM provider. All provider implementations must override this method with
359 | their provider-specific streaming implementation while maintaining this signature.
360 |
361 | Unlike the non-streaming generate_completion method, this method:
362 | - Returns content incrementally as it's generated
363 | - Uses an async generator that yields content chunks
364 | - Provides metadata with each chunk to track generation progress
365 | - Enables real-time display and processing of partial responses
366 |
367 | Args:
368 | prompt: The text prompt to send to the model. This is the primary input that
369 | the model will generate a completion for.
370 | model: The specific model identifier to use (e.g., "gpt-4o", "claude-3-opus").
371 | If None, the provider's default model will be used.
372 | max_tokens: Maximum number of tokens to generate in the response. If None,
373 | the provider's default or maximum limit will be used.
374 | temperature: Controls randomness in the output. Lower values (e.g., 0.1) make
375 | the output more deterministic, while higher values (e.g., 1.0)
376 | make it more random and creative. Range is typically 0.0-1.0
377 | but may vary by provider.
378 | **kwargs: Additional provider-specific parameters, identical to those
379 | supported by generate_completion.
380 |
381 | Yields:
382 | Tuple[str, Dict[str, Any]]: Each yield returns:
383 | - str: The next chunk of generated text
384 | - Dict: Metadata about the generation process, including at minimum:
385 | - done: Boolean indicating if this is the final chunk
386 | - chunk_index: Integer index of the current chunk (0-based)
387 |
388 | The metadata may also include provider-specific information such as:
389 | - finish_reason: Why the generation stopped (e.g., "stop", "length")
390 | - token_count: Running count of tokens generated
391 | - model: Model information if it changed during generation
392 |
393 | Raises:
394 | ValueError: For invalid parameter combinations or values
395 | ConnectionError: For network or API connectivity issues
396 | AuthenticationError: For API key or authentication problems
397 | RateLimitError: When provider rate limits are exceeded
398 | ProviderError: For other provider-specific errors
399 |
400 | Implementation guidelines:
401 | - Use the provider's official streaming endpoints when available
402 | - Ensure chunks represent logical breaks where possible (e.g., words, not partial UTF-8)
403 | - Handle connection interruptions gracefully
404 | - Set the 'done' flag to True only in the final yielded chunk
405 | - Provide consistent metadata structure across all yielded chunks
406 | """
407 | pass
408 |
409 | async def list_models(self) -> List[Dict[str, Any]]:
410 | """
411 | List available models from this provider with their capabilities and metadata.
412 |
413 | This method retrieves information about all available models from the provider,
414 | including their identifiers, capabilities, and contextual metadata. Providers
415 | typically override this method to query their API's model list endpoint and
416 | normalize the responses into a consistent format.
417 |
418 | The base implementation returns a minimal default model entry, but provider-specific
419 | implementations should:
420 | 1. Query the provider's models API or endpoint
421 | 2. Transform provider-specific model data into the standard format
422 | 3. Enrich the models with useful metadata like token limits and capabilities
423 | 4. Filter models based on access permissions if applicable
424 |
425 | Returns:
426 | A list of dictionaries, each representing a model with at least these keys:
427 | - id (str): The model identifier (e.g., "gpt-4o", "claude-3-opus")
428 | - provider (str): The provider name (e.g., "openai", "anthropic")
429 | - description (str): A human-readable description of the model
430 |
431 | Models may also include additional metadata such as:
432 | - max_tokens (int): Maximum combined tokens (prompt + completion)
433 | - created (str): Creation/version date of the model
434 | - pricing (dict): Cost information for input/output tokens
435 | - capabilities (list): Features the model supports (e.g., "vision", "function_calling")
436 | - deprecated (bool): Whether the model is deprecated or scheduled for retirement
437 |
438 | Raises:
439 | ConnectionError: If the provider API cannot be reached
440 | AuthenticationError: If authentication fails during the request
441 | ProviderError: For other provider-specific errors
442 |
443 | Note:
444 | Model data may be cached internally to reduce API calls. Providers should
445 | implement appropriate caching strategies to balance freshness with performance.
446 | """
447 | # Default implementation - override in provider-specific classes
448 | return [
449 | {
450 | "id": "default-model",
451 | "provider": self.provider_name,
452 | "description": "Default model",
453 | }
454 | ]
455 |
456 | def get_default_model(self) -> str:
457 | """
458 | Get the default model identifier for this provider.
459 |
460 | This method returns the standard or recommended model identifier to use when
461 | no specific model is requested. Each provider implementation must override this
462 | method to specify its default model.
463 |
464 | The default model should be:
465 | - Generally available to all users of the provider
466 | - Well-balanced between capabilities and cost
467 | - Appropriate for general-purpose text generation tasks
468 | - Stable and reliable for production use
469 |
470 | The implementation should consider:
471 | 1. Provider-specific configuration settings
472 | 2. Environment variables or system settings
473 | 3. User access level and permissions
474 | 4. Regional availability of models
475 | 5. Current model deprecation status
476 |
477 | Returns:
478 | str: The model identifier string (e.g., "gpt-4o", "claude-3-haiku")
479 | that will be used when no model is explicitly specified.
480 | This identifier should be valid and usable without additional prefixing.
481 |
482 | Raises:
483 | NotImplementedError: In the base class implementation, signaling that
484 | subclasses must override this method.
485 |
486 | Note:
487 | Provider implementations should periodically review and update their default
488 | model selections as newer, more capable models become available or as pricing
489 | structures change.
490 | """
491 | raise NotImplementedError("Provider must implement get_default_model")
492 |
493 | async def check_api_key(self) -> bool:
494 | """
495 | Check if the API key for this provider is valid and functional.
496 |
497 | This method verifies that the configured API key is valid and can be used
498 | to authenticate with the provider's API. The default implementation simply
499 | checks if an API key is present, but provider-specific implementations
500 | should override this to perform actual validation against the provider's API.
501 |
502 | A proper implementation should:
503 | 1. Make a lightweight API call to an endpoint that requires authentication
504 | 2. Handle authentication errors specifically to differentiate from other failures
505 | 3. Cache results when appropriate to avoid excessive validation calls
506 | 4. Respect rate limits during validation
507 |
508 | This method is typically called during:
509 | - Server startup to verify all configured providers
510 | - Before first use of a provider to ensure it's properly configured
511 | - Periodically as a health check to detect expired or revoked keys
512 | - After configuration changes that might affect authentication
513 |
514 | Returns:
515 | bool: True if the API key is valid and usable, False otherwise.
516 | The default implementation returns True if an API key is present,
517 | which does not guarantee the key is actually valid or functional.
518 |
519 | Note:
520 | Provider implementations should log descriptive error messages when
521 | validation fails to help with troubleshooting, but should avoid logging
522 | the actual API key or other sensitive credentials.
523 | """
524 | # Default implementation just checks if key exists
525 | return bool(self.api_key)
526 |
527 | async def process_with_timer(
528 | self,
529 | func: callable,
530 | *args,
531 | **kwargs
532 | ) -> Tuple[Any, float]:
533 | """
534 | Process an async function call with precise timing measurement.
535 |
536 | This utility method provides a standardized way to execute any async function
537 | while measuring its execution time with high precision. It's particularly useful for:
538 |
539 | - Tracking LLM API call latency for performance monitoring
540 | - Measuring request-response round trip times
541 | - Providing accurate timing data for usage reports and optimizations
542 | - Including processing time in log messages and response metadata
543 |
544 | The method handles the time measurement boilerplate, ensuring consistent
545 | timing practices across all provider implementations. The measured processing
546 | time is returned alongside the function's result, allowing both to be used
547 | in subsequent operations.
548 |
549 | Args:
550 | func: The async function (callable) to execute and time. This should be
551 | an awaitable function that performs the actual operation.
552 | *args: Positional arguments to pass to the function.
553 | **kwargs: Keyword arguments to pass to the function.
554 |
555 | Returns:
556 | Tuple containing:
557 | - The result returned by the executed function (any type)
558 | - The processing time in seconds as a float, measured with
559 | high precision from just before the function call to just after
560 | it completes.
561 |
562 | Example usage:
563 | ```python
564 | # Timing an API call
565 | response, duration = await self.process_with_timer(
566 | self.client.create,
567 | model="gpt-4o",
568 | prompt="Hello, world!"
569 | )
570 |
571 | # Using the measured time in a response
572 | return ModelResponse(
573 | text=response.choices[0].text,
574 | model="gpt-4o",
575 | provider=self.provider_name,
576 | processing_time=duration
577 | )
578 | ```
579 | """
580 | start_time = time.time()
581 | result = await func(*args, **kwargs)
582 | processing_time = time.time() - start_time
583 |
584 | return result, processing_time
585 |
586 |
587 | def parse_model_string(model_string: str) -> Tuple[Optional[str], str]:
588 | """Parse a model string that might include a provider prefix.
589 |
590 | This function parses a model identifier string that may include a provider prefix
591 | (e.g., 'openai/gpt-4o' or 'anthropic:claude-3-sonnet'). It supports two separator
592 | formats: forward slash ('/') and colon (':'). If a valid provider prefix is found,
593 | the function returns the provider name and model name as separate strings.
594 |
595 | Provider validation is performed against the Provider enum values to ensure the
596 | prefix represents a supported provider. If no valid provider prefix is found, the
597 | provider component will be None, indicating the model should use the default provider.
598 |
599 | This function is particularly useful in contexts where users can specify models with optional
600 | provider prefixes, allowing the system to route requests to the appropriate provider
601 | even when the provider isn't explicitly specified elsewhere.
602 |
603 | Args:
604 | model_string: A model string, possibly including a provider prefix.
605 | Examples: "openai/gpt-4.1-mini", "anthropic/claude-3-opus",
606 | "gemini:gemini-pro", "gpt-4o" (no provider)
607 |
608 | Returns:
609 | Tuple of (provider_name, model_name):
610 | - provider_name (str or None): Lowercase provider name if a valid prefix was found,
611 | or None if no valid provider prefix was detected.
612 | - model_name (str): The model identifier without the provider prefix.
613 |
614 | Examples:
615 | >>> parse_model_string("openai/gpt-4o")
616 | ('openai', 'gpt-4o')
617 |
618 | >>> parse_model_string("anthropic:claude-3-opus")
619 | ('anthropic', 'claude-3-opus')
620 |
621 | >>> parse_model_string("gpt-4o") # No provider prefix
622 | (None, 'gpt-4o')
623 |
624 | >>> parse_model_string("unknown/model-name") # Invalid provider
625 | (None, 'unknown/model-name')
626 | """
627 | separator = None
628 | if '/' in model_string:
629 | separator = '/'
630 | elif ':' in model_string:
631 | separator = ':'
632 |
633 | if separator:
634 | # Try to extract provider prefix
635 | parts = model_string.split(separator, 1)
636 | if len(parts) == 2:
637 | provider_prefix, model_name = parts
638 |
639 | # Check if the prefix is a valid provider name
640 | # Use list comprehension for cleaner check against Provider enum values
641 | valid_providers = [p.value.lower() for p in Provider]
642 | if provider_prefix.lower() in valid_providers:
643 | return provider_prefix.lower(), model_name
644 |
645 | # No valid provider prefix found or no separator
646 | return None, model_string
647 |
648 |
649 | async def get_provider(provider_name: str, **kwargs) -> BaseProvider:
650 | """
651 | Factory function to dynamically create and initialize a provider instance by name.
652 |
653 | This function serves as the central provider instantiation mechanism in the Ultimate MCP Server,
654 | dynamically creating and initializing the appropriate provider implementation based on
655 | the requested provider name. It handles:
656 |
657 | 1. Provider name validation and normalization
658 | 2. Provider class selection based on the standardized Provider enum
659 | 3. Model string parsing to extract provider information from model identifiers
660 | 4. Configuration retrieval from the Ultimate MCP Server config system
661 | 5. Provider instance creation with appropriate parameters
662 | 6. Provider initialization and validation
663 |
664 | The function supports specifying provider names directly or extracting them from
665 | model identifiers that include provider prefixes (e.g., "openai/gpt-4o"). This flexibility
666 | allows for more intuitive access to providers when working with specific models.
667 |
668 | Args:
669 | provider_name: Provider identifier to instantiate. This should match one of the
670 | values in the Provider enum (case-insensitive). Examples include
671 | "openai", "anthropic", "gemini", etc.
672 | **kwargs: Additional provider-specific configuration options to pass to the
673 | provider's constructor. Common options include:
674 | - api_key: Override the API key from configuration
675 | - model: Model name to use (may include provider prefix)
676 | - base_url: Alternative API endpoint URL
677 | - organization: Organization ID for providers that support it
678 |
679 | Returns:
680 | An initialized provider instance ready for use. The specific return type will
681 | be a subclass of BaseProvider corresponding to the requested provider.
682 |
683 | Raises:
684 | ValueError: If the provider name is invalid or initialization fails. This ensures
685 | that only fully functional provider instances are returned.
686 |
687 | Example usage:
688 | ```python
689 | # Basic usage with direct provider name
690 | openai_provider = await get_provider("openai")
691 |
692 | # Using a model string with provider prefix
693 | provider = await get_provider("openai", model="anthropic/claude-3-opus")
694 | # The above actually returns an AnthropicProvider because the model string
695 | # overrides the provider_name parameter
696 |
697 | # With additional configuration
698 | custom_provider = await get_provider(
699 | "openai",
700 | api_key="custom-key",
701 | base_url="https://custom-endpoint.example.com/v1",
702 | model="gpt-4o"
703 | )
704 | ```
705 | """
706 | cfg = get_config()
707 | provider_name = provider_name.lower().strip()
708 |
709 | # If a model was provided, check if it has a provider prefix
710 | # This helps with models like "openai/gpt-4.1-mini" to ensure they go to the right provider
711 | if 'model' in kwargs and isinstance(kwargs['model'], str):
712 | extracted_provider, extracted_model = parse_model_string(kwargs['model'])
713 | if extracted_provider:
714 | # If we have a provider prefix in the model string, use that provider
715 | # and update the model name to remove the prefix
716 | provider_name = extracted_provider
717 | kwargs['model'] = extracted_model
718 | logger.debug(f"Extracted provider '{provider_name}' and model '{extracted_model}' from model string")
719 |
720 | from ultimate_mcp_server.core.providers.anthropic import AnthropicProvider
721 | from ultimate_mcp_server.core.providers.deepseek import DeepSeekProvider
722 | from ultimate_mcp_server.core.providers.gemini import GeminiProvider
723 | from ultimate_mcp_server.core.providers.grok import GrokProvider
724 | from ultimate_mcp_server.core.providers.ollama import OllamaProvider
725 | from ultimate_mcp_server.core.providers.openai import OpenAIProvider
726 | from ultimate_mcp_server.core.providers.openrouter import OpenRouterProvider
727 |
728 | providers = {
729 | Provider.OPENAI: OpenAIProvider,
730 | Provider.ANTHROPIC: AnthropicProvider,
731 | Provider.DEEPSEEK: DeepSeekProvider,
732 | Provider.GEMINI: GeminiProvider,
733 | Provider.OPENROUTER: OpenRouterProvider,
734 | Provider.GROK: GrokProvider,
735 | Provider.OLLAMA: OllamaProvider,
736 | }
737 |
738 | if provider_name not in providers:
739 | raise ValueError(f"Invalid provider name: {provider_name}")
740 |
741 | # Get the top-level 'providers' config object, default to None if it doesn't exist
742 | providers_config = getattr(cfg, 'providers', None)
743 |
744 | # Get the specific provider config (e.g., providers_config.openai) from the providers_config object
745 | # Default to None if providers_config is None or the specific provider attr doesn't exist
746 | provider_cfg = getattr(providers_config, provider_name, None) if providers_config else None
747 |
748 | # Now use provider_cfg to get the api_key if needed
749 | if 'api_key' not in kwargs and provider_cfg and hasattr(provider_cfg, 'api_key') and provider_cfg.api_key:
750 | kwargs['api_key'] = provider_cfg.api_key
751 |
752 | provider_class = providers[provider_name]
753 | instance = provider_class(**kwargs)
754 |
755 | # Initialize the provider immediately
756 | initialized = await instance.initialize()
757 | if not initialized:
758 | # Raise an error if initialization fails to prevent returning an unusable instance
759 | raise ValueError(f"Failed to initialize provider: {provider_name}")
760 |
761 | return instance
```
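
To make the cost bookkeeping in `ModelResponse` concrete, here is a small sketch of how the `cost` field is derived and how provider prefixes are parsed. The per-million-token rates in the comment are illustrative only; real rates come from `COST_PER_MILLION_TOKENS` in `ultimate_mcp_server.constants` and may differ:

```python
from ultimate_mcp_server.core.providers.base import ModelResponse, parse_model_string

# Provider prefix parsing: a valid prefix is split off, anything else passes through
assert parse_model_string("openai/gpt-4o") == ("openai", "gpt-4o")
assert parse_model_string("gpt-4o") == (None, "gpt-4o")

# Cost = input_tokens/1M * input_rate + output_tokens/1M * output_rate.
# With illustrative rates of $0.50 / $1.50 per million tokens:
#   200/1_000_000 * 0.50 + 100/1_000_000 * 1.50 = 0.00010 + 0.00015 = 0.00025 USD
response = ModelResponse(
    text="Hello!",
    model="gpt-4o",            # looked up in COST_PER_MILLION_TOKENS (real rates may differ)
    provider="openai",
    input_tokens=200,
    output_tokens=100,
    processing_time=0.8,
)
print(response.total_tokens)   # 300, computed from input + output when not supplied
print(response.to_dict()["usage"])
```

Because `_calculate_cost` falls back to default per-million estimates for unrecognized models, the `cost` field is always populated, but it should be treated as an estimate rather than a billing figure.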
--------------------------------------------------------------------------------
/ultimate_mcp_server/tools/sentiment_analysis.py:
--------------------------------------------------------------------------------
```python
1 | """Business-focused sentiment analysis tools for Ultimate MCP Server."""
2 | import json
3 | import time
4 | from typing import Any, Dict, List, Optional
5 |
6 | from ultimate_mcp_server.constants import Provider, TaskType
7 | from ultimate_mcp_server.exceptions import ProviderError, ToolError, ToolInputError
8 | from ultimate_mcp_server.tools.base import with_error_handling, with_tool_metrics
9 | from ultimate_mcp_server.tools.completion import generate_completion
10 | from ultimate_mcp_server.utils import get_logger
11 |
12 | logger = get_logger("ultimate_mcp_server.tools.business_sentiment")
13 |
14 | @with_tool_metrics
15 | @with_error_handling
16 | async def analyze_business_sentiment(
17 | text: str,
18 | industry: Optional[str] = None,
19 | analysis_mode: str = "standard",
20 | entity_extraction: bool = False,
21 | aspect_based: bool = False,
22 | competitive_analysis: bool = False,
23 | intent_detection: bool = False,
24 | risk_assessment: bool = False,
25 | language: str = "english",
26 | provider: str = Provider.OPENAI.value,
27 | model: Optional[str] = None,
28 | threshold_config: Optional[Dict[str, float]] = None
29 | ) -> Dict[str, Any]:
30 | """Performs comprehensive business-oriented sentiment analysis for commercial applications.
31 |
32 | This enterprise-grade tool analyzes customer feedback, reviews, support tickets, survey responses,
33 | social media mentions and other business text data to extract actionable insights. It provides
34 | customizable analysis for specific industries and use cases with options for deep-dive analysis.
35 |
36 | Args:
37 | text: The business text to analyze (feedback, review, survey response, etc.).
38 | industry: Optional industry context to tailor analysis (e.g., "retail", "financial_services",
39 | "healthcare", "hospitality", "technology", "telecommunications", "manufacturing").
40 | Improves accuracy by applying industry-specific terminology and benchmarks.
41 | analysis_mode: Type of analysis to perform:
42 | - "standard": Basic business sentiment with key indicators
43 | - "comprehensive": Detailed analysis with all available metrics
44 | - "customer_experience": Focus on satisfaction, loyalty, and effort
45 | - "product_feedback": Focus on feature sentiment and product improvement
46 | - "brand_perception": Focus on brand attributes and competitive positioning
47 | - "support_ticket": Optimized for support ticket prioritization and resolution
48 | - "sales_opportunity": Focus on purchase intent and sales readiness
49 | entity_extraction: Whether to identify and extract mentioned products, services, features,
50 | and business entities. Useful for pinpointing what customers are discussing.
51 | aspect_based: Whether to break down sentiment by specific aspects/features mentioned.
52 | Helps identify which specific product/service elements drive sentiment.
53 | competitive_analysis: Whether to identify competitor mentions and comparative sentiment.
54 | Useful for competitive intelligence and benchmarking.
55 | intent_detection: Whether to detect customer intents (e.g., purchase interest, cancellation
56 | risk, support request, recommendation intent, complaint, praise).
57 | risk_assessment: Whether to evaluate potential business risks (e.g., churn risk, PR risk,
58 | legal/compliance issues, potential escalation) based on the text.
59 | language: Language of the input text (supports multiple languages for global businesses).
60 | provider: The name of the LLM provider (e.g., "openai", "anthropic", "gemini").
61 | model: The specific model ID. If None, uses the provider's default model.
62 | threshold_config: Optional dictionary of threshold values for various metrics to customize
63 | sensitivity levels (e.g., {"churn_risk": 0.7, "urgency": 0.8}).
64 |
65 | Returns:
66 | A dictionary containing comprehensive business sentiment analysis:
67 | {
68 | "core_metrics": {
69 | "primary_sentiment": "positive", # Overall business sentiment
70 | "sentiment_score": 0.75, # Normalized score (-1.0 to 1.0)
71 | "confidence": 0.92, # Confidence in the assessment
72 | "satisfaction_score": 4.2, # Estimated satisfaction (1-5 scale)
73 | "nps_category": "promoter", # Predicted NPS category: detractor/passive/promoter
74 | "urgency": "low", # Action urgency assessment: low/medium/high/critical
75 | "actionability": 0.35 # How actionable the feedback is (0.0-1.0)
76 | },
77 | "business_dimensions": { # Business-specific metrics
78 | "customer_satisfaction": 0.82, # Satisfaction indicator (0.0-1.0)
79 | "product_sentiment": 0.75, # Product sentiment (0.0-1.0)
80 | "value_perception": 0.68, # Price-to-value perception (0.0-1.0)
81 | "ease_of_use": 0.90, # Usability perception when relevant (0.0-1.0)
82 | "customer_effort_score": 2.1, # Estimated CES (1-7 scale, lower is better)
83 | "loyalty_indicators": 0.85, # Loyalty/retention indicators (0.0-1.0)
84 | "recommendation_likelihood": 0.87 # Likelihood to recommend (0.0-1.0)
85 | },
86 | "intent_analysis": { # Only if intent_detection=True
87 | "purchase_intent": 0.15, # Purchase interest level (0.0-1.0)
88 | "churn_risk": 0.08, # Risk of customer churn (0.0-1.0)
89 | "support_needed": 0.75, # Likelihood customer needs support (0.0-1.0)
90 | "feedback_type": "suggestion", # Type: complaint/praise/question/suggestion
91 | "information_request": false # Whether customer is requesting information
92 | },
93 | "aspect_sentiment": { # Only if aspect_based=True
94 | "product_quality": 0.85,
95 | "customer_service": 0.92,
96 | "shipping_speed": 0.45,
97 | "return_process": 0.30,
98 | "website_usability": 0.78
99 | },
100 | "entity_extraction": { # Only if entity_extraction=True
101 | "products": ["Product X Pro", "Legacy Model"],
102 | "features": ["battery life", "touchscreen responsiveness"],
103 | "services": ["customer support", "technical assistance"],
104 | "mentioned_departments": ["billing", "technical support"]
105 | },
106 | "competitive_insights": { # Only if competitive_analysis=True
107 | "competitor_mentions": ["Competitor A", "Competitor B"],
108 | "comparative_sentiment": {
109 | "Competitor A": -0.2, # Negative comparison to competitor
110 | "Competitor B": 0.3 # Positive comparison to competitor
111 | },
112 | "perceived_advantages": ["price", "features"],
113 | "perceived_disadvantages": ["support response time"]
114 | },
115 | "risk_assessment": { # Only if risk_assessment=True
116 | "churn_probability": 0.32,
117 | "response_urgency": "medium",
118 | "pr_risk": "low",
119 | "legal_compliance_flags": ["data privacy concern"],
120 | "escalation_probability": 0.15
121 | },
122 | "message_characteristics": {
123 | "key_topics": ["product quality", "customer service", "pricing"],
124 | "key_phrases": ["extremely satisfied", "quick resolution"],
125 | "tone_indicators": ["appreciative", "constructive"],
126 | "clarity": 0.9, # How clear/specific the feedback is (0.0-1.0)
127 | "subjectivity": 0.4, # Subjective vs. objective content (0.0-1.0)
128 | "emotional_intensity": 0.65 # Intensity of emotion expressed (0.0-1.0)
129 | },
130 | "industry_specific_insights": {}, # Varies based on 'industry' parameter
131 | "recommended_actions": [ # Business action recommendations
132 | "Follow up regarding mentioned technical issue",
133 | "Highlight positive experience in success stories"
134 | ],
135 | "meta": { # Metadata about the analysis
136 | "provider": "anthropic",
137 | "model": "claude-3-5-sonnet-20241022",
138 | "analysis_mode": "comprehensive",
139 | "language_detected": "english",
140 | "tokens": { "input": 350, "output": 820, "total": 1170 },
141 | "cost": 0.000843,
142 | "processing_time": 1.25,
143 | "version": "2.4.0"
144 | },
145 | "success": true
146 | }
147 |
148 | Raises:
149 | ToolInputError: If parameters are invalid or incompatible.
150 | ProviderError: If the provider is unavailable or the LLM request fails.
151 | ToolError: For other errors during processing.
152 | """
153 | start_time = time.time()
154 |
155 | # Parameter validation
156 | if not text or not isinstance(text, str):
157 | raise ToolInputError(
158 | "Input text must be a non-empty string.",
159 | param_name="text",
160 | provided_value=text
161 | )
162 |
163 | valid_analysis_modes = [
164 | "standard", "comprehensive", "customer_experience", "product_feedback",
165 | "brand_perception", "support_ticket", "sales_opportunity"
166 | ]
167 | if analysis_mode not in valid_analysis_modes:
168 | raise ToolInputError(
169 | f"Invalid analysis_mode. Must be one of: {', '.join(valid_analysis_modes)}",
170 | param_name="analysis_mode",
171 | provided_value=analysis_mode
172 | )
173 |
174 | # Construct the analysis prompt based on parameters
175 | system_prompt = _build_sentiment_system_prompt(
176 | industry=industry,
177 | analysis_mode=analysis_mode,
178 | entity_extraction=entity_extraction,
179 | aspect_based=aspect_based,
180 | competitive_analysis=competitive_analysis,
181 | intent_detection=intent_detection,
182 | risk_assessment=risk_assessment,
183 | language=language,
184 | threshold_config=threshold_config
185 | )
186 |
187 | user_prompt = f"""
188 | Analyze the following business text according to the specified parameters:
189 |
190 | Text to analyze:
191 | ```
192 | {text}
193 | ```
194 |
195 | Provide a detailed JSON response according to the format specified in the system instructions.
196 | """
197 |
198 | # Combined prompt for all providers
199 | combined_prompt = f"{system_prompt}\n\n{user_prompt}"
200 |
201 | try:
202 | # Consistently use generate_completion for all providers
203 | completion_result = await generate_completion(
204 | prompt=combined_prompt,
205 | provider=provider,
206 | model=model,
207 | temperature=0.2,
208 | max_tokens=2000,
209 | additional_params={"response_format": {"type": "json_object"}} if provider.lower() == "openai" else None
210 | )
211 |
212 | # Extract response text from the completion result
213 | response_text = completion_result["text"].strip()
214 |
215 | # Extract JSON response
216 | try:
217 | # Try to extract JSON if wrapped in code blocks
218 | if "```json" in response_text:
219 | json_start = response_text.find("```json") + 7
220 | json_end = response_text.find("```", json_start)
221 | if json_end > json_start:
222 | response_text = response_text[json_start:json_end].strip()
223 | elif "```" in response_text:
224 | json_start = response_text.find("```") + 3
225 | json_end = response_text.find("```", json_start)
226 | if json_end > json_start:
227 | response_text = response_text[json_start:json_end].strip()
228 |
229 | # Parse and validate JSON
230 | analysis_data = json.loads(response_text)
231 |
232 | # Validate minimum required fields
233 | if "core_metrics" not in analysis_data:
234 | logger.warning("Missing 'core_metrics' in response, adding empty object")
235 | analysis_data["core_metrics"] = {}
236 |
237 | # Ensure core metrics contains primary sentiment
238 | core_metrics = analysis_data["core_metrics"]
239 | if "primary_sentiment" not in core_metrics:
240 | sentiment_score = core_metrics.get("sentiment_score", 0.0)
241 | if sentiment_score > 0.2:
242 | primary_sentiment = "positive"
243 | elif sentiment_score < -0.2:
244 | primary_sentiment = "negative"
245 | else:
246 | primary_sentiment = "neutral"
247 | core_metrics["primary_sentiment"] = primary_sentiment
248 | logger.debug(f"Added missing primary_sentiment: {primary_sentiment}")
249 |
250 | # Populate metadata
251 | processing_time = time.time() - start_time
252 |
253 | # Extract provider and model info from completion result
254 | result_provider = completion_result.get("provider", provider)
255 | result_model = completion_result.get("model", model)
256 | input_tokens = completion_result.get("tokens", {}).get("input", 0)
257 | output_tokens = completion_result.get("tokens", {}).get("output", 0)
258 | total_tokens = completion_result.get("tokens", {}).get("total", 0)
259 | cost = completion_result.get("cost", 0.0)
260 |
261 | meta = {
262 | "provider": result_provider,
263 | "model": result_model,
264 | "analysis_mode": analysis_mode,
265 | "language_detected": language, # Actual detection would need more logic
266 | "tokens": {
267 | "input": input_tokens,
268 | "output": output_tokens,
269 | "total": total_tokens,
270 | },
271 | "cost": cost,
272 | "processing_time": processing_time,
273 | "version": "2.4.0" # Tool version
274 | }
275 |
276 | # Include metadata in the final response
277 | analysis_data["meta"] = meta
278 | analysis_data["success"] = True
279 |
280 | # Log successful completion
281 | logger.success(
282 | f"Business sentiment analysis completed successfully with {result_provider}/{result_model}",
283 | emoji_key=TaskType.CLASSIFICATION.value,
284 | analysis_mode=analysis_mode,
285 | sentiment=core_metrics.get("primary_sentiment", "unknown"),
286 | tokens={
287 | "input": input_tokens,
288 | "output": output_tokens
289 | },
290 | cost=cost,
291 | time=processing_time
292 | )
293 |
294 | return analysis_data
295 |
296 | except json.JSONDecodeError as e:
297 | logger.error(
298 | f"Failed to parse JSON response: {e}",
299 | emoji_key="error",
300 | raw_response=response_text[:500] # Log partial response for debugging
301 | )
302 | raise ToolError(
303 | f"Failed to parse business sentiment analysis response: {e}",
304 | error_code="invalid_response_format",
305 | details={"raw_response": response_text[:500]}
306 | ) from e
307 |
308 | except Exception as e:
309 | raise ProviderError(
310 | f"Business sentiment analysis failed: {str(e)}",
311 | provider=provider,
312 | model=model,
313 | cause=e
314 | ) from e
315 |
316 |
317 | def _build_sentiment_system_prompt(
318 | industry: Optional[str],
319 | analysis_mode: str,
320 | entity_extraction: bool,
321 | aspect_based: bool,
322 | competitive_analysis: bool,
323 | intent_detection: bool,
324 | risk_assessment: bool,
325 | language: str,
326 | threshold_config: Optional[Dict[str, float]]
327 | ) -> str:
328 | """Builds a comprehensive system prompt for business sentiment analysis based on parameters."""
329 |
330 | # Base prompt with core instructions
331 | base_prompt = """
332 | You are an enterprise-grade business sentiment analysis system designed to extract actionable insights from customer and stakeholder feedback. Your analysis should be precise, nuanced, and tailored to business decision-making.
333 |
334 | Provide analysis in a structured JSON format with the following core sections:
335 | 1. core_metrics: Essential sentiment indicators
336 | 2. business_dimensions: Business-specific metrics like satisfaction and loyalty
337 | 3. message_characteristics: Content properties like topics and tone
338 |
339 | All numerical scores should be consistent (higher is better unless otherwise specified) and normalized within their specified ranges.
340 | """
341 |
342 | # Industry-specific tailoring
343 | industry_prompt = ""
344 | if industry:
345 | industry_mappings = {
346 | "retail": "Retail and e-commerce context: Focus on product quality, shopping experience, delivery, returns, and customer service. Include retail-specific metrics like purchase satisfaction and repeat purchase intent.",
347 |
348 | "financial_services": "Financial services context: Focus on trust, security, transparency, and service quality. Include financial-specific metrics like perceived financial benefit, trust indicator, and financial confidence impact.",
349 |
350 | "healthcare": "Healthcare context: Focus on care quality, staff interactions, facility experience, and outcomes. Include healthcare-specific metrics like perceived care quality, staff empathy, and outcome satisfaction.",
351 |
352 | "hospitality": "Hospitality context: Focus on accommodations, amenities, staff service, and overall experience. Include hospitality-specific metrics like comfort rating, staff attentiveness, and value perception.",
353 |
354 | "technology": "Technology/SaaS context: Focus on software/product functionality, reliability, ease of use, and technical support. Include tech-specific metrics like feature satisfaction, reliability perception, and technical resolution satisfaction.",
355 |
356 | "telecommunications": "Telecommunications context: Focus on service reliability, coverage, customer support, and value. Include telecom-specific metrics like service reliability rating, coverage satisfaction, and value perception.",
357 |
358 | "manufacturing": "Manufacturing context: Focus on product quality, durability, specifications adherence, and support. Include manufacturing-specific metrics like quality rating, durability perception, and technical specification satisfaction."
359 | }
360 |
361 | if industry.lower() in industry_mappings:
362 | industry_prompt = f"\nINDUSTRY CONTEXT: {industry_mappings[industry.lower()]}\n"
363 | industry_prompt += "\nInclude an 'industry_specific_insights' section with metrics and insights specific to this industry."
364 | else:
365 | industry_prompt = f"\nINDUSTRY CONTEXT: {industry} - Apply industry-specific terminology and standards.\n"
366 |
367 | # Analysis mode specification
368 | mode_prompt = "\nANALYSIS MODE: "
369 | if analysis_mode == "standard":
370 | mode_prompt += "Standard business sentiment with core metrics and key indicators."
371 | elif analysis_mode == "comprehensive":
372 | mode_prompt += "Comprehensive analysis with all available metrics and maximum detail."
373 | elif analysis_mode == "customer_experience":
374 | mode_prompt += "Customer experience focus: Emphasize satisfaction, loyalty, and effort metrics. Pay special attention to service interactions, pain points, and moments of delight."
375 | elif analysis_mode == "product_feedback":
376 | mode_prompt += "Product feedback focus: Emphasize feature sentiment, product quality, and improvement suggestions. Identify specific product components mentioned and their sentiment."
377 | elif analysis_mode == "brand_perception":
378 | mode_prompt += "Brand perception focus: Emphasize brand attributes, positioning, and emotional connections. Analyze brand promise fulfillment and competitive positioning."
379 | elif analysis_mode == "support_ticket":
380 | mode_prompt += "Support ticket focus: Emphasize issue categorization, severity, urgency, and resolution path. Detect technical terms and problem indicators."
381 | elif analysis_mode == "sales_opportunity":
382 | mode_prompt += "Sales opportunity focus: Emphasize purchase intent, objections, and decision factors. Analyze buying signals and sales readiness indicators."
383 |
384 | # Optional analysis components
385 | optional_components = []
386 |
387 | if entity_extraction:
388 | optional_components.append("""
389 | ENTITY EXTRACTION: Extract and categorize business entities mentioned in the text.
390 | - Include an 'entity_extraction' section with arrays of identified products, features, services, departments, locations, etc.
391 | - Normalize entity names when variations of the same entity are mentioned.
392 | - Exclude generic mentions and focus on specific named entities.
393 | """)
394 |
395 | if aspect_based:
396 | optional_components.append("""
397 | ASPECT-BASED SENTIMENT: Break down sentiment by specific aspects or features mentioned.
398 | - Include an 'aspect_sentiment' section with sentiment scores for each identified aspect.
399 | - Aspects should be specific (e.g., 'website_usability', 'checkout_process', 'product_quality').
400 | - Only include aspects explicitly mentioned or strongly implied in the text.
401 | - Score each aspect from -1.0 (extremely negative) to 1.0 (extremely positive).
402 | """)
403 |
404 | if competitive_analysis:
405 | optional_components.append("""
406 | COMPETITIVE ANALYSIS: Identify and analyze competitor mentions and comparisons.
407 | - Include a 'competitive_insights' section with competitor names and comparative sentiment.
408 | - Capture explicit and implicit comparisons to competitors.
409 | - Identify perceived advantages and disadvantages relative to competitors.
410 | - Score comparative sentiment from -1.0 (negative comparison) to 1.0 (positive comparison).
411 | """)
412 |
413 | if intent_detection:
414 | optional_components.append("""
415 | INTENT DETECTION: Identify customer intentions and likely next actions.
416 | - Include an 'intent_analysis' section with probabilities for purchase intent, churn risk, etc.
417 | - Classify the feedback type (complaint, praise, question, suggestion).
418 | - Detect specific intents like information requests, cancellation warnings, escalation threats.
419 | - Score intent probabilities from 0.0 (no indication) to 1.0 (strong indication).
420 | """)
421 |
422 | if risk_assessment:
423 | optional_components.append("""
424 | RISK ASSESSMENT: Evaluate potential business risks in the feedback.
425 | - Include a 'risk_assessment' section with probabilities and categories of identified risks.
426 | - Assess churn probability, PR/reputation risk, legal/compliance concerns, etc.
427 | - Provide an escalation probability and urgency level.
428 | - Flag sensitive content that may require special attention.
429 | """)
430 |
431 | # Language specification
432 | language_prompt = f"\nLANGUAGE: Analyze text in {language}. Ensure all scores and categorizations are correctly interpreted within cultural and linguistic context."
433 |
434 | # Threshold configurations if provided
435 | threshold_prompt = ""
436 | if threshold_config and isinstance(threshold_config, dict):
437 | threshold_prompt = "\nTHRESHOLD CONFIGURATION:"
438 | for metric, value in threshold_config.items():
439 | threshold_prompt += f"\n- {metric}: {value}"
440 |
441 | # Combine all prompt components
442 | full_prompt = base_prompt + industry_prompt + mode_prompt + language_prompt + threshold_prompt
443 |
444 | if optional_components:
445 | full_prompt += "\n\nADDITIONAL ANALYSIS COMPONENTS:"
446 | full_prompt += "\n".join(optional_components)
447 |
448 | # Output format specification
449 | output_format = """
450 | RESPONSE FORMAT: Respond only with a valid JSON object containing all applicable sections based on the analysis parameters.
451 |
452 | Always include these core sections:
453 | - core_metrics: Overall sentiment, scores, and primary indicators
454 | - business_dimensions: Business-specific satisfaction and perception metrics
455 | - message_characteristics: Content properties, topics, and expression styles
456 | - recommended_actions: 1-3 specific business actions based on the analysis
457 | - meta: Will be populated with metadata about the analysis
458 |
459 | Add optional sections as specified by the analysis parameters.
460 |
461 | Ensure all numerical values are normalized to their specified ranges and all categorical values use consistent terminology.
462 | """
463 |
464 | full_prompt += output_format
465 |
466 | return full_prompt
467 |
468 |
469 | @with_tool_metrics
470 | @with_error_handling
471 | async def analyze_business_text_batch(
472 | texts: List[str],
473 | analysis_config: Dict[str, Any],
474 | aggregate_results: bool = True,
475 | max_concurrency: int = 3,
476 | provider: str = Provider.OPENAI.value,
477 | model: Optional[str] = None
478 | ) -> Dict[str, Any]:
479 | """Processes a batch of business texts for sentiment analysis with aggregated insights.
480 |
481 | Designed for analyzing large volumes of business feedback (reviews, surveys, tickets)
482 | efficiently with detailed individual analyses and optional aggregated metrics. Ideal for
483 | business intelligence, customer experience programs, and trend identification.
484 |
485 | Args:
486 | texts: List of text items to analyze (reviews, feedback, etc.).
487 | analysis_config: Configuration dictionary for analyze_business_sentiment.
488 | Example: {"analysis_mode": "standard", "entity_extraction": True}
489 | All parameters from analyze_business_sentiment except text, provider, model.
490 | aggregate_results: Whether to generate aggregated insights across all analyzed texts.
491 | Includes trend detection, sentiment distribution, and pattern identification.
492 | max_concurrency: Maximum number of parallel analyses to run.
493 | provider: The name of the LLM provider to use.
494 | model: The specific model ID. If None, uses the provider's default.
495 |
496 | Returns:
497 | A dictionary containing individual and aggregated results:
498 | {
499 | "individual_results": [
500 | {
501 | "text_id": 0,
502 | "text_preview": "First 50 characters of text...",
503 | "analysis": { /* Complete analysis result for this text */ }
504 | },
505 | // Additional individual results...
506 | ],
507 | "aggregate_insights": { // Only if aggregate_results=True
508 | "sentiment_distribution": {
509 | "positive": 0.65, // 65% positive
510 | "neutral": 0.20, // 20% neutral
511 | "negative": 0.15 // 15% negative
512 | },
513 | "average_metrics": {
514 | "sentiment_score": 0.42,
515 | "satisfaction_score": 3.8,
516 | // Other averaged metrics...
517 | },
518 | "top_aspects": [
519 | {"name": "customer_service", "avg_sentiment": 0.75, "mention_count": 42},
520 | {"name": "product_quality", "avg_sentiment": 0.62, "mention_count": 38},
521 | // Additional aspects...
522 | ],
523 | "key_topics": [
524 | {"topic": "shipping delays", "mention_count": 35, "avg_sentiment": -0.3},
525 | {"topic": "easy checkout", "mention_count": 28, "avg_sentiment": 0.8},
526 | // Additional topics...
527 | ],
528 | "entity_mention_frequencies": {
529 | "products": {"Product X": 45, "Product Y": 23},
530 | "features": {"user interface": 38, "reliability": 27}
531 | },
532 | "emerging_patterns": [
533 | "Increasing mentions of mobile app usability",
534 | "Growing negative sentiment about recent policy change"
535 | ],
536 | "risk_indicators": [
537 | {"issue": "shipping delays", "severity": "medium", "trend": "increasing"},
538 | {"issue": "billing confusion", "severity": "low", "trend": "stable"}
539 | ]
540 | },
541 | "meta": {
542 | "batch_size": 250,
543 | "success_count": 248,
544 | "error_count": 2,
545 | "processing_time": 128.5,
546 | "total_cost": 4.87,
547 | "timestamp": "2025-04-21T14:30:00Z"
548 | },
549 | "success": true
550 | }
551 |
552 | Raises:
553 | ToolInputError: If input parameters are invalid.
554 | ProviderError: If the provider service fails.
555 | ToolError: For other processing errors.
556 | """
557 | start_time = time.time()
558 | total_cost = 0.0
559 | success_count = 0
560 | error_count = 0
561 |
562 | # Validate inputs
563 | if not texts or not isinstance(texts, list):
564 | raise ToolInputError(
565 | "The 'texts' parameter must be a non-empty list of strings.",
566 | param_name="texts",
567 | provided_value=texts
568 | )
569 |
570 | if not analysis_config or not isinstance(analysis_config, dict):
571 | raise ToolInputError(
572 | "The 'analysis_config' parameter must be a dictionary of configuration options.",
573 | param_name="analysis_config",
574 | provided_value=analysis_config
575 | )
576 |
577 | # Process texts with concurrency control
578 | import asyncio
579 | semaphore = asyncio.Semaphore(max_concurrency)
580 | individual_results = []
581 | all_analyses = []
582 |
583 | async def process_text(idx: int, text: str):
584 | nonlocal total_cost, success_count, error_count
585 |
586 | async with semaphore:
587 | text_preview = text[:50] + ("..." if len(text) > 50 else "")
588 | logger.debug(f"Processing text {idx+1}/{len(texts)}: {text_preview}")
589 |
590 | try:
591 | # Create a copy of analysis_config to avoid modifying the original
592 | config = analysis_config.copy()
593 |
594 | # Add provider and model to config
595 | config["provider"] = provider
596 | config["model"] = model
597 |
598 | # Process the individual text using our refactored analyze_business_sentiment
599 | result = await analyze_business_sentiment(
600 | text=text,
601 | **config
602 | )
603 |
604 | # Update metrics
605 | total_cost += result.get("meta", {}).get("cost", 0.0)
606 | success_count += 1
607 |
608 | # Record result
609 | individual_results.append({
610 | "text_id": idx,
611 | "text_preview": text_preview,
612 | "analysis": result
613 | })
614 |
615 | # Store for aggregation
616 | all_analyses.append(result)
617 |
618 | return result
619 |
620 | except Exception as e:
621 | logger.error(f"Error analyzing text {idx}: {str(e)}", exc_info=True)
622 | error_count += 1
623 |
624 | # Record error
625 | individual_results.append({
626 | "text_id": idx,
627 | "text_preview": text_preview,
628 | "error": str(e)
629 | })
630 |
631 | return None
632 |
633 | # Create and run tasks
634 | tasks = [process_text(i, text) for i, text in enumerate(texts)]
635 | await asyncio.gather(*tasks)
636 |
637 | # Sort results by text_id to maintain original order
638 | individual_results.sort(key=lambda x: x["text_id"])
639 |
640 | # Build response
641 | result = {
642 | "individual_results": individual_results,
643 | "meta": {
644 | "batch_size": len(texts),
645 | "success_count": success_count,
646 | "error_count": error_count,
647 | "processing_time": time.time() - start_time,
648 | "total_cost": total_cost,
649 | "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
650 | },
651 | "success": True
652 | }
653 |
654 | # Calculate aggregate insights if requested and we have successful analyses
655 | if aggregate_results and all_analyses:
656 | try:
657 | aggregate_insights = _calculate_aggregate_insights(all_analyses)
658 | result["aggregate_insights"] = aggregate_insights
659 | except Exception as e:
660 | logger.error(f"Error calculating aggregate insights: {str(e)}", exc_info=True)
661 | result["aggregate_insights_error"] = str(e)
662 |
663 | return result
664 |
665 |
666 | def _calculate_aggregate_insights(analyses: List[Dict[str, Any]]) -> Dict[str, Any]:
667 | """Calculates aggregate insights across multiple business sentiment analyses."""
668 |
669 | # Initialize aggregation containers
670 | sentiment_counts = {"positive": 0, "neutral": 0, "negative": 0}
671 | sentiment_scores = []
672 | satisfaction_scores = []
673 | loyalty_indicators = []
674 | aspect_sentiments = {}
675 | topics = {}
676 | mentioned_entities = {
677 | "products": {},
678 | "features": {},
679 | "services": {}
680 | }
681 |
682 | # Process each analysis
683 | for analysis in analyses:
684 | # Skip any analyses without core_metrics
685 | if "core_metrics" not in analysis:
686 | continue
687 |
688 | core = analysis.get("core_metrics", {})
689 | business = analysis.get("business_dimensions", {})
690 |
691 | # Sentiment distribution
692 | sentiment = core.get("primary_sentiment", "neutral").lower()
693 | if sentiment in sentiment_counts:
694 | sentiment_counts[sentiment] += 1
695 |
696 | # Collect numerical metrics
697 | if "sentiment_score" in core:
698 | sentiment_scores.append(core["sentiment_score"])
699 |
700 | if "satisfaction_score" in business:
701 | satisfaction_scores.append(business["satisfaction_score"])
702 |
703 | if "loyalty_indicators" in business:
704 | loyalty_indicators.append(business["loyalty_indicators"])
705 |
706 | # Aspect sentiments
707 | for aspect, score in analysis.get("aspect_sentiment", {}).items():
708 | if aspect not in aspect_sentiments:
709 | aspect_sentiments[aspect] = {"scores": [], "count": 0}
710 |
711 | aspect_sentiments[aspect]["scores"].append(score)
712 | aspect_sentiments[aspect]["count"] += 1
713 |
714 | # Topics
715 | for topic in analysis.get("message_characteristics", {}).get("key_topics", []):
716 | if topic not in topics:
717 | topics[topic] = 0
718 | topics[topic] += 1
719 |
720 | # Entity mentions
721 | for entity_type, entities in analysis.get("entity_extraction", {}).items():
722 | if entity_type in mentioned_entities and isinstance(entities, list):
723 | for entity in entities:
724 | if entity not in mentioned_entities[entity_type]:
725 | mentioned_entities[entity_type][entity] = 0
726 | mentioned_entities[entity_type][entity] += 1
727 |
728 | # Calculate distributions as percentages
729 | total_sentiments = sum(sentiment_counts.values())
730 | sentiment_distribution = {
731 | k: round(v / total_sentiments, 2) if total_sentiments else 0
732 | for k, v in sentiment_counts.items()
733 | }
734 |
735 | # Calculate average metrics
736 | average_metrics = {}
737 | if sentiment_scores:
738 | average_metrics["sentiment_score"] = sum(sentiment_scores) / len(sentiment_scores)
739 |
740 | if satisfaction_scores:
741 | average_metrics["satisfaction_score"] = sum(satisfaction_scores) / len(satisfaction_scores)
742 |
743 | if loyalty_indicators:
744 | average_metrics["loyalty_indicators"] = sum(loyalty_indicators) / len(loyalty_indicators)
745 |
746 | # Process aspect sentiments
747 | top_aspects = []
748 | for aspect, data in aspect_sentiments.items():
749 | avg_sentiment = sum(data["scores"]) / len(data["scores"]) if data["scores"] else 0
750 | top_aspects.append({
751 | "name": aspect,
752 | "avg_sentiment": round(avg_sentiment, 2),
753 | "mention_count": data["count"]
754 | })
755 |
756 | # Sort aspects by mention count
757 | top_aspects.sort(key=lambda x: x["mention_count"], reverse=True)
758 |
759 | # Process topics
760 | key_topics = [{"topic": k, "mention_count": v} for k, v in topics.items()]
761 | key_topics.sort(key=lambda x: x["mention_count"], reverse=True)
762 |
763 | # Build aggregated insights
764 | aggregate_insights = {
765 | "sentiment_distribution": sentiment_distribution,
766 | "average_metrics": average_metrics,
767 | "top_aspects": top_aspects[:10], # Limit to top 10
768 | "key_topics": key_topics[:10], # Limit to top 10
769 | "entity_mention_frequencies": mentioned_entities
770 | }
771 |
772 | return aggregate_insights
```
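For orientation, here is a minimal, hypothetical usage sketch of the two tools above. It assumes the functions can be awaited directly (as `analyze_business_text_batch` itself does internally) and that an importable module exposes them; the import path shown is an assumption, not the repository's confirmed layout, and parameter names follow the docstrings above.

```python
# Hypothetical usage sketch -- not part of the repository source.
# Assumptions: the import path below, and a configured API key for the chosen provider.
import asyncio

from ultimate_mcp_server.tools.sentiment_analysis import (  # assumed module path
    analyze_business_sentiment,
    analyze_business_text_batch,
)


async def main() -> None:
    # Single-text analysis; parameter names mirror the analyze_business_sentiment docstring.
    single = await analyze_business_sentiment(
        text="The new dashboard is great, but support took three days to reply.",
        analysis_mode="customer_experience",
        aspect_based=True,
        intent_detection=True,
        provider="openai",  # model=None falls back to the provider's default model
    )
    print(single["core_metrics"]["primary_sentiment"], single["meta"]["cost"])

    # Batch analysis; analysis_config carries every analyze_business_sentiment
    # parameter except text, provider, and model.
    batch = await analyze_business_text_batch(
        texts=["Love the product!", "Billing overcharged me twice."],
        analysis_config={"analysis_mode": "standard", "risk_assessment": True},
        aggregate_results=True,
        max_concurrency=2,
        provider="openai",
    )
    print(batch["aggregate_insights"]["sentiment_distribution"])


if __name__ == "__main__":
    asyncio.run(main())
```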
--------------------------------------------------------------------------------
/ultimate_mcp_server/core/tournaments/tasks.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Tournament task implementations for asynchronous tournament execution.
3 | """
4 | # Standard Library Imports
5 | import asyncio
6 | import random
7 | import re
8 | import time
9 | from datetime import datetime, timezone
10 | from pathlib import Path
11 | from typing import Any, Dict, Optional
12 |
13 | from ultimate_mcp_server.core.evaluation.base import EvaluationScore
14 | from ultimate_mcp_server.core.models.tournament import (
15 | ModelConfig,
16 | ModelResponseData,
17 | TournamentData,
18 | TournamentRoundResult,
19 | TournamentStatus,
20 | )
21 | from ultimate_mcp_server.core.tournaments.manager import tournament_manager
22 | from ultimate_mcp_server.core.tournaments.utils import (
23 | calculate_weighted_score,
24 | create_round_prompt,
25 | extract_thinking,
26 | generate_comparison_file_content,
27 | generate_leaderboard_file_content,
28 | save_model_response_content,
29 | update_overall_best_response,
30 | )
31 | from ultimate_mcp_server.tools.completion import generate_completion
32 | from ultimate_mcp_server.tools.extraction import extract_code_from_response
33 | from ultimate_mcp_server.utils.logging import get_logger
34 |
35 | logger = get_logger("ultimate_mcp_server.tournaments.tasks")
36 |
37 | # --- Global semaphore for concurrent model calls ---
38 | MODEL_CALL_SEMAPHORE: Optional[asyncio.Semaphore] = None
39 |
40 | def initialize_semaphore(max_concurrent_calls: int):
41 | global MODEL_CALL_SEMAPHORE
42 | MODEL_CALL_SEMAPHORE = asyncio.Semaphore(max_concurrent_calls)
43 | logger.info(f"Tournament task semaphore initialized with concurrency: {max_concurrent_calls}")
44 |
45 | async def run_tournament_async(tournament_id: str):
46 | """Main async task to orchestrate the entire tournament."""
47 | await asyncio.sleep(0.1) # Small delay for state propagation
48 |
49 | tournament = tournament_manager.get_tournament(tournament_id, force_reload=True)
50 | if not tournament:
51 | logger.error(f"[TASK_ERROR] Tournament {tournament_id} not found for execution.")
52 | return
53 |
54 | if tournament.status != TournamentStatus.RUNNING: # Check if it was set to RUNNING
55 | logger.warning(f"[TASK_WARN] Tournament {tournament_id} not in RUNNING state ({tournament.status}). Aborting task.")
56 | return
57 |
58 | # --- Initialize semaphore based on tournament config ---
59 | if MODEL_CALL_SEMAPHORE is None or MODEL_CALL_SEMAPHORE._value != tournament.config.max_concurrent_model_calls:
60 | initialize_semaphore(tournament.config.max_concurrent_model_calls)
61 |
62 | logger.info(f"[TASK_START] Starting execution for tournament '{tournament.name}' (ID: {tournament_id})")
63 |
64 | try:
65 | if tournament.current_round < 0: # If just started
66 | tournament.current_round = 0
67 |
68 | while tournament.current_round < tournament.config.rounds:
69 | # --- Check for cancellation before starting a round ---
70 | current_tournament_state = tournament_manager.get_tournament(tournament_id, force_reload=True)
71 | if not current_tournament_state or current_tournament_state.status == TournamentStatus.CANCELLED:
72 | logger.info(f"[TASK_CANCEL] Tournament {tournament_id} cancelled. Halting execution.")
73 | if current_tournament_state and current_tournament_state.status != TournamentStatus.CANCELLED: # Ensure it's marked
74 | tournament_manager.update_tournament_status(tournament_id, TournamentStatus.CANCELLED, "Cancelled during execution.")
75 | return
76 |
77 | round_num = tournament.current_round
78 |             logger.info(f"[ROUND_START] Processing Round {round_num}/{tournament.config.rounds - 1} for '{tournament.name}'")
79 |
80 | round_result_obj = tournament.rounds_results[round_num] # Assumes initialized by manager
81 | round_result_obj.status = TournamentStatus.RUNNING
82 | round_result_obj.start_time = datetime.now(timezone.utc)
83 | tournament_manager._save_tournament_state(tournament)
84 |
85 | await process_single_round(tournament, round_num, round_result_obj)
86 |
87 | round_result_obj.status = TournamentStatus.COMPLETED # Mark round as completed
88 | round_result_obj.end_time = datetime.now(timezone.utc)
89 | tournament_manager._save_tournament_state(tournament)
90 | logger.info(f"[ROUND_END] Round {round_num} for '{tournament.name}' completed.")
91 |
92 | # --- Update overall best response after each round ---
93 | update_overall_best_response(tournament) # Utility function to find and set best
94 | tournament_manager._save_tournament_state(tournament)
95 |
96 | tournament.current_round += 1
97 | tournament_manager._save_tournament_state(tournament) # Save progress
98 |
99 | tournament.status = TournamentStatus.COMPLETED
100 | logger.info(f"[TASK_COMPLETE] Tournament '{tournament.name}' (ID: {tournament_id}) completed successfully.")
101 |
102 | except Exception as e:
103 | logger.error(f"[TASK_FAILURE] Tournament '{tournament.name}' failed: {e}", exc_info=True)
104 | tournament.status = TournamentStatus.FAILED
105 | tournament.error_message = str(e)
106 | finally:
107 | tournament.end_time = datetime.now(timezone.utc)
108 | tournament_manager._save_tournament_state(tournament)
109 | logger.info(f"Final state saved for tournament {tournament_id}. Status: {tournament.status}")
110 |
111 |
112 | async def process_single_round(tournament: TournamentData, round_num: int, round_result_obj: TournamentRoundResult):
113 | """Processes all model variants for a single round."""
114 |
115 | # Determine previous responses for synthesis rounds > 0
116 | previous_round_variant_responses: Dict[str, ModelResponseData] = {}
117 | if round_num > 0:
118 | prev_round_idx = round_num - 1
119 | if prev_round_idx < len(tournament.rounds_results):
120 | previous_round_result = tournament.rounds_results[prev_round_idx]
121 | previous_round_variant_responses = previous_round_result.responses # These are already ModelResponseData objects
122 | else:
123 | logger.warning(f"Could not find previous round {prev_round_idx} data for round {round_num}. Proceeding without it.")
124 |
125 | tasks = []
126 | for model_cfg in tournament.config.models:
127 | for i in range(model_cfg.diversity_count):
128 | variant_id = f"{model_cfg.model_id}/v{i}"
129 |
130 | # --- Check for cancellation before each model task ---
131 | current_tournament_state = tournament_manager.get_tournament(tournament.tournament_id, force_reload=True)
132 | if not current_tournament_state or current_tournament_state.status == TournamentStatus.CANCELLED:
133 | logger.info(f"[MODEL_TASK_CANCEL] Cancellation detected for tournament {tournament.tournament_id}. Skipping variant {variant_id}.")
134 |                     continue # Skip this variant; the same cancellation check skips the remaining variants
135 |
136 | # Skip if already processed (e.g., resuming a failed round)
137 | if variant_id in round_result_obj.responses and round_result_obj.responses[variant_id].response_text:
138 | logger.info(f"Variant {variant_id} for round {round_num} already processed. Skipping.")
139 | continue
140 |
141 | tasks.append(
142 | process_single_model_variant(
143 | tournament,
144 | model_cfg,
145 | variant_id, # Pass the unique variant ID
146 | round_num,
147 | round_result_obj,
148 | previous_round_variant_responses # Pass full ModelResponseData dict
149 | )
150 | )
151 |
152 | if not tasks:
153 | logger.info(f"No new model variants to process for round {round_num}.")
154 | round_result_obj.status = TournamentStatus.COMPLETED
155 | return
156 |
157 | logger.info(f"Gathering {len(tasks)} model variant tasks for round {round_num}.")
158 | # Await all tasks and catch any unhandled exceptions
159 | try:
160 | await asyncio.gather(*tasks)
161 | except Exception as e:
162 | logger.error(f"An error occurred during asyncio.gather in round {round_num}: {e}", exc_info=True)
163 | round_result_obj.error_message = (getattr(round_result_obj, 'error_message', '') or "") + f"; Error during task gathering: {str(e)}"
164 | round_result_obj.status = TournamentStatus.FAILED # Mark round as failed if gather fails
165 | # Individual task errors are handled within process_single_model_variant
166 | return
167 |
168 | # --- Generate comparison and leaderboard files ---
169 | # (Ensure these utils exist and are updated)
170 | comparison_md = generate_comparison_file_content(tournament, round_num)
171 | leaderboard_md = generate_leaderboard_file_content(tournament, round_num) # New utility
172 |
173 | round_storage_path = Path(tournament.storage_path) / f"round_{round_num}"
174 | round_storage_path.mkdir(parents=True, exist_ok=True)
175 |
176 | if comparison_md:
177 | comp_path = round_storage_path / "round_comparison_report.md"
178 | comp_path.write_text(comparison_md, encoding='utf-8')
179 | round_result_obj.comparison_file_path = str(comp_path)
180 | if leaderboard_md:
181 | lead_path = round_storage_path / "round_leaderboard.md"
182 | lead_path.write_text(leaderboard_md, encoding='utf-8')
183 | round_result_obj.leaderboard_file_path = str(lead_path)
184 |
185 | # Save state after generating reports
186 | tournament_manager._save_tournament_state(tournament)
187 |
188 |
189 | async def process_single_model_variant(
190 | tournament: TournamentData,
191 | model_config: "ModelConfig", # Forward ref as string, ModelConfig is imported
192 | variant_id: str, # e.g., "openai/gpt-4o/v0"
193 | round_num: int,
194 | round_result_obj: TournamentRoundResult,
195 | previous_round_variant_responses: Dict[str, ModelResponseData]
196 | ):
197 | """Processes a single model variant (handles diversity), including retries and evaluation."""
198 |
199 | # --- Acquire semaphore ---
200 | if MODEL_CALL_SEMAPHORE: # Should always be initialized
201 | await MODEL_CALL_SEMAPHORE.acquire()
202 |
203 | response_data = ModelResponseData(
204 | model_id_original=model_config.model_id,
205 | model_id_variant=variant_id,
206 | round_num=round_num
207 | )
208 | task_start_time = time.monotonic()
209 |
210 | # --- Prepare storage paths if needed (handled in save_model_response_content) ---
211 |
212 | try:
213 | # --- Check for cancellation ---
214 | current_tournament_state = tournament_manager.get_tournament(tournament.tournament_id, force_reload=True)
215 | if not current_tournament_state or current_tournament_state.status == TournamentStatus.CANCELLED:
216 | response_data.error = "Tournament cancelled before model execution."
217 | logger.info(f"Model task {variant_id} skipped due to tournament cancellation.")
218 | raise asyncio.CancelledError("Tournament cancelled")
219 |
220 |
221 | prompt = create_round_prompt(
222 | tournament,
223 | round_num,
224 | previous_round_variant_responses,
225 | target_model_variant_id=variant_id # For personalized prompts if needed
226 | )
227 |
228 | # --- LLM Call with Retries ---
229 | current_attempt = 0
230 | llm_response_dict = None
231 | while current_attempt <= tournament.config.max_retries_per_model_call:
232 | try:
233 | logger.info(f"[MODEL_CALL_START] Attempt {current_attempt+1}/{tournament.config.max_retries_per_model_call+1} for {variant_id}, Round {round_num}")
234 |
235 | provider_id = model_config.model_id.split('/')[0] if '/' in model_config.model_id else None
236 |
237 | # Parameters that are direct arguments to the generate_completion tool
238 | tool_direct_params = {
239 | "prompt": prompt,
240 | "model": model_config.model_id, # Use original model_id for API call
241 | "provider": provider_id,
242 | "temperature": model_config.temperature,
243 | # max_tokens is added conditionally below
244 | }
245 | if model_config.max_tokens is not None:
246 | tool_direct_params["max_tokens"] = model_config.max_tokens
247 |
248 | # Parameters that should be passed via the 'additional_params' argument of the tool
249 | tool_additional_params = {}
250 | if model_config.system_prompt is not None:
251 | tool_additional_params["system_prompt"] = model_config.system_prompt
252 | if model_config.seed is not None:
253 | tool_additional_params["seed"] = model_config.seed
254 | # Example: if model_config had top_p, it would be added here too:
255 | # if hasattr(model_config, 'top_p') and model_config.top_p is not None:
256 | # tool_additional_params["top_p"] = model_config.top_p
257 |
258 | llm_response_dict = await generate_completion(
259 | **tool_direct_params,
260 | additional_params=tool_additional_params
261 | )
262 |
263 | if llm_response_dict.get("success"):
264 | logger.info(f"[MODEL_CALL_SUCCESS] {variant_id} successful on attempt {current_attempt+1}")
265 | break # Success, exit retry loop
266 | else:
267 | error_msg = llm_response_dict.get("error", "Unknown LLM error")
268 | logger.warning(f"Attempt {current_attempt+1} for {variant_id} failed: {error_msg}")
269 | if current_attempt == tournament.config.max_retries_per_model_call:
270 | raise RuntimeError(f"LLM call failed after max retries: {error_msg}")
271 |
272 | except Exception as e: # Catch exceptions from generate_completion itself
273 | logger.warning(f"Exception on attempt {current_attempt+1} for {variant_id}: {e}")
274 | if current_attempt == tournament.config.max_retries_per_model_call:
275 | raise RuntimeError(f"LLM call failed after max retries (exception): {e}") from e
276 |
277 |             current_attempt += 1
278 |             # Exponential backoff with random jitter (applies to both the failed-call and exception paths)
279 |             sleep_time = random.uniform(
280 |                 tournament.config.retry_backoff_base_seconds,
281 |                 tournament.config.retry_backoff_base_seconds * 1.5 * (2 ** (current_attempt - 1))
282 |             )
283 |             # Max sleep to prevent overly long waits
284 |             sleep_time = min(sleep_time, 30.0) # e.g., max 30s backoff
285 |             logger.info(f"Retrying {variant_id} in {sleep_time:.2f} seconds...")
286 |             await asyncio.sleep(sleep_time)
287 |
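        # Illustrative note (added for clarity, not executable logic): with a hypothetical
        # retry_backoff_base_seconds of 1.0, the retry loop above sleeps roughly
        #   attempt 1: uniform(1.0, 1.5)s, attempt 2: uniform(1.0, 3.0)s,
        #   attempt 3: uniform(1.0, 6.0)s, attempt 4: uniform(1.0, 12.0)s,
        # capped at 30s, so concurrent variants back off without synchronizing.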
288 | # --- Process Successful LLM Response ---
289 | response_data.response_text = llm_response_dict.get("text", "")
290 | response_data.metrics.update({
291 | "input_tokens": llm_response_dict.get("tokens", {}).get("input"),
292 | "output_tokens": llm_response_dict.get("tokens", {}).get("output"),
293 | "cost": llm_response_dict.get("cost", 0.0),
294 | "latency_ms": int(llm_response_dict.get("processing_time", 0) * 1000),
295 | "api_model_id_used": llm_response_dict.get("model", model_config.model_id)
296 | })
297 |
298 | response_data.thinking_process = await extract_thinking(response_data.response_text)
299 |
300 | if tournament.config.tournament_type == "code":
301 | # Use the tool function for extraction
302 | extracted_code_string = await extract_code_from_response(
303 | response_text=response_data.response_text,
304 | model=tournament.config.extraction_model_id # Pass extraction_model_id as the model for extraction
305 | # timeout parameter uses its default from extract_code_from_response
306 | )
307 | if extracted_code_string: # Check if a non-empty string was returned
308 | response_data.extracted_code = extracted_code_string.strip()
309 | else:
310 | logger.warning(f"Code extraction returned empty or failed for {variant_id}. Original response length: {len(response_data.response_text or '')}")
311 | response_data.extracted_code = None # Explicitly set to None on failure or empty string
312 |
313 | # --- Save response content ---
314 | # (This util saves the main readable MD and potentially the raw code file)
315 | saved_paths = await save_model_response_content(
316 | tournament_storage_path=Path(tournament.storage_path),
317 | round_num=round_num,
318 | variant_id=variant_id, # Use variant_id for unique filenames
319 | response_text=response_data.response_text,
320 | extracted_code=response_data.extracted_code,
321 | thinking_process=response_data.thinking_process,
322 | metrics=response_data.metrics,
323 | tournament_type=tournament.config.tournament_type
324 | )
325 | response_data.response_file_path = saved_paths.get("markdown_file")
326 | response_data.extracted_code_file_path = saved_paths.get("code_file")
327 |
328 | # --- Run Evaluations ---
329 | evaluators = tournament_manager.get_evaluators_for_tournament(tournament.tournament_id)
330 | if evaluators:
331 | logger.info(f"Running {len(evaluators)} evaluators for {variant_id}...")
332 | for evaluator_instance in evaluators:
333 | eval_config = next((e for e in tournament.config.evaluators if e.evaluator_id == evaluator_instance.config.get("evaluator_id_ref", evaluator_instance.evaluator_type)), None) # Find original config for ID
334 |
335 | eval_id_for_scores = eval_config.evaluator_id if eval_config else evaluator_instance.evaluator_type
336 |
337 | try:
338 | eval_score_obj = await evaluator_instance.score(
339 | response_data, # Pass the full ModelResponseData
340 | tournament.config.prompt,
341 | tournament.config.tournament_type
342 | )
343 | response_data.scores[eval_id_for_scores] = eval_score_obj.model_dump() # Store full score object
344 | logger.debug(f"Evaluator '{eval_id_for_scores}' score for {variant_id}: {eval_score_obj.score}")
345 | except Exception as eval_e:
346 | logger.error(f"Evaluator '{eval_id_for_scores}' failed for {variant_id}: {eval_e}", exc_info=True)
347 | response_data.scores[eval_id_for_scores] = EvaluationScore(score=0.0, details=f"Evaluation error: {str(eval_e)}").model_dump()
348 |
349 | # Calculate overall weighted score
350 | response_data.overall_score = calculate_weighted_score(response_data.scores, tournament.config.evaluators)
351 |
352 |
353 | except asyncio.CancelledError: # Handle task cancellation gracefully
354 | logger.info(f"Task for {variant_id} in round {round_num} was cancelled.")
355 | response_data.error = "Task cancelled."
356 | response_data.metrics["final_status"] = "cancelled"
357 | except Exception as e:
358 | logger.error(f"[MODEL_TASK_FAILURE] Error processing {variant_id}: {e}", exc_info=True)
359 | response_data.error = str(e)
360 | response_data.metrics["final_status"] = "failed"
361 | finally:
362 | response_data.metrics["total_task_time_ms"] = int((time.monotonic() - task_start_time) * 1000)
363 | # --- Add response to the round_result_obj (which is part of tournament state) ---
364 | # This needs to be thread-safe if multiple tasks could update this concurrently,
365 | # but asyncio tasks run on a single thread, so direct assignment is fine here.
366 | # The `tournament` object itself is shared, so saving it needs care.
367 | round_result_obj.responses[variant_id] = response_data
368 |
369 | # Defer saving the full tournament state to the calling round processor
370 | # to batch saves, but log that this variant is done.
371 | logger.info(f"Finished processing variant {variant_id}. Error: {response_data.error is not None}")
372 |
373 | # --- Release semaphore ---
374 | if MODEL_CALL_SEMAPHORE:
375 | MODEL_CALL_SEMAPHORE.release()
376 |
377 | async def process_single_model(
378 | model_id: str,
379 | prompt: str,
380 | tournament_id: str,
381 | round_num: int,
382 | is_code_tournament: bool,
383 | extraction_model_id: Optional[str] = None
384 | ) -> ModelResponseData:
385 | """
386 | Handles the logic for calling a single model provider using the generate_completion tool.
387 | """
388 | start_time = time.monotonic()
389 | logger.info(f"[MODEL TASK] Processing model {model_id} for round {round_num}")
390 |
391 | # Get tournament to access storage path
392 | tournament = tournament_manager.get_tournament(tournament_id)
393 | if not tournament:
394 | raise ValueError(f"Tournament {tournament_id} not found")
395 |
396 | # Setup storage paths
397 | round_storage_path = Path(tournament.storage_path) / f"round_{round_num}"
398 | round_storage_path.mkdir(exist_ok=True, parents=True)
399 |
400 | response_data = ModelResponseData(
401 | model_id_original=model_id,
402 | model_id_variant=model_id, # In this context, variant is the same as original
403 | round_num=round_num
404 | )
405 | extracted_code: Optional[str] = None # noqa: F841
406 | file_extension = ".py" if is_code_tournament else ".md"
407 |
408 | provider_name = model_id.split('/')[0] if '/' in model_id else None # Infer provider from model_id if possible
409 | if not provider_name:
410 | logger.warning(f"[MODEL TASK] Could not infer provider from model_id: {model_id}. Attempting call without explicit provider.")
411 | # Note: generate_completion might fail if provider isn't specified and cannot be inferred
412 |
413 | try:
414 | # Use generate_completion tool
415 | logger.info(f"[MODEL TASK] Calling generate_completion for model {model_id} with prompt length {len(prompt)}")
416 | # Log prompt preview
417 | preview_length = 100
418 | prompt_preview = prompt[:preview_length] + "..." if len(prompt) > preview_length else prompt
419 | logger.info(f"[MODEL TASK] Prompt preview: {prompt_preview}")
420 |
421 | # Call the tool function directly
422 | completion_result_dict = await generate_completion(
423 | prompt=prompt,
424 | model=model_id, # Pass the full model ID
425 | provider=provider_name # Pass inferred provider
426 | # Add other params like max_tokens, temperature if needed/available in TournamentConfig
427 | )
428 |
429 | # Check for success
430 | if not completion_result_dict.get("success"):
431 | error_msg = completion_result_dict.get("error", "generate_completion tool indicated failure")
432 | raise RuntimeError(f"Completion failed for {model_id}: {error_msg}")
433 |
434 | # Extract data from the dictionary returned by the tool
435 | response_text = completion_result_dict.get("text", "")
436 | actual_model_used = completion_result_dict.get("model", model_id) # Use actual model if returned
437 | token_info = completion_result_dict.get("tokens", {})
438 | cost = completion_result_dict.get("cost", 0.0)
439 | processing_time_sec = completion_result_dict.get("processing_time", 0.0)
440 | latency_ms = int(processing_time_sec * 1000)
441 |
442 | # Log response preview
443 | response_preview = response_text[:preview_length] + "..." if len(response_text) > preview_length else response_text
444 | logger.info(f"[MODEL TASK] Response preview for {actual_model_used}: {response_preview}")
445 |
446 | # Extract metrics from the tool result
447 | completion_metrics = {
448 | "input_tokens": token_info.get("input"),
449 | "output_tokens": token_info.get("output"),
450 | "cost": cost,
451 | "latency_ms": latency_ms, # Use processing_time from tool
452 | "api_model_id_used": actual_model_used # Store the actual model ID used by the API
453 | }
454 |
455 | # Process response - use async extract_thinking
456 | thinking = await extract_thinking(response_text)
457 | code_metrics = {} # Placeholder for potential future code analysis metrics
458 |
459 | # Save response to file with better naming pattern
460 | timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
461 | safe_model_id = re.sub(r'[^a-zA-Z0-9_\-.]', '_', actual_model_used) # Use actual model name
462 | safe_tournament_id = re.sub(r'[^a-zA-Z0-9_\-.]', '_', tournament_id)
463 |
464 | filename_base = f"tournament_{safe_tournament_id}_round-{round_num}_model-{safe_model_id}_{timestamp}"
465 | raw_response_path = round_storage_path / f"{filename_base}{file_extension}"
466 |
467 | raw_response_path.write_text(response_text or "", encoding="utf-8")
468 |
469 | # Create a more user-friendly version with added context
470 | readable_content = f"""# Tournament Response
471 | **Tournament ID:** {tournament_id}
472 | **Round:** {round_num}
473 | **Model (Configured):** {model_id}
474 | **Model (Actual API):** {actual_model_used}
475 | **Timestamp:** {datetime.now().isoformat()}
476 | **Tokens:** {completion_metrics.get('input_tokens', 'N/A')} in, {completion_metrics.get('output_tokens', 'N/A')} out
477 | **Cost:** ${completion_metrics.get('cost', 0.0):.6f}
478 | **Latency:** {completion_metrics.get('latency_ms', 'N/A')}ms
479 |
480 | ## Prompt
481 | ```
482 | {prompt}
483 | ```
484 |
485 | ## Response
486 | ```
487 | {response_text}
488 | ```
489 | """
490 | readable_path = round_storage_path / f"{filename_base}_readable{file_extension}"
491 | readable_path.write_text(readable_content, encoding="utf-8")
492 |
493 | logger.info(f"[MODEL TASK] Saved response to: {readable_path}")
494 |
495 | # Populate response data
496 | # model_id_original and model_id_variant are already set
497 | response_data.response_text = response_text
498 | response_data.thinking_process = thinking
499 | response_data.metrics = {**completion_metrics, **code_metrics}
500 | response_data.timestamp = datetime.now(timezone.utc)
501 | response_data.response_file_path = str(raw_response_path) # Store path to raw response
502 | response_data.metrics["total_processing_time_ms"] = int((time.monotonic() - start_time) * 1000) # Keep overall task time
503 |
504 | logger.info(f"[MODEL TASK] Finished processing model {actual_model_used} for round {round_num} in {response_data.metrics['total_processing_time_ms']}ms")
505 |
506 | except Exception as e:
507 | logger.error(f"[MODEL TASK] Error processing model {model_id}: {e}", exc_info=True)
508 | response_data.error = str(e)
509 |
510 | return response_data
511 |
512 | async def run_single_round_task(tournament_id: str, round_num: int):
513 | """
514 | Task that runs a single round of the tournament, including LLM calls.
515 | """
516 | logger.info(f"[ROUND TASK START] Running round {round_num} for tournament {tournament_id}")
517 | tournament = tournament_manager.get_tournament(tournament_id, force_reload=True)
518 |
519 | # --- Check if tournament exists or was cancelled before proceeding ---
520 | if not tournament:
521 | logger.error(f"[ROUND TASK FAIL] Tournament {tournament_id} not found at start of round {round_num}.")
522 | return
523 | if tournament.status == TournamentStatus.CANCELLED:
524 | logger.info(f"[ROUND TASK ABORT] Tournament {tournament_id} was cancelled. Stopping round {round_num}.")
525 | # Ensure round status reflects cancellation if it was running
526 | if round_num < len(tournament.rounds_results):
527 | round_result = tournament.rounds_results[round_num]
528 | if round_result.status == TournamentStatus.RUNNING:
529 | round_result.status = TournamentStatus.CANCELLED
530 | round_result.error = "Cancelled by user request during execution."
531 | round_result.end_time = datetime.now(timezone.utc)
532 | tournament_manager._save_tournament_state(tournament)
533 | return
534 | # -------------------------------------------------------------------
535 |
536 | if round_num >= len(tournament.rounds_results):
537 | logger.error(f"[ROUND TASK FAIL] Invalid round number {round_num} for tournament {tournament_id} state.")
538 | return
539 |
540 | round_result = tournament.rounds_results[round_num]
541 |
542 | try:
543 | # Mark round as running
544 | round_result.status = TournamentStatus.RUNNING
545 | round_result.start_time = datetime.now(timezone.utc)
546 | tournament_manager._save_tournament_state(tournament)
547 | logger.info(f"[ROUND TASK] Round {round_num} marked as running")
548 |
549 | # Get tournament config
550 | is_code_tournament = tournament.config.tournament_type == "code"
551 | extraction_model_id = tournament.config.extraction_model_id
552 |
553 | # Create prompt for this round
554 | prompt = create_round_prompt(tournament, round_num)
555 |
556 | # Create tasks for all configured models
557 | model_tasks = []
558 | for model_config in tournament.config.models:
559 | model_id = model_config.model_id
560 |
561 | # Skip if already processed
562 | if model_id in round_result.responses:
563 | logger.info(f"[ROUND TASK] Skipping already processed model {model_id}")
564 | continue
565 |
566 | # Add task for this model
567 | task = process_single_model(
568 | model_id=model_id,
569 | prompt=prompt,
570 | tournament_id=tournament_id,
571 | round_num=round_num,
572 | is_code_tournament=is_code_tournament,
573 | extraction_model_id=extraction_model_id
574 | )
575 | model_tasks.append(task)
576 | logger.info(f"[ROUND TASK] Added task for model {model_id}")
577 |
578 | # Exit if no tasks to run
579 | if not model_tasks:
580 | logger.info(f"[ROUND TASK] No models to process for round {round_num}")
581 | round_result.status = TournamentStatus.COMPLETED
582 | round_result.end_time = datetime.now(timezone.utc)
583 | tournament_manager._save_tournament_state(tournament)
584 | return
585 |
586 | # Run all model tasks in parallel
587 | logger.info(f"[ROUND TASK] Running {len(model_tasks)} model tasks in parallel")
588 | results = await asyncio.gather(*model_tasks, return_exceptions=True)
589 |
590 |         # Process results, mapping them back to the models that were actually queued
591 |         # (models already present in round_result.responses were skipped above, so enumerate() indices would misalign)
592 |         task_model_ids = [mc.model_id for mc in tournament.config.models if mc.model_id not in round_result.responses]
593 |         for model_id, result in zip(task_model_ids, results):
593 |
594 | # Handle exceptions
595 | if isinstance(result, Exception):
596 | logger.error(f"[ROUND TASK] Error processing model {model_id}: {result}", exc_info=True)
597 | continue
598 |
599 | # Store result
600 | round_result.responses[model_id] = result
601 | tournament_manager._save_tournament_state(tournament)
602 |
603 | # Create comparison file
604 | comparison_content = generate_comparison_file_content(tournament, round_num)
605 | if comparison_content:
606 | round_dir = Path(tournament.storage_path) / f"round_{round_num}"
607 | round_dir.mkdir(exist_ok=True)
608 | comparison_file = round_dir / "model_comparison.md"
609 |
610 | with open(comparison_file, 'w', encoding='utf-8') as f:
611 | f.write(comparison_content)
612 |
613 | # Store the path in round results
614 | round_result.comparison_file_path = str(comparison_file)
615 | tournament_manager._save_tournament_state(tournament)
616 |
617 | # Mark round as completed
618 | round_result.status = TournamentStatus.COMPLETED
619 | round_result.end_time = datetime.now(timezone.utc)
620 | tournament_manager._save_tournament_state(tournament)
621 | logger.info(f"[ROUND TASK COMPLETE] Round {round_num} for tournament {tournament_id} completed successfully")
622 |
623 | # If this was the last round, mark the tournament as completed
624 | if round_num == tournament.config.rounds - 1:
625 | tournament.status = TournamentStatus.COMPLETED
626 | tournament.end_time = datetime.now(timezone.utc)
627 | tournament_manager._save_tournament_state(tournament)
628 | logger.info(f"[ROUND TASK] Tournament {tournament_id} marked as completed after final round")
629 |
630 | except Exception as e:
631 | logger.error(f"[ROUND TASK ERROR] Error processing round {round_num}: {e}", exc_info=True)
632 | round_result.status = TournamentStatus.FAILED
633 | round_result.error = str(e)
634 | round_result.end_time = datetime.now(timezone.utc)
635 | tournament_manager._save_tournament_state(tournament)
636 |
637 | # Mark tournament as failed
638 | tournament.status = TournamentStatus.FAILED
639 | tournament.error_message = f"Failed during round {round_num}: {str(e)}"
640 | tournament.end_time = datetime.now(timezone.utc)
641 | tournament_manager._save_tournament_state(tournament)
642 |
643 | async def process_model_task(
644 | tournament: TournamentData,
645 | model_id: str,
646 | round_num: int,
647 | previous_round_responses: Optional[Dict[str, str]] = None
648 | ) -> Dict[str, Any]:
649 | """Process a single model task for the tournament using the generate_completion tool.
650 |
651 | Args:
652 | tournament: Tournament data
653 | model_id: Model to use (e.g., 'openai/gpt-4o')
654 | round_num: Current round number
655 | previous_round_responses: Previous round responses (for rounds > 0)
656 |
657 | Returns:
658 | Model task result with response text and metrics
659 | """
660 | start_task_time = time.monotonic()
661 | # Infer provider from model_id format 'provider:model_name' or 'provider/model_name'
662 | provider_id = None
663 | if ':' in model_id:
664 | provider_id = model_id.split(':')[0]
665 | elif '/' in model_id: # Keep backward compatibility if '/' is used
666 | provider_id = model_id.split('/')[0]
667 |
668 | if not provider_id:
669 | logger.warning(f"[MODEL TASK] Could not infer provider from model_id: {model_id}. Attempting call without explicit provider.")
670 |
671 | try:
672 | logger.info(f"[MODEL TASK] Processing model {model_id} for round {round_num} (Provider: {provider_id})")
673 |
674 | # Generate prompt based on tournament type and round
675 | if round_num == 0:
676 | prompt = tournament.config.prompt
677 | else:
678 | prompt = create_round_prompt(tournament, round_num, previous_round_responses)
679 |
680 | # Generate completion using the tool
681 | logger.info(f"[MODEL TASK] Calling generate_completion for model {model_id} with prompt length {len(prompt)}")
682 | preview_length = 100
683 | prompt_preview = prompt[:preview_length] + "..." if len(prompt) > preview_length else prompt
684 | logger.info(f"[MODEL TASK] Prompt preview: {prompt_preview}")
685 |
686 | completion_result_dict = await generate_completion(
687 | prompt=prompt,
688 | model=model_id,
689 | provider=provider_id # Pass the inferred provider
690 | # Add other params like max_tokens, temperature if needed/available
691 | )
692 |
693 | # Check for success
694 | if not completion_result_dict.get("success"):
695 | error_msg = completion_result_dict.get("error", "generate_completion tool indicated failure")
696 | raise RuntimeError(f"Completion failed for {model_id}: {error_msg}")
697 |
698 | # Extract data from the result dictionary
699 | response_text = completion_result_dict.get("text", "")
700 | actual_model_used = completion_result_dict.get("model", model_id)
701 | token_info = completion_result_dict.get("tokens", {})
702 | cost = completion_result_dict.get("cost", 0.0)
703 | processing_time_sec = completion_result_dict.get("processing_time", 0.0)
704 |
705 | # Log response preview
706 | response_preview = response_text[:preview_length] + "..." if len(response_text) > preview_length else response_text
707 | logger.info(f"[MODEL TASK] Response preview for {actual_model_used}: {response_preview}")
708 |
709 | # Extract metrics from the tool result
710 | completion_metrics = {
711 | "input_tokens": token_info.get("input"),
712 | "output_tokens": token_info.get("output"),
713 | "cost": cost,
714 | "processing_time_ms": int(processing_time_sec * 1000) # Use tool's processing time
715 | }
716 |
717 | # Extract thinking/reasoning if present - use async extract_thinking
718 | thinking = await extract_thinking(response_text)
719 |
720 | # Save response to a file with timestamp - use async save_model_response
721 | response_file = await save_model_response_content(
722 | tournament_storage_path=Path(tournament.storage_path),
723 | round_num=round_num,
724 | variant_id=model_id, # Use model_id for unique filenames
725 | response_text=response_text,
726 | extracted_code=None, # No extracted code for this task
727 | thinking_process=thinking,
728 | metrics=completion_metrics,
729 | tournament_type=tournament.config.tournament_type
730 | )
731 |
732 | total_task_time_ms = int((time.monotonic() - start_task_time) * 1000)
733 | completion_metrics["total_task_time_ms"] = total_task_time_ms # Add overall task time
734 |
735 | logger.info(f"[MODEL TASK] Finished processing model {actual_model_used} for round {round_num} in {total_task_time_ms}ms (LLM time: {completion_metrics['processing_time_ms']}ms)")
736 |
737 | return {
738 | "model_id": actual_model_used, # Return actual model used
739 | "response_text": response_text,
740 | "thinking": thinking,
741 | "metrics": completion_metrics,
742 | "response_file": str(response_file.get("markdown_file")) if isinstance(response_file, dict) else str(response_file) # Ensure path is string
743 | }
744 | except Exception as e:
745 | logger.error(f"[MODEL TASK] Error processing model {model_id}: {str(e)}", exc_info=True)
746 | total_task_time_ms = int((time.monotonic() - start_task_time) * 1000)
747 | return {
748 | "model_id": model_id,
749 | "error": str(e),
750 | "response_text": f"Error generating response: {str(e)}",
751 | "thinking": None,
752 | "metrics": {
753 | "error": str(e),
754 | "total_task_time_ms": total_task_time_ms,
755 | "processing_time_ms": None # LLM call failed
756 | },
757 | "response_file": None
758 | }
```
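
The provider-inference step in `process_model_task` (splitting the model id on `:` or, for backward compatibility, `/`) is small enough to check in isolation. The sketch below is illustrative only and not part of the repository: the `infer_provider` helper name is ours, and the example model ids are placeholders.

```python
# Minimal sketch of the provider-prefix inference used in process_model_task.
# The function name `infer_provider` is hypothetical; only the split logic mirrors the source.
from typing import Optional

def infer_provider(model_id: str) -> Optional[str]:
    """Return the provider prefix from 'provider:model' or 'provider/model', else None."""
    if ":" in model_id:
        return model_id.split(":", 1)[0]
    if "/" in model_id:
        return model_id.split("/", 1)[0]
    return None

assert infer_provider("openai/gpt-4o") == "openai"
assert infer_provider("anthropic:claude-3-7-sonnet") == "anthropic"
assert infer_provider("gpt-4o") is None  # caller falls back to no explicit provider
```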
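Likewise, the pair-and-zip pattern used when gathering per-model tasks in the round runner can be exercised with stand-in coroutines. This is a minimal sketch under our own assumptions: `fake_model_call` and the example model ids are hypothetical and not taken from the codebase; only the skip-then-zip structure mirrors the source.

```python
# Illustrative only: keeping gather() results aligned with the model that produced
# them, even when some models are skipped (e.g. already present in round_result.responses).
import asyncio

async def fake_model_call(model_id: str) -> str:
    # Stand-in for the per-model processing coroutine; returns a dummy response.
    await asyncio.sleep(0)
    return f"response from {model_id}"

async def main() -> None:
    skipped = {"openai/gpt-4o"}  # pretend this model was already processed
    model_ids = ["openai/gpt-4o", "anthropic:claude-3-7-sonnet", "openai/gpt-4o-mini"]

    # Only create tasks for models that were not skipped, pairing each with its id.
    model_tasks = [
        (model_id, fake_model_call(model_id))
        for model_id in model_ids
        if model_id not in skipped
    ]
    results = await asyncio.gather(*(task for _, task in model_tasks), return_exceptions=True)

    # zip() keeps ids and results aligned regardless of how many models were skipped.
    for (model_id, _), result in zip(model_tasks, results):
        if isinstance(result, Exception):
            print(f"{model_id} failed: {result}")
        else:
            print(f"{model_id} -> {result}")

asyncio.run(main())
```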