This is page 9 of 45. Use http://codebase.md/dicklesworthstone/llm_gateway_mcp_server?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .cursorignore
├── .env.example
├── .envrc
├── .gitignore
├── additional_features.md
├── check_api_keys.py
├── completion_support.py
├── comprehensive_test.py
├── docker-compose.yml
├── Dockerfile
├── empirically_measured_model_speeds.json
├── error_handling.py
├── example_structured_tool.py
├── examples
│ ├── __init__.py
│ ├── advanced_agent_flows_using_unified_memory_system_demo.py
│ ├── advanced_extraction_demo.py
│ ├── advanced_unified_memory_system_demo.py
│ ├── advanced_vector_search_demo.py
│ ├── analytics_reporting_demo.py
│ ├── audio_transcription_demo.py
│ ├── basic_completion_demo.py
│ ├── cache_demo.py
│ ├── claude_integration_demo.py
│ ├── compare_synthesize_demo.py
│ ├── cost_optimization.py
│ ├── data
│ │ ├── sample_event.txt
│ │ ├── Steve_Jobs_Introducing_The_iPhone_compressed.md
│ │ └── Steve_Jobs_Introducing_The_iPhone_compressed.mp3
│ ├── docstring_refiner_demo.py
│ ├── document_conversion_and_processing_demo.py
│ ├── entity_relation_graph_demo.py
│ ├── filesystem_operations_demo.py
│ ├── grok_integration_demo.py
│ ├── local_text_tools_demo.py
│ ├── marqo_fused_search_demo.py
│ ├── measure_model_speeds.py
│ ├── meta_api_demo.py
│ ├── multi_provider_demo.py
│ ├── ollama_integration_demo.py
│ ├── prompt_templates_demo.py
│ ├── python_sandbox_demo.py
│ ├── rag_example.py
│ ├── research_workflow_demo.py
│ ├── sample
│ │ ├── article.txt
│ │ ├── backprop_paper.pdf
│ │ ├── buffett.pdf
│ │ ├── contract_link.txt
│ │ ├── legal_contract.txt
│ │ ├── medical_case.txt
│ │ ├── northwind.db
│ │ ├── research_paper.txt
│ │ ├── sample_data.json
│ │ └── text_classification_samples
│ │ ├── email_classification.txt
│ │ ├── news_samples.txt
│ │ ├── product_reviews.txt
│ │ └── support_tickets.txt
│ ├── sample_docs
│ │ └── downloaded
│ │ └── attention_is_all_you_need.pdf
│ ├── sentiment_analysis_demo.py
│ ├── simple_completion_demo.py
│ ├── single_shot_synthesis_demo.py
│ ├── smart_browser_demo.py
│ ├── sql_database_demo.py
│ ├── sse_client_demo.py
│ ├── test_code_extraction.py
│ ├── test_content_detection.py
│ ├── test_ollama.py
│ ├── text_classification_demo.py
│ ├── text_redline_demo.py
│ ├── tool_composition_examples.py
│ ├── tournament_code_demo.py
│ ├── tournament_text_demo.py
│ ├── unified_memory_system_demo.py
│ ├── vector_search_demo.py
│ ├── web_automation_instruction_packs.py
│ └── workflow_delegation_demo.py
├── LICENSE
├── list_models.py
├── marqo_index_config.json.example
├── mcp_protocol_schema_2025-03-25_version.json
├── mcp_python_lib_docs.md
├── mcp_tool_context_estimator.py
├── model_preferences.py
├── pyproject.toml
├── quick_test.py
├── README.md
├── resource_annotations.py
├── run_all_demo_scripts_and_check_for_errors.py
├── storage
│ └── smart_browser_internal
│ ├── locator_cache.db
│ ├── readability.js
│ └── storage_state.enc
├── test_client.py
├── test_connection.py
├── TEST_README.md
├── test_sse_client.py
├── test_stdio_client.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── integration
│ │ ├── __init__.py
│ │ └── test_server.py
│ ├── manual
│ │ ├── test_extraction_advanced.py
│ │ └── test_extraction.py
│ └── unit
│ ├── __init__.py
│ ├── test_cache.py
│ ├── test_providers.py
│ └── test_tools.py
├── TODO.md
├── tool_annotations.py
├── tools_list.json
├── ultimate_mcp_banner.webp
├── ultimate_mcp_logo.webp
├── ultimate_mcp_server
│ ├── __init__.py
│ ├── __main__.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── commands.py
│ │ ├── helpers.py
│ │ └── typer_cli.py
│ ├── clients
│ │ ├── __init__.py
│ │ ├── completion_client.py
│ │ └── rag_client.py
│ ├── config
│ │ └── examples
│ │ └── filesystem_config.yaml
│ ├── config.py
│ ├── constants.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── evaluation
│ │ │ ├── base.py
│ │ │ └── evaluators.py
│ │ ├── providers
│ │ │ ├── __init__.py
│ │ │ ├── anthropic.py
│ │ │ ├── base.py
│ │ │ ├── deepseek.py
│ │ │ ├── gemini.py
│ │ │ ├── grok.py
│ │ │ ├── ollama.py
│ │ │ ├── openai.py
│ │ │ └── openrouter.py
│ │ ├── server.py
│ │ ├── state_store.py
│ │ ├── tournaments
│ │ │ ├── manager.py
│ │ │ ├── tasks.py
│ │ │ └── utils.py
│ │ └── ums_api
│ │ ├── __init__.py
│ │ ├── ums_database.py
│ │ ├── ums_endpoints.py
│ │ ├── ums_models.py
│ │ └── ums_services.py
│ ├── exceptions.py
│ ├── graceful_shutdown.py
│ ├── services
│ │ ├── __init__.py
│ │ ├── analytics
│ │ │ ├── __init__.py
│ │ │ ├── metrics.py
│ │ │ └── reporting.py
│ │ ├── cache
│ │ │ ├── __init__.py
│ │ │ ├── cache_service.py
│ │ │ ├── persistence.py
│ │ │ ├── strategies.py
│ │ │ └── utils.py
│ │ ├── cache.py
│ │ ├── document.py
│ │ ├── knowledge_base
│ │ │ ├── __init__.py
│ │ │ ├── feedback.py
│ │ │ ├── manager.py
│ │ │ ├── rag_engine.py
│ │ │ ├── retriever.py
│ │ │ └── utils.py
│ │ ├── prompts
│ │ │ ├── __init__.py
│ │ │ ├── repository.py
│ │ │ └── templates.py
│ │ ├── prompts.py
│ │ └── vector
│ │ ├── __init__.py
│ │ ├── embeddings.py
│ │ └── vector_service.py
│ ├── tool_token_counter.py
│ ├── tools
│ │ ├── __init__.py
│ │ ├── audio_transcription.py
│ │ ├── base.py
│ │ ├── completion.py
│ │ ├── docstring_refiner.py
│ │ ├── document_conversion_and_processing.py
│ │ ├── enhanced-ums-lookbook.html
│ │ ├── entity_relation_graph.py
│ │ ├── excel_spreadsheet_automation.py
│ │ ├── extraction.py
│ │ ├── filesystem.py
│ │ ├── html_to_markdown.py
│ │ ├── local_text_tools.py
│ │ ├── marqo_fused_search.py
│ │ ├── meta_api_tool.py
│ │ ├── ocr_tools.py
│ │ ├── optimization.py
│ │ ├── provider.py
│ │ ├── pyodide_boot_template.html
│ │ ├── python_sandbox.py
│ │ ├── rag.py
│ │ ├── redline-compiled.css
│ │ ├── sentiment_analysis.py
│ │ ├── single_shot_synthesis.py
│ │ ├── smart_browser.py
│ │ ├── sql_databases.py
│ │ ├── text_classification.py
│ │ ├── text_redline_tools.py
│ │ ├── tournament.py
│ │ ├── ums_explorer.html
│ │ └── unified_memory_system.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── async_utils.py
│ │ ├── display.py
│ │ ├── logging
│ │ │ ├── __init__.py
│ │ │ ├── console.py
│ │ │ ├── emojis.py
│ │ │ ├── formatter.py
│ │ │ ├── logger.py
│ │ │ ├── panels.py
│ │ │ ├── progress.py
│ │ │ └── themes.py
│ │ ├── parse_yaml.py
│ │ ├── parsing.py
│ │ ├── security.py
│ │ └── text.py
│ └── working_memory_api.py
├── unified_memory_system_technical_analysis.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/ultimate_mcp_server/services/cache/strategies.py:
--------------------------------------------------------------------------------
```python
1 | """Cache strategy implementations."""
2 | import hashlib
3 | import json
4 | import re
5 | from abc import ABC, abstractmethod
6 | from typing import Any, Dict, List, Optional
7 |
8 | from ultimate_mcp_server.utils import get_logger
9 |
10 | logger = get_logger(__name__)
11 |
12 |
class CacheStrategy(ABC):
    """Base interface that all cache strategies implement.

    Concrete strategies must provide key generation and a cacheability
    decision; they may optionally override the TTL policy.
    """

    @abstractmethod
    def generate_key(self, request: Dict[str, Any]) -> str:
        """Produce the cache key for *request*.

        Args:
            request: Request parameters.

        Returns:
            Cache key string.
        """

    @abstractmethod
    def should_cache(self, request: Dict[str, Any], response: Any) -> bool:
        """Decide whether *response* may be stored in the cache.

        Args:
            request: Request parameters.
            response: Response data.

        Returns:
            True when the response should be cached.
        """

    def get_ttl(self, request: Dict[str, Any], response: Any) -> Optional[int]:
        """Return a TTL (seconds) for the cached response.

        The default implementation defers to the cache's configured TTL.

        Args:
            request: Request parameters.
            response: Response data.

        Returns:
            TTL in seconds, or None to use the cache default.
        """
        return None
52 |
53 |
class ExactMatchStrategy(CacheStrategy):
    """Cache strategy that keys on an exact, normalized request match."""

    def generate_key(self, request: Dict[str, Any]) -> str:
        """Build a deterministic cache key from the normalized request.

        Non-deterministic fields are stripped first so that otherwise
        identical requests hash to the same key.

        Args:
            request: Request parameters.

        Returns:
            Key of the form ``exact:<sha256 hex digest>``.
        """
        normalized = self._clean_request(request)
        payload = json.dumps(normalized, sort_keys=True)
        digest = hashlib.sha256(payload.encode('utf-8')).hexdigest()
        return f"exact:{digest}"

    def should_cache(self, request: Dict[str, Any], response: Any) -> bool:
        """Decide whether the response is worth caching.

        Skips caching when the caller explicitly opted out, when the
        response is streamed, or when a high sampling temperature makes the
        output too random to be reusable.

        Args:
            request: Request parameters.
            response: Response data.

        Returns:
            True when the response should be cached.
        """
        opted_out = request.get("cache", True) is False
        streaming = bool(request.get("stream", False))
        too_random = request.get("temperature", 0.7) > 0.9
        return not (opted_out or streaming or too_random)

    def get_ttl(self, request: Dict[str, Any], response: Any) -> Optional[int]:
        """Return a TTL for the entry, or None to use the cache default.

        A caller-supplied ``cache_ttl`` always wins; otherwise longer
        responses earn a longer TTL on the assumption they were more
        expensive to produce.

        Args:
            request: Request parameters.
            response: Response data.

        Returns:
            TTL in seconds, or None to use the cache default.
        """
        if "cache_ttl" in request:
            return request["cache_ttl"]

        body = getattr(response, "text", None)
        if isinstance(body, str):
            if len(body) > 10000:
                return 7 * 24 * 60 * 60  # 1 week for long responses
            if len(body) > 1000:
                return 3 * 24 * 60 * 60  # 3 days for medium responses

        return None  # Use default TTL

    def _clean_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of *request* without non-deterministic fields.

        Args:
            request: Original request (never mutated).

        Returns:
            Cleaned request suitable for hashing.
        """
        # These fields vary between otherwise identical requests and would
        # defeat caching if left in the key material.
        volatile = {
            "request_id", "timestamp", "session_id", "trace_id",
            "user_id", "cache", "cache_ttl",
        }
        return {key: value for key, value in request.items() if key not in volatile}
143 |
144 |
145 | class SemanticMatchStrategy(CacheStrategy):
146 | """Strategy for semantic matching of requests."""
147 |
148 | def __init__(self, similarity_threshold: float = 0.95):
149 | """Initialize semantic matching strategy.
150 |
151 | Args:
152 | similarity_threshold: Threshold for semantic similarity (0.0-1.0)
153 | """
154 | self.similarity_threshold = similarity_threshold
155 | self.exact_strategy = ExactMatchStrategy()
156 |
157 | def generate_key(self, request: Dict[str, Any]) -> str:
158 | """Generate both exact and semantic keys.
159 |
160 | Args:
161 | request: Request parameters
162 |
163 | Returns:
164 | Primary cache key (always the exact match key)
165 | """
166 | # Always use the exact match key as the primary key
167 | return self.exact_strategy.generate_key(request)
168 |
169 | def generate_semantic_key(self, request: Dict[str, Any]) -> Optional[str]:
170 | """Generate a semantic fingerprint for the request.
171 |
172 | Args:
173 | request: Request parameters
174 |
175 | Returns:
176 | Semantic key or None if request doesn't support semantic matching
177 | """
178 | # Extract the prompt or relevant text
179 | text = self._extract_text(request)
180 | if not text:
181 | return None
182 |
183 | # Normalize text
184 | text = self._normalize_text(text)
185 |
186 | # Generate fingerprint based on significant words and structure
187 | significant_words = self._extract_significant_words(text)
188 |
189 | # Create a fuzzy key
190 | if significant_words:
191 | words_key = " ".join(sorted(significant_words))
192 | return f"semantic:{hashlib.md5(words_key.encode('utf-8')).hexdigest()}"
193 |
194 | return None
195 |
196 | def should_cache(self, request: Dict[str, Any], response: Any) -> bool:
197 | """Determine if a response should be cached.
198 |
199 | Args:
200 | request: Request parameters
201 | response: Response data
202 |
203 | Returns:
204 | True if the response should be cached
205 | """
206 | # Use the same logic as exact matching
207 | return self.exact_strategy.should_cache(request, response)
208 |
209 | def get_ttl(self, request: Dict[str, Any], response: Any) -> Optional[int]:
210 | """Get the TTL for semantic matches.
211 |
212 | Args:
213 | request: Request parameters
214 | response: Response data
215 |
216 | Returns:
217 | TTL in seconds (shorter for semantic matches)
218 | """
219 | # Get base TTL from exact strategy
220 | base_ttl = self.exact_strategy.get_ttl(request, response)
221 |
222 | # For semantic matching, use shorter TTL
223 | if base_ttl is not None:
224 | return int(base_ttl * 0.5) # 50% of exact match TTL
225 |
226 | return None
227 |
228 | def _extract_text(self, request: Dict[str, Any]) -> Optional[str]:
229 | """Extract the relevant text for semantic matching.
230 |
231 | Args:
232 | request: Request parameters
233 |
234 | Returns:
235 | Extracted text or None
236 | """
237 | # Try to extract prompt text
238 | if "prompt" in request:
239 | return request["prompt"]
240 |
241 | # Try to extract from messages
242 | if "messages" in request and isinstance(request["messages"], list):
243 | # Extract text from the last user message
244 | for message in reversed(request["messages"]):
245 | if message.get("role") == "user" and "content" in message:
246 | if isinstance(message["content"], str):
247 | return message["content"]
248 | elif isinstance(message["content"], list):
249 | # Handle content list (multimodal messages)
250 | text_parts = []
251 | for part in message["content"]:
252 | if isinstance(part, dict) and part.get("type") == "text":
253 | text_parts.append(part.get("text", ""))
254 | return " ".join(text_parts)
255 |
256 | # No suitable text found
257 | return None
258 |
259 | def _normalize_text(self, text: str) -> str:
260 | """Normalize text for semantic matching.
261 |
262 | Args:
263 | text: Original text
264 |
265 | Returns:
266 | Normalized text
267 | """
268 | # Convert to lowercase
269 | text = text.lower()
270 |
271 | # Remove extra whitespace
272 | text = re.sub(r'\s+', ' ', text).strip()
273 |
274 | # Remove punctuation
275 | text = re.sub(r'[^\w\s]', '', text)
276 |
277 | return text
278 |
279 | def _extract_significant_words(self, text: str, max_words: int = 15) -> List[str]:
280 | """Extract significant words from text.
281 |
282 | Args:
283 | text: Normalized text
284 | max_words: Maximum number of words to include
285 |
286 | Returns:
287 | List of significant words
288 | """
289 | # Split into words
290 | words = text.split()
291 |
292 | # Filter out short words and common stop words
293 | stop_words = {
294 | "the", "and", "a", "an", "in", "to", "for", "of", "with", "on",
295 | "is", "are", "am", "was", "were", "be", "been", "being",
296 | "this", "that", "these", "those", "it", "they", "them",
297 | "their", "have", "has", "had", "do", "does", "did", "will",
298 | "would", "could", "should", "may", "might", "must", "can",
299 | "about", "above", "after", "again", "against", "all", "any",
300 | "because", "before", "below", "between", "both", "but", "by",
301 | "down", "during", "each", "few", "from", "further", "here",
302 | "how", "into", "more", "most", "no", "nor", "not", "only",
303 | "or", "other", "out", "over", "own", "same", "so", "than",
304 | "then", "there", "through", "under", "until", "up", "very",
305 | "what", "when", "where", "which", "while", "who", "whom",
306 | "why", "you", "your", "yours", "yourself", "ourselves",
307 | "i", "me", "my", "mine", "myself", "we", "us", "our", "ours"
308 | }
309 |
310 | # Initial filtering of short words and stopwords
311 | filtered_words = [w for w in words if len(w) > 3 and w not in stop_words]
312 |
313 | # Calculate word frequencies for TF-IDF like weighing
314 | word_freq = {}
315 | for word in filtered_words:
316 | word_freq[word] = word_freq.get(word, 0) + 1
317 |
318 | # Score words based on a combination of:
319 | # 1. Length (longer words tend to be more significant)
320 | # 2. Frequency (less common words are often more significant)
321 | # 3. Position (words at the beginning are often more significant)
322 | word_scores = {}
323 |
324 | # Normalize position weight based on document length
325 | position_weight_factor = 100 / max(1, len(words))
326 |
327 | for i, word in enumerate(filtered_words):
328 | if word in word_scores:
329 | continue
330 |
331 | # Length score: favor longer words (0.1 to 1.0)
332 | length_score = min(1.0, 0.1 + (len(word) / 20))
333 |
334 | # Rarity score: favor words that appear less frequently (0.2 to 1.0)
335 | freq = word_freq[word]
336 | rarity_score = 1.0 / (0.5 + (freq / 5))
337 | rarity_score = max(0.2, min(1.0, rarity_score))
338 |
339 | # Position score: favor words that appear earlier (0.2 to 1.0)
340 | earliest_pos = min([i for i, w in enumerate(filtered_words) if w == word])
341 | position_score = 1.0 - min(0.8, (earliest_pos * position_weight_factor) / 100)
342 |
343 | # Calculate final score
344 | final_score = (length_score * 0.3) + (rarity_score * 0.5) + (position_score * 0.2)
345 | word_scores[word] = final_score
346 |
347 | # Sort words by score and take top max_words
348 | significant_words = sorted(word_scores.keys(), key=lambda w: word_scores[w], reverse=True)
349 |
350 | # Include at least a few of the most frequent words for context
351 | top_by_freq = sorted(word_freq.keys(), key=lambda w: word_freq[w], reverse=True)[:5]
352 |
353 | # Ensure these frequent words are included in the result
354 | result = significant_words[:max_words]
355 | for word in top_by_freq:
356 | if word not in result and len(result) < max_words:
357 | result.append(word)
358 |
359 | return result
360 |
361 |
362 | class TaskBasedStrategy(CacheStrategy):
363 | """Strategy based on task type."""
364 |
365 | def __init__(self):
366 | """Initialize task-based strategy."""
367 | self.exact_strategy = ExactMatchStrategy()
368 | self.semantic_strategy = SemanticMatchStrategy()
369 |
370 | def generate_key(self, request: Dict[str, Any]) -> str:
371 | """Generate key based on task type.
372 |
373 | Args:
374 | request: Request parameters
375 |
376 | Returns:
377 | Cache key
378 | """
379 | task_type = self._detect_task_type(request)
380 |
381 | # Use exact matching for most tasks
382 | key = self.exact_strategy.generate_key(request)
383 |
384 | # Add task type to the key
385 | return f"{task_type}:{key}"
386 |
387 | def should_cache(self, request: Dict[str, Any], response: Any) -> bool:
388 | """Determine if a response should be cached based on task type.
389 |
390 | Args:
391 | request: Request parameters
392 | response: Response data
393 |
394 | Returns:
395 | True if the response should be cached
396 | """
397 | task_type = self._detect_task_type(request)
398 |
399 | # Always cache these task types
400 | always_cache_tasks = {
401 | "summarization", "information_extraction", "classification",
402 | "translation", "rewriting", "question_answering"
403 | }
404 |
405 | if task_type in always_cache_tasks:
406 | return True
407 |
408 | # Use base strategy for other tasks
409 | return self.exact_strategy.should_cache(request, response)
410 |
411 | def get_ttl(self, request: Dict[str, Any], response: Any) -> Optional[int]:
412 | """Get TTL based on task type.
413 |
414 | Args:
415 | request: Request parameters
416 | response: Response data
417 |
418 | Returns:
419 | TTL in seconds
420 | """
421 | task_type = self._detect_task_type(request)
422 |
423 | # Task-specific TTLs
424 | ttl_map = {
425 | "summarization": 30 * 24 * 60 * 60, # 30 days
426 | "information_extraction": 14 * 24 * 60 * 60, # 14 days
427 | "extraction": 14 * 24 * 60 * 60, # 14 days - Add explicit mapping for extraction
428 | "classification": 30 * 24 * 60 * 60, # 30 days
429 | "translation": 60 * 24 * 60 * 60, # 60 days
430 | "creative_writing": 1 * 24 * 60 * 60, # 1 day
431 | "chat": 1 * 24 * 60 * 60, # 1 day
432 | }
433 |
434 | if task_type in ttl_map:
435 | return ttl_map[task_type]
436 |
437 | # Default to base strategy
438 | return self.exact_strategy.get_ttl(request, response)
439 |
440 | def _detect_task_type(self, request: Dict[str, Any]) -> str:
441 | """Detect the task type from the request using multiple techniques.
442 |
443 | This function uses a combination of:
444 | 1. Explicit tags in the request
445 | 2. Request structure analysis
446 | 3. NLP-based content analysis
447 | 4. Model and parameter hints
448 |
449 | Args:
450 | request: Request parameters
451 |
452 | Returns:
453 | Task type identifier
454 | """
455 | # 1. Check for explicit task type
456 | if "task_type" in request:
457 | return request["task_type"]
458 |
459 | # 2. Check for task-specific parameters
460 | if "format" in request and request["format"] in ["json", "structured", "extraction"]:
461 | return "information_extraction"
462 |
463 | if "max_tokens" in request and request.get("max_tokens", 0) < 100:
464 | return "classification" # Short responses often indicate classification
465 |
466 | # 3. Check system prompt for clues
467 | system_prompt = None
468 | if "system" in request:
469 | system_prompt = request["system"]
470 | elif "messages" in request:
471 | for msg in request.get("messages", []):
472 | if msg.get("role") == "system":
473 | system_prompt = msg.get("content", "")
474 | break
475 |
476 | if system_prompt:
477 | system_lower = system_prompt.lower()
478 | # Check system prompt for task indicators
479 | if any(x in system_lower for x in ["summarize", "summary", "summarization", "summarize the following"]):
480 | return "summarization"
481 |
482 | if any(x in system_lower for x in ["extract", "extraction", "identify all", "parse"]):
483 | return "information_extraction"
484 |
485 | if any(x in system_lower for x in ["classify", "categorize", "determine the type"]):
486 | return "classification"
487 |
488 | if any(x in system_lower for x in ["translate", "translation", "convert to"]):
489 | return "translation"
490 |
491 | if any(x in system_lower for x in ["creative", "write a story", "compose", "generate a poem"]):
492 | return "creative_writing"
493 |
494 | if any(x in system_lower for x in ["reasoning", "solve", "think step by step"]):
495 | return "reasoning"
496 |
497 | if any(x in system_lower for x in ["chat", "conversation", "assistant", "helpful"]):
498 | return "chat"
499 |
500 | # 4. Extract text for content analysis
501 | text = self.semantic_strategy._extract_text(request)
502 | if not text:
503 | return "unknown"
504 |
505 | # 5. Sophisticated content analysis
506 | import re
507 |
508 | # Normalize text
509 | text_lower = text.lower()
510 |
511 | # Task-specific pattern matching
512 | task_patterns = {
513 | "summarization": [
514 | r"\bsummarize\b", r"\bsummary\b", r"\btldr\b", r"\bcondense\b",
515 | r"(provide|give|create).{1,20}(summary|overview)",
516 | r"(summarize|summarise).{1,30}(text|document|paragraph|content|article)",
517 | r"(key|main|important).{1,20}(points|ideas|concepts)"
518 | ],
519 |
520 | "information_extraction": [
521 | r"\bextract\b", r"\bidentify\b", r"\bfind all\b", r"\blist the\b",
522 | r"(extract|pull out|identify).{1,30}(information|data|details)",
523 | r"(list|enumerate).{1,20}(all|the)",
524 | r"(find|extract).{1,30}(names|entities|locations|dates)"
525 | ],
526 |
527 | "classification": [
528 | r"\bclassify\b", r"\bcategorize\b", r"\bgroup\b", r"\blabel\b",
529 | r"what (type|kind|category|class)",
530 | r"(determine|identify).{1,20}(type|class|category)",
531 | r"(which|what).{1,20}(category|group|type|class)"
532 | ],
533 |
534 | "translation": [
535 | r"\btranslate\b", r"\btranslation\b",
536 | r"(translate|convert).{1,30}(into|to|from).{1,20}(language|english|spanish|french)",
537 | r"(in|into).{1,10}(spanish|french|german|italian|japanese|chinese|korean)"
538 | ],
539 |
540 | "creative_writing": [
541 | r"\bwrite\b", r"\bcreate\b", r"\bgenerate\b", r"\bcompose\b",
542 | r"(write|create|generate|compose).{1,30}(story|poem|essay|article|blog post)",
543 | r"(creative|fiction|imaginative).{1,20}(writing|text|content)",
544 | r"(story|narrative|tale|fiction)"
545 | ],
546 |
547 | "question_answering": [
548 | r"(why|how|what|who|where|when).{1,30}\?",
549 | r"(explain|describe|define).{1,40}",
550 | r"(question|answer|respond)",
551 | r"(can you|could you|please).{1,30}(tell me|explain|describe)"
552 | ],
553 |
554 | "reasoning": [
555 | r"(solve|calculate|compute|reason|deduce)",
556 | r"(step by step|detailed|reasoning|rationale)",
557 | r"(problem|puzzle|challenge|riddle|question)",
558 | r"(math|mathematical|logic|logical)"
559 | ],
560 |
561 | "coding": [
562 | r"(code|function|program|script|algorithm)",
563 | r"(write|create|generate|implement).{1,30}(code|function|class|method)",
564 | r"(python|javascript|java|c\+\+|ruby|go|rust|typescript)"
565 | ],
566 |
567 | "chat": [
568 | r"(chat|conversation|discuss|talk)",
569 | r"(assist|help).{1,20}(me|with|in)",
570 | r"(you are|as a|you're).{1,20}(assistant|helper)"
571 | ]
572 | }
573 |
574 | # Score each task type
575 | task_scores = {}
576 |
577 | for task, patterns in task_patterns.items():
578 | score = 0
579 | for pattern in patterns:
580 | matches = re.findall(pattern, text_lower)
581 | score += len(matches) * 2 # Each match adds 2 points
582 |
583 | # Award bonus point for match in the first 50 chars (likely the main request)
584 | if re.search(pattern, text_lower[:50]):
585 | score += 3
586 |
587 | # Check for indicators in the first 100 characters (usually the intent)
588 | first_100 = text_lower[:100]
589 | if any(re.search(pattern, first_100) for pattern in patterns):
590 | score += 5
591 |
592 | task_scores[task] = score
593 |
594 | # 6. Check for additional structural clues
595 |
596 | # If JSON output requested, likely extraction
597 | if "json" in text_lower or "structured" in text_lower:
598 | task_scores["information_extraction"] += 5
599 |
600 | # If it contains code blocks or technical terms, likely coding
601 | if "```" in text or any(lang in text_lower for lang in ["python", "javascript", "java", "html", "css"]):
602 | task_scores["coding"] += 5
603 |
604 | # Check for question mark presence and density
605 | question_marks = text.count("?")
606 | if question_marks > 0:
607 | # Multiple questions indicate question answering
608 | task_scores["question_answering"] += min(question_marks * 2, 10)
609 |
610 | # 7. Check model hints
611 | model = request.get("model", "")
612 |
613 | # Some models are specialized for specific tasks
614 | if "instruct" in model.lower():
615 | task_scores["question_answering"] += 2
616 |
617 | if "chat" in model.lower():
618 | task_scores["chat"] += 2
619 |
620 | if "code" in model.lower() or "davinci-code" in model.lower():
621 | task_scores["coding"] += 5
622 |
623 | # 8. Determine highest scoring task
624 | if not task_scores:
625 | return "general"
626 |
627 | # Get task with highest score
628 | best_task = max(task_scores.items(), key=lambda x: x[1])
629 |
630 | # If score is too low, default to general
631 | if best_task[1] < 3:
632 | return "general"
633 |
634 | return best_task[0]
635 |
636 |
# Factory function
def get_strategy(strategy_name: str) -> CacheStrategy:
    """Get a cache strategy by name.

    Only the requested strategy is instantiated (the previous version
    eagerly constructed all three strategies on every call, even when the
    name was invalid).

    Args:
        strategy_name: Strategy name ("exact", "semantic" or "task")

    Returns:
        A fresh CacheStrategy instance

    Raises:
        ValueError: If strategy name is invalid
    """
    strategies = {
        "exact": ExactMatchStrategy,
        "semantic": SemanticMatchStrategy,
        "task": TaskBasedStrategy,
    }

    if strategy_name not in strategies:
        raise ValueError(
            f"Invalid cache strategy: {strategy_name}. " +
            f"Valid options: {', '.join(strategies.keys())}"
        )

    return strategies[strategy_name]()
```
--------------------------------------------------------------------------------
/example_structured_tool.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Example of a well-structured MCP tool with best practices.
3 |
4 | This module demonstrates how to create a comprehensive MCP tool
5 | that implements all the best practices for LLM usability:
6 | - Tool annotations for better decision-making
7 | - Standardized error handling
8 | - Input validation
9 | - Detailed documentation with examples
10 | - Structured outputs with consistent formats
11 | """
12 | import time
13 | import uuid
14 | from typing import Any, Dict, Optional
15 |
16 | # ---------------------------------
17 | from error_handling import non_empty_string, validate_inputs, with_error_handling
18 | from tool_annotations import ToolAnnotations
19 |
20 | # --- Import RAG tools/services ---
21 | # Assuming direct function import for simplicity in example
22 | # In a real structured app, might use dependency injection or service locators
23 | from ultimate_mcp_server.tools.rag import (
24 | add_documents,
25 | create_knowledge_base,
26 | delete_knowledge_base,
27 | retrieve_context,
28 | )
29 |
# --- Define KB Name for Demo ---
# Unique per process: the fresh uuid4 suffix avoids colliding with any
# knowledge base left behind by a previous demo run.
DEMO_KB_NAME = f"example_tool_kb_{uuid.uuid4().hex[:8]}"
# ------------------------------

# --- Sample Data (moved to top) ---
# Seed documents added to the demo knowledge base during setup. Each entry
# has an id, a display title, the body text, and metadata fields (type,
# level, date) that the search examples can filter on.
SAMPLE_DOCUMENTS = [
    {
        "id": "kb-001",
        "title": "Introduction to Climate Change",
        "text": "An overview of climate change causes and effects.",
        "type": "article",
        "level": "beginner",
        "date": "2023-01-15",
        "score_for_ranking": 0.95  # retained so examples can demonstrate score-based sorting
    },
    {
        "id": "kb-002",
        "title": "Machine Learning Fundamentals",
        "text": "Learn the basics of machine learning algorithms.",
        "type": "tutorial",
        "level": "beginner",
        "date": "2023-02-20",
        "score_for_ranking": 0.92
    },
    {
        "id": "kb-003",
        "title": "Advanced Neural Networks",
        "text": "Deep dive into neural network architectures.",
        "type": "tutorial",
        "level": "advanced",
        "date": "2023-03-10",
        "score_for_ranking": 0.88
    },
    {
        "id": "kb-004",
        "title": "Climate Policy FAQ",
        "text": "Frequently asked questions about climate policies.",
        "type": "faq",
        "level": "intermediate",
        "date": "2023-04-05",
        "score_for_ranking": 0.82
    },
    {
        "id": "kb-005",
        "title": "Python Reference for Data Science",
        "text": "Reference guide for Python in data science applications.",
        "type": "reference",
        "level": "intermediate",
        "date": "2023-05-12",
        "score_for_ranking": 0.78
    }
]
# -------------------------------------
84 |
class ExampleTool:
    """
    Example implementation of a well-structured MCP tool with best practices.

    Reference implementation demonstrating how to design tools for the Model
    Control Protocol (MCP) ecosystem, using a real-world RAG
    (Retrieval-Augmented Generation) tool that queries a demo knowledge base.

    Key design features:
    - Proper tool registration with the MCP server
    - Comprehensive schema definitions for inputs and outputs
    - Clear tool descriptions with usage guidance for LLMs
    - Tool annotations that provide semantic hints about tool behavior
    - Consistent error handling and input validation

    Usage:
        ```python
        server = MCPServer()
        tool = ExampleTool(server)   # registers all tools
        await setup_demo_kb()        # knowledge base must be set up before use
        ```
    """

    def __init__(self, mcp_server):
        """
        Initialize the instance and register all tools with the MCP server.

        Args:
            mcp_server: A fully initialized MCP server object exposing a
                functional ``tool`` registration decorator.

        Notes:
            - Tool registration happens immediately during initialization.
            - The demo knowledge base (see ``setup_demo_kb``) must be created
              separately before the tools return useful results.
            - The server instance is stored but not modified beyond
              tool registration.
        """
        self.mcp = mcp_server
        self._register_tools()

    def _register_tools(self):
        """
        Define and register all tools provided by this class.

        The primary tool is ``search_knowledge_base``, which retrieves
        documents from the demo knowledge base based on a query and optional
        metadata filters. Each tool is decorated with:
        - ``@self.mcp.tool``: registers the function as an MCP tool
        - ``@with_error_handling``: standardized exception handling
        - ``@validate_inputs``: validates required parameters before execution

        Returns:
            None - tools are registered as a side effect.
        """
        # Create tool annotations with appropriate behavioral hints.
        search_annotations = ToolAnnotations(
            read_only_hint=True,       # This tool doesn't modify anything
            destructive_hint=False,    # No destructive operations
            idempotent_hint=True,      # Can be called repeatedly with same results
            open_world_hint=True,      # Interacts with external data sources
            audience=["assistant"],    # Intended for the LLM to use
            priority=0.8,              # High priority tool
            title="Search Knowledge Base",
            examples=[
                # Example outputs mirror the dict actually returned by the
                # implementation below (results/count/retrieval_time) so LLMs
                # see an accurate contract.
                {
                    "name": "Basic search",
                    "description": "Search for information about a topic",
                    "input": {"query": "climate change", "filters": {"type": "article"}},
                    "output": {
                        "results": [
                            {"title": "Climate Change Basics", "score": 0.92},
                            {"title": "Effects of Global Warming", "score": 0.87}
                        ],
                        "count": 2,
                        "retrieval_time": 0.105
                    }
                },
                {
                    "name": "Advanced search",
                    "description": "Search with multiple filters and limits",
                    "input": {
                        "query": "machine learning",
                        "filters": {"type": "tutorial", "level": "beginner"},
                        "limit": 1
                    },
                    "output": {
                        "results": [
                            {"title": "Introduction to Machine Learning", "score": 0.95}
                        ],
                        "count": 1,
                        "retrieval_time": 0.087
                    }
                }
            ]
        )

        @self.mcp.tool(
            name="search_knowledge_base",
            description=(
                "Search for information in the knowledge base using keywords and filters.\n\n"
                "This tool is ideal for finding relevant information on specific topics. "
                "It supports filtering by content type, date ranges, and other metadata. "
                "The tool returns a list of matching results sorted by relevance score.\n\n"
                "WHEN TO USE:\n"
                "- When you need to find specific information on a topic\n"
                "- When you want to discover relevant articles or documentation\n"
                "- Before generating content to ensure accuracy\n\n"
                "WHEN NOT TO USE:\n"
                "- When you need to modify or create content (use content_* tools instead)\n"
                "- When you need very recent information that might not be in the knowledge base\n"
                "- When you need exact answers to questions (use qa_* tools instead)"
            ),
            annotations=search_annotations.to_dict(),
            input_schema={
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "Search query (required)"
                    },
                    "filters": {
                        "type": "object",
                        "description": "Optional filters to narrow results",
                        "properties": {
                            "type": {
                                "type": "string",
                                "enum": ["article", "tutorial", "reference", "faq"],
                                "description": "Content type filter"
                            },
                            "level": {
                                "type": "string",
                                "enum": ["beginner", "intermediate", "advanced"],
                                "description": "Difficulty level filter"
                            },
                            "date_after": {
                                "type": "string",
                                "format": "date",
                                "description": "Only include content after this date (YYYY-MM-DD)"
                            }
                        }
                    },
                    "limit": {
                        "type": "integer",
                        "minimum": 1,
                        "maximum": 20,
                        "default": 5,
                        "description": "Maximum number of results to return (1-20, default 5)"
                    }
                },
                "required": ["query"]
            },
            # FIX: the previous schema advertised total_matches/search_time_ms,
            # which the implementation never returned. This schema matches the
            # actual return shape (results/count/retrieval_time, plus error on
            # failure).
            output_schema={
                "type": "object",
                "properties": {
                    "results": {
                        "type": "array",
                        "items": {"type": "object"},
                        "description": "Retrieved document chunks with metadata and scores"
                    },
                    "count": {"type": "integer"},
                    "retrieval_time": {"type": "number"},
                    "error": {"type": "string"}
                }
            }
        )
        @with_error_handling
        @validate_inputs(query=non_empty_string)
        async def search_knowledge_base(
            query: str,
            filters: Optional[Dict[str, Any]] = None,
            limit: int = 5,
            ctx=None
        ) -> Dict[str, Any]:
            """
            Search for information in the knowledge base using keywords and filters.

            Args:
                query: Search query string (required).
                filters: Optional filters to narrow results:
                    - type: Content type (article, tutorial, reference, faq)
                    - level: Difficulty level (beginner, intermediate, advanced)
                    - date_after: Only include content after this date (YYYY-MM-DD)
                limit: Maximum number of results to return (clamped to 1-20, default 5).
                ctx: Context object passed by the MCP server.

            Returns:
                Dictionary containing:
                - results: List of retrieved document chunks with metadata and scores.
                - count: Number of results returned (respecting limit).
                - retrieval_time: Time taken for retrieval in seconds.
                - error: Error message (present only when retrieval fails).

            Examples:
                search_knowledge_base(query="climate change")
                search_knowledge_base(
                    query="machine learning",
                    filters={"type": "tutorial", "level": "beginner"},
                    limit=3
                )
            """
            # Start timing so a fallback retrieval_time can be reported.
            start_time = time.time()

            # Pass the filters dict straight through; retrieve_context is
            # expected to translate it into its backend's filter format.
            metadata_filter = filters

            # FIX: clamp to the 1-20 range declared in the input schema
            # (previously only the lower bound was enforced).
            limit = max(1, min(20, limit))

            try:
                # Call the actual retrieve_context tool against the demo KB.
                retrieval_result = await retrieve_context(
                    knowledge_base_name=DEMO_KB_NAME,
                    query=query,
                    top_k=limit,
                    metadata_filter=metadata_filter
                )

                if retrieval_result.get("success"):
                    results = retrieval_result.get("results", [])
                    return {
                        "results": results,
                        "count": len(results),
                        "retrieval_time": retrieval_result.get("retrieval_time", time.time() - start_time)
                    }
                # Propagate the error from retrieve_context with the same
                # response shape as the success path.
                return {
                    "error": retrieval_result.get("message", "Retrieval failed"),
                    "results": [],
                    "count": 0,
                    "retrieval_time": time.time() - start_time
                }

            except Exception as e:
                # Log the error (a real implementation would use a logger).
                print(f"Search error: {str(e)}")

                # FIX: return the same shape as the other error path so callers
                # can always rely on results/count/retrieval_time being present.
                return {
                    "error": f"Search failed: {str(e)}",
                    "results": [],
                    "count": 0,
                    "retrieval_time": time.time() - start_time
                }
397 |
398 | # --- Added Setup/Teardown for Demo KB ---
async def setup_demo_kb():
    """
    Create and populate the demo knowledge base with the sample documents.

    Creates the knowledge base named ``DEMO_KB_NAME`` with ``overwrite=True``
    (any existing KB with that name is deleted and recreated, ensuring a clean
    starting state), then adds every entry of ``SAMPLE_DOCUMENTS``: the "text"
    field becomes the document body, the "id" field its identifier, and all
    remaining fields (title, type, level, date, score_for_ranking) become
    metadata.

    Progress is logged to stdout; any failure is re-raised so the caller can
    handle it.

    Returns:
        None

    Raises:
        Exception: If knowledge base creation or document insertion fails.

    Usage:
        await setup_demo_kb()  # Must be called in an async context
    """
    print(f"Setting up demo knowledge base: {DEMO_KB_NAME}...")
    try:
        await create_knowledge_base(name=DEMO_KB_NAME, overwrite=True)
        # Split each sample document into body text, metadata, and id.
        texts, metadatas, ids = [], [], []
        for doc in SAMPLE_DOCUMENTS:
            texts.append(doc["text"])
            ids.append(doc["id"])
            metadatas.append({key: value for key, value in doc.items() if key != "text"})
        await add_documents(
            knowledge_base_name=DEMO_KB_NAME,
            documents=texts,
            metadatas=metadatas,
            ids=ids,
        )
        print("Demo knowledge base setup complete.")
    except Exception as e:
        print(f"Error setting up demo KB: {e}")
        raise
451 |
async def teardown_demo_kb():
    """
    Delete the demo knowledge base, suppressing (but logging) any errors.

    Counterpart to ``setup_demo_kb``. Unlike setup, this never raises:
    cleanup failures should not prevent the application from continuing or
    shutting down, so errors are printed and swallowed. Safe to call multiple
    times or when the knowledge base does not exist (the underlying
    ``delete_knowledge_base`` is expected to handle that case).

    Typically called during server shutdown hooks, after a demo completes, or
    in application cleanup phases.

    Returns:
        None

    Usage:
        await teardown_demo_kb()  # Must be called in an async context
    """
    print(f"Cleaning up demo knowledge base: {DEMO_KB_NAME}...")
    try:
        await delete_knowledge_base(name=DEMO_KB_NAME)
    except Exception as e:
        # Deliberately swallowed: cleanup errors must not propagate.
        print(f"Error cleaning up demo KB: {e}")
    else:
        print("Demo knowledge base cleaned up.")
493 | # -----------------------------------------
494 |
def register_example_tools(mcp_server):
    """
    Entry point for wiring the example tools into an MCP server instance.

    Instantiating ``ExampleTool`` registers every individual tool with the
    given server. Resource setup (the demo knowledge base) is deliberately
    NOT performed here: this function is synchronous, while
    ``setup_demo_kb()``/``teardown_demo_kb()`` are coroutines, so invoking
    them from here would require blocking on an event loop. In a production
    server the async setup/teardown belongs in the server lifecycle
    (lifespan/startup/shutdown events) instead.

    Args:
        mcp_server: A fully initialized MCP server object with a working tool
            registration system.

    Returns:
        None

    Usage:
        ```python
        server = MCPServer()
        register_example_tools(server)

        await setup_demo_kb()      # before using the tools
        await teardown_demo_kb()   # after tools are no longer needed
        ```

    Known limitations:
        - Cannot perform async setup directly here (sync/async boundary).
        - Knowledge base setup and cleanup must each be triggered separately
          as async operations.
    """
    # NOTE: asyncio.run(setup_demo_kb()) would block if this is called
    # synchronously, so the KB must be set up *before* the tool is called.
    # TODO: Run async setup/teardown non-blockingly or during server lifespan.
    ExampleTool(mcp_server)
```
--------------------------------------------------------------------------------
/examples/single_shot_synthesis_demo.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | """
3 | Single-Shot Synthesis Demo - Demonstrates the single_shot_synthesis tool
4 |
5 | This script shows how to:
6 | 1. Define a prompt and a set of "expert" models.
7 | 2. Specify a "synthesizer" model.
8 | 3. Call the single_shot_synthesis tool to get a fused response.
9 | 4. Display the individual expert responses and the final synthesized output.
10 |
11 | Usage:
12 | python examples/single_shot_synthesis_demo.py [--prompt "Your question here"] [--type text|code]
13 |
14 | Options:
15 | --prompt TEXT The prompt/question for the models.
16 | --name TEXT A descriptive name for the synthesis task.
17 | --type TYPE Type of synthesis: 'text' or 'code' (default: text).
18 | --expert-models MODEL [MODEL...] List of expert model IDs.
19 | --synthesizer-model MODEL Model ID for the synthesizer.
20 | """
21 |
22 | import argparse
23 | import asyncio
24 | import json
25 | import sys
26 | from pathlib import Path
27 | from typing import Any, Dict, List, Optional
28 |
29 | sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
30 |
31 | from rich import box
32 | from rich.console import Group
33 | from rich.markup import escape
34 | from rich.panel import Panel
35 | from rich.rule import Rule
36 | from rich.syntax import Syntax
37 | from rich.table import Table
38 |
39 | from ultimate_mcp_server.core.server import Gateway
40 | from ultimate_mcp_server.exceptions import ProviderError, ToolError # For error handling
41 |
42 | # from ultimate_mcp_server.tools.single_shot_synthesis import single_shot_synthesis # Called via gateway
43 | from ultimate_mcp_server.utils import get_logger, process_mcp_result
44 | from ultimate_mcp_server.utils.display import CostTracker # Reusing CostTracker
45 | from ultimate_mcp_server.utils.logging.console import console
46 |
47 | logger = get_logger("example.single_shot_synthesis")
48 | gateway: Optional[Gateway] = None
49 |
# --- Configuration ---
# Default "expert" models queried for independent answers; each entry is a
# config dict (model_id plus sampling parameters) passed to the synthesis tool.
DEFAULT_EXPERT_MODEL_CONFIGS_SSS: List[Dict[str, Any]] = [  # SSS suffix for SingleShotSynthesis
    {"model_id": "openai/gpt-4o-mini", "temperature": 0.7},
    {"model_id": "anthropic/claude-3-5-haiku-20241022", "temperature": 0.65},
    # {"model_id": "google/gemini-1.5-flash-latest", "temperature": 0.7},
]

# Model that fuses the expert responses into one answer.
DEFAULT_SYNTHESIZER_MODEL_CONFIG_SSS: Dict[str, Any] = {
    "model_id": "anthropic/claude-3-7-sonnet-20250219",
    "temperature": 0.5,
    "max_tokens": 3000,  # Allow more tokens for comprehensive synthesis
}
# Fallback if preferred synthesizer isn't available.
# NOTE(review): this is currently identical to the default config above (the
# previous comment incorrectly said "Sonnet 3.5"); change model_id if a
# distinct fallback is intended.
FALLBACK_SYNTHESIZER_MODEL_CONFIG_SSS: Dict[str, Any] = {
    "model_id": "anthropic/claude-3-7-sonnet-20250219",  # Same Claude 3.7 Sonnet as the default
    "temperature": 0.5,
    "max_tokens": 3000,
}

# Defaults for the CLI arguments (see parse_arguments_sss).
DEFAULT_SSS_PROMPT = "Compare and contrast the query optimization strategies used in PostgreSQL versus MySQL for complex analytical queries involving multiple joins and aggregations. Highlight key differences in their execution planners and indexing techniques."
DEFAULT_SSS_TASK_NAME = "DB Query Optimization Comparison"
DEFAULT_SSS_TYPE = "text"
72 |
73 |
def parse_arguments_sss():
    """Parse command-line arguments for the single-shot synthesis demo.

    Returns:
        argparse.Namespace with prompt, name, type, expert_models, and
        synthesizer_model attributes (all defaulted from the module-level
        configuration constants).
    """
    parser = argparse.ArgumentParser(description="Run a single-shot multi-model synthesis demo")
    parser.add_argument("--prompt", type=str, default=DEFAULT_SSS_PROMPT)
    parser.add_argument("--name", type=str, default=DEFAULT_SSS_TASK_NAME)
    parser.add_argument("--type", type=str, default=DEFAULT_SSS_TYPE, choices=["text", "code"])
    default_expert_ids = [cfg["model_id"] for cfg in DEFAULT_EXPERT_MODEL_CONFIGS_SSS]
    parser.add_argument(
        "--expert-models",
        type=str,
        nargs="+",
        default=default_expert_ids,
        help="List of expert model IDs.",
    )
    parser.add_argument(
        "--synthesizer-model",
        type=str,
        default=DEFAULT_SYNTHESIZER_MODEL_CONFIG_SSS["model_id"],
        help="Model ID for the synthesizer.",
    )
    return parser.parse_args()
93 |
94 |
async def setup_gateway_for_demo_sss():
    """Lazily initialize the module-level Gateway and verify the synthesis tool.

    Idempotent: returns immediately when the gateway already exists. Raises if
    Gateway construction fails or if the required 'single_shot_synthesis' tool
    was not registered.
    """
    global gateway
    if gateway:
        return
    logger.info("Initializing gateway for single-shot synthesis demo...", emoji_key="rocket")
    try:
        gateway = Gateway(name="sss_demo_gateway", register_tools=True, load_all_tools=True)
        # Providers may still be uninitialized after construction.
        if not gateway.providers:
            await gateway._initialize_providers()
    except Exception as e:
        logger.critical(f"Failed to initialize Gateway: {e}", exc_info=True)
        raise

    registered_names = {tool.name for tool in await gateway.mcp.list_tools()}
    if "single_shot_synthesis" not in registered_names:
        logger.error(
            "Gateway initialized, but 'single_shot_synthesis' tool is missing!", emoji_key="error"
        )
        raise RuntimeError("Required 'single_shot_synthesis' tool not registered.")
    logger.success(
        "Gateway for demo initialized and synthesis tool verified.", emoji_key="heavy_check_mark"
    )
118 |
119 |
def display_single_shot_synthesis_results(
    results_data: Dict[str, Any],
    original_prompt: str, # Added to display the prompt
    console_instance
):
    """Displays the results from the single_shot_synthesis tool.

    Renders, in order: task header (name, request id, status, error message,
    storage path), the original prompt, each expert model's response with its
    per-call metrics, the synthesized response (code or text, plus the
    synthesizer's thinking process when present), synthesizer metrics, overall
    metrics, and - when a synthesis_prompt.md exists in the storage path -
    the full prompt that was sent to the synthesizer model.

    Args:
        results_data: Output dict from the single_shot_synthesis tool.
        original_prompt: The prompt the user supplied (echoed for context).
        console_instance: Rich console to print to.
    """
    # --- Header: task name, request id, status ---
    console_instance.print(
        Rule(
            f"[bold magenta]Single-Shot Synthesis Task: {results_data.get('name', 'N/A')}[/bold magenta]"
        )
    )

    # Bail out early when the payload is missing or lacks the request_id
    # that every valid result must carry.
    if not results_data or not isinstance(results_data, dict) or not results_data.get("request_id"):
        console_instance.print(Panel("[bold red]No valid results data to display or essential fields missing.[/bold red]", border_style="red"))
        if isinstance(results_data, dict) and results_data.get("error_message"):
            console_instance.print(f"[bold red]Error in results data:[/bold red] {escape(results_data['error_message'])}")
        return

    console_instance.print(f"Request ID: [cyan]{results_data.get('request_id')}[/cyan]")
    status = results_data.get("status", "UNKNOWN")
    status_color = (
        "green" if status == "SUCCESS" else ("yellow" if status == "PARTIAL_SUCCESS" else "red")
    )
    console_instance.print(f"Status: [bold {status_color}]{status}[/bold {status_color}]")

    if results_data.get("error_message") and status in ["FAILED", "PARTIAL_SUCCESS"]:
        console_instance.print(
            Panel(f"[red]{escape(results_data.get('error_message'))}[/red]", title="[bold red]Error Message[/bold red]", border_style="red")
        )

    storage_path = results_data.get("storage_path")
    if storage_path:
        console_instance.print(
            f"Artifacts Storage: [blue underline]{escape(storage_path)}[/blue underline]"
        )

    console_instance.print(Panel(escape(original_prompt), title="[bold]Original Prompt[/bold]", border_style="blue", expand=False))

    # --- Expert responses: one panel per expert, content + metrics ---
    console_instance.print(Rule("[bold blue]Expert Model Responses[/bold blue]"))
    expert_responses = results_data.get("expert_responses", [])
    if expert_responses:
        for i, resp_dict in enumerate(expert_responses):
            model_id_display = resp_dict.get("model_id", "Unknown Model")
            has_error = bool(resp_dict.get("error"))
            status_icon = "❌" if has_error else "✔️"
            panel_title = f"{status_icon} Expert {i + 1}: {model_id_display}"
            current_border_style = "red" if has_error else "dim cyan"
            if has_error:
                panel_title += " [bold red](Failed)[/bold red]"

            content_table = Table(box=None, show_header=False, padding=(0,1))
            content_table.add_column(style="dim")
            content_table.add_column()

            if resp_dict.get("error"):
                content_table.add_row("[bold red]Error[/bold red]", escape(resp_dict.get('error', '')))

            text_content = resp_dict.get("response_text")
            # Assuming 'code' type experts are not used in this specific demo,
            # but adding for completeness if structure changes.
            # code_content = resp_dict.get("extracted_code")
            # if code_content:
            #     content_table.add_row("Extracted Code", Syntax(code_content, "python", theme="monokai", line_numbers=True, word_wrap=True))

            # Truncate long responses to 1000 chars for readability.
            if text_content:
                content_table.add_row("Response Text", escape(text_content[:1000] + ('...' if len(text_content) > 1000 else '')))
            elif not resp_dict.get("error"):
                content_table.add_row("Response Text", "[italic]No content from this expert.[/italic]")

            metrics = resp_dict.get("metrics", {})
            cost = metrics.get("cost", 0.0)
            api_latency = metrics.get("api_latency_ms", "N/A")
            total_task_time = metrics.get("total_task_time_ms", "N/A")
            input_tokens = metrics.get("input_tokens", "N/A")
            output_tokens = metrics.get("output_tokens", "N/A")

            metrics_table = Table(box=box.ROUNDED, show_header=False, title="Metrics")
            metrics_table.add_column(style="cyan")
            metrics_table.add_column(style="white")
            metrics_table.add_row("Cost", f"${cost:.6f}")
            metrics_table.add_row("Input Tokens", str(input_tokens))
            metrics_table.add_row("Output Tokens", str(output_tokens))
            metrics_table.add_row("API Latency", f"{api_latency} ms")
            metrics_table.add_row("Total Task Time", f"{total_task_time} ms")
            # Only show the API-reported model id when it differs from the
            # configured one (e.g. provider-side aliasing).
            if metrics.get("api_model_id_used") and metrics.get("api_model_id_used") != model_id_display:
                metrics_table.add_row("API Model Used", str(metrics.get("api_model_id_used")))

            main_panel_content = [content_table, metrics_table]

            console_instance.print(
                Panel(
                    Group(*main_panel_content),
                    title=f"[bold cyan]{panel_title}[/bold cyan]",
                    border_style=current_border_style,
                    expand=False,
                )
            )
    else:
        console_instance.print("[italic]No expert responses available.[/italic]")

    # --- Synthesized response: which model, thinking process, final output ---
    console_instance.print(Rule("[bold green]Synthesized Response[/bold green]"))

    synthesizer_metrics = results_data.get("synthesizer_metrics", {})
    synthesizer_model_id_used_api = synthesizer_metrics.get("api_model_id_used")

    # Attempt to get configured synthesizer model from input if API one is not available (should be rare)
    # This requires passing synthesizer_config to this function or storing it in results_data
    # For now, we rely on api_model_id_used from metrics.
    # Example: configured_synth_model = results_data.get("synthesizer_model_config", {}).get("model_id", "N/A")
    # synthesizer_model_display = synthesizer_model_id_used_api or configured_synth_model

    if synthesizer_model_id_used_api:
        console_instance.print(f"Synthesizer Model Used (from API): [magenta]{synthesizer_model_id_used_api}[/magenta]")
    else:
        # If not in metrics, try to infer from input or display N/A (needs input passed)
        console_instance.print("Synthesizer Model: [magenta]N/A (configured model not directly in output, check logs or input config)[/magenta]")


    thinking_process = results_data.get("synthesizer_thinking_process")
    if thinking_process:
        console_instance.print(
            Panel(
                escape(thinking_process),
                title="[bold]Synthesizer Thinking Process[/bold]",
                border_style="yellow",
                expand=False,
            )
        )

    final_text = results_data.get("synthesized_response_text")
    final_code = results_data.get("synthesized_extracted_code")
    # Determine if the original task was for code
    tournament_type = results_data.get("tournament_type", "text") # Assuming this field might be added to output for context

    if tournament_type == "code" and final_code:
        console_instance.print(
            Panel(
                Syntax(final_code, "python", theme="monokai", line_numbers=True, word_wrap=True),
                title="[bold]Final Synthesized Code[/bold]",
                border_style="green",
            )
        )
    elif final_text: # Also show text if it's a code tournament but no code was extracted, or if it's text type
        console_instance.print(
            Panel(
                escape(final_text),
                title="[bold]Final Synthesized Text[/bold]",
                border_style="green",
            )
        )
    else:
        console_instance.print(
            "[italic]No synthesized response generated (or it was empty).[/italic]"
        )

    # --- Metrics: synthesizer-only, then totals across all calls ---
    if synthesizer_metrics:
        console_instance.print(Rule("[bold]Synthesizer Metrics[/bold]"))
        synth_metrics_table = Table(box=box.SIMPLE, show_header=False, title_justify="left")
        synth_metrics_table.add_column("Metric", style="cyan")
        synth_metrics_table.add_column("Value", style="white")
        synth_metrics_table.add_row("Cost", f"${synthesizer_metrics.get('cost', 0.0):.6f}")
        synth_metrics_table.add_row("Input Tokens", str(synthesizer_metrics.get("input_tokens", "N/A")))
        synth_metrics_table.add_row("Output Tokens", str(synthesizer_metrics.get("output_tokens", "N/A")))
        synth_metrics_table.add_row(
            "API Latency", f"{synthesizer_metrics.get('api_latency_ms', 'N/A')} ms"
        )
        synth_metrics_table.add_row(
            "API Model Used", str(synthesizer_metrics.get("api_model_id_used", "N/A"))
        )
        console_instance.print(synth_metrics_table)

    console_instance.print(Rule("[bold]Overall Metrics for Entire Operation[/bold]"))
    total_metrics = results_data.get("total_metrics", {})
    # Reuse Rich Table for overall metrics
    overall_metrics_table = Table(box=box.SIMPLE, show_header=False, title_justify="left")
    overall_metrics_table.add_column("Metric", style="cyan")
    overall_metrics_table.add_column("Value", style="white")
    overall_metrics_table.add_row("Total Cost", f"${total_metrics.get('total_cost', 0.0):.6f}")
    overall_metrics_table.add_row(
        "Total Input Tokens (All Calls)", str(total_metrics.get("total_input_tokens", "N/A"))
    )
    overall_metrics_table.add_row(
        "Total Output Tokens (All Calls)", str(total_metrics.get("total_output_tokens", "N/A"))
    )
    overall_metrics_table.add_row(
        "Overall Task Time", f"{total_metrics.get('overall_task_time_ms', 'N/A')} ms"
    )
    console_instance.print(overall_metrics_table)
    console_instance.print()

    # Display the full prompt sent to the synthesizer model
    # (read from the synthesis_prompt.md artifact, when the tool stored one).
    if storage_path and results_data.get("status") in ["SUCCESS", "PARTIAL_SUCCESS"]:
        synthesis_prompt_file = Path(storage_path) / "synthesis_prompt.md"
        if synthesis_prompt_file.exists():
            try:
                synthesis_prompt_content = synthesis_prompt_file.read_text(encoding='utf-8')
                console_instance.print(Rule("[bold yellow]Full Prompt to Synthesizer Model[/bold yellow]"))
                console_instance.print(
                    Panel(
                        Syntax(synthesis_prompt_content, "markdown", theme="monokai", line_numbers=True, word_wrap=True),
                        title="[bold]Synthesizer Input Prompt[/bold]",
                        border_style="yellow",
                        expand=False # Keep it collapsed by default as it can be long
                    )
                )
            except Exception as e:
                logger.warning(f"Could not read or display synthesis_prompt.md: {e}", exc_info=True)
        else:
            logger.info("synthesis_prompt.md not found, skipping display.")
329 |
330 |
331 | async def run_single_shot_demo(tracker: CostTracker, args: argparse.Namespace):  # Drive one single-shot synthesis task via the 'single_shot_synthesis' MCP tool, display results, record cost; returns 0 on success, 1 on failure.
332 | console.print(Rule(f"[bold blue]Single-Shot Synthesis Demo - Task: {args.name}[/bold blue]"))
333 | console.print(
334 | f"Prompt: [yellow]{escape(args.prompt[:100] + ('...' if len(args.prompt) > 100 else ''))}[/yellow]"
335 | )
336 | console.print(f"Task Type: [magenta]{args.type}[/magenta]")
337 |
338 | expert_configs_for_tool: List[Dict[str, Any]] = []  # Per-expert config dicts for the tool; known model ids reuse their default config.
339 | for model_id_str in args.expert_models:
340 | default_mc = next(
341 | (mc for mc in DEFAULT_EXPERT_MODEL_CONFIGS_SSS if mc["model_id"] == model_id_str), None
342 | )
343 | if default_mc:
344 | expert_configs_for_tool.append(default_mc.copy()) # Use copy
345 | else:
346 | expert_configs_for_tool.append({"model_id": model_id_str})  # Unknown model: minimal config with just the id.
347 |
348 | synthesizer_config_for_tool: Dict[str, Any] = {"model_id": args.synthesizer_model}  # Minimal default; upgraded below if the id matches a known config.
349 | if args.synthesizer_model == DEFAULT_SYNTHESIZER_MODEL_CONFIG_SSS["model_id"]:
350 | synthesizer_config_for_tool = DEFAULT_SYNTHESIZER_MODEL_CONFIG_SSS.copy()
351 | elif args.synthesizer_model == FALLBACK_SYNTHESIZER_MODEL_CONFIG_SSS["model_id"]:
352 | synthesizer_config_for_tool = FALLBACK_SYNTHESIZER_MODEL_CONFIG_SSS.copy()
353 |
354 | console.print(
355 | f"Expert Models: [cyan]{', '.join([mc['model_id'] for mc in expert_configs_for_tool])}[/cyan]"
356 | )
357 | console.print(f"Synthesizer Model: [cyan]{synthesizer_config_for_tool['model_id']}[/cyan]")
358 |
359 | # Tool expects "expert_models" and "synthesizer_model" as per Pydantic aliases
360 | synthesis_input_for_tool = {
361 | "name": args.name,
362 | "prompt": args.prompt,
363 | "expert_models": expert_configs_for_tool,
364 | "synthesizer_model": synthesizer_config_for_tool,
365 | "tournament_type": args.type,
366 | # "synthesis_instructions": "Please synthesize these for clarity and impact..."
367 | }
368 |
369 | try:
370 | logger.info(f"Calling single_shot_synthesis tool for task: {args.name}", emoji_key="gear")
371 |
372 | console.print(
373 | Panel(
374 | f"Initiating Single-Shot Synthesis task: '[bold]{escape(args.name)}[/bold]'.\\n"  # NOTE(review): "\\n" is a literal backslash-n in the rendered panel; plain "\n" may have been intended — confirm.
375 | f"This involves parallel calls to [cyan]{len(expert_configs_for_tool)}[/cyan] expert model(s) "
376 | f"followed by the synthesizer model ([cyan]{synthesizer_config_for_tool['model_id']}[/cyan]).\\n"
377 | f"Prompt: '{escape(args.prompt[:150] + ('...' if len(args.prompt)>150 else ''))}'\\n"
378 | "[italic]Please wait, this may take a few moments...[/italic]",
379 | title="[bold blue]🚀 Starting Synthesis Process[/bold blue]",
380 | border_style="blue",
381 | expand=False
382 | )
383 | )
384 |
385 | synthesis_data_dict: Optional[Dict[str, Any]] = None  # Normalized tool result; populated by the type-dispatch below.
386 | with console.status("[bold yellow]Processing synthesis request via single_shot_synthesis tool...", spinner="dots"):  # Spinner shown while the long-running tool call is awaited.
387 | # The tool 'single_shot_synthesis' is already registered with the gateway
388 | synthesis_result_raw = await gateway.mcp.call_tool("single_shot_synthesis", synthesis_input_for_tool)  # 'gateway' is module-level — presumably set by setup_gateway_for_demo_sss; confirm.
389 |
390 | # Process the result (moved out of the status context)
391 | if isinstance(synthesis_result_raw, dict):
392 | logger.info("Tool call returned a dictionary directly. Using it as result.", emoji_key="package")
393 | synthesis_data_dict = synthesis_result_raw
394 | elif isinstance(synthesis_result_raw, list):
395 | logger.info("Tool call returned a list. Processing its first element.", emoji_key="package")
396 | if synthesis_result_raw:
397 | first_element = synthesis_result_raw[0]
398 | if isinstance(first_element, dict):
399 | synthesis_data_dict = first_element
400 | elif hasattr(first_element, 'text') and isinstance(first_element.text, str):
401 | logger.info("First element has a .text attribute (like TextContent). Attempting to parse its .text attribute as JSON.", emoji_key="memo")
402 | try:
403 | synthesis_data_dict = json.loads(first_element.text)
404 | except json.JSONDecodeError as e:
405 | logger.warning(f"JSON parsing of .text attribute failed: {e}. Falling back to process_mcp_result with the .text content.", emoji_key="warning")
406 | synthesis_data_dict = await process_mcp_result(first_element.text) # Pass the string for LLM repair
407 | else:
408 | logger.warning(f"Tool call returned a list, but its first element is not a dictionary or TextContent-like. Content: {synthesis_result_raw!r:.500}", emoji_key="warning")
409 | synthesis_data_dict = await process_mcp_result(synthesis_result_raw) # Fallback with the whole list
410 | else:
411 | logger.warning("Tool call returned an empty list. Falling back to process_mcp_result.", emoji_key="warning")
412 | synthesis_data_dict = await process_mcp_result(synthesis_result_raw) # Fallback
413 | elif isinstance(synthesis_result_raw, str): # If it's a string, try to parse
414 | logger.info("Tool call returned a string. Attempting to parse with process_mcp_result.", emoji_key="memo")
415 | synthesis_data_dict = await process_mcp_result(synthesis_result_raw)
416 | else: # If it's some other type, log and try process_mcp_result
417 | logger.warning(f"Tool call returned an unexpected type: {type(synthesis_result_raw)}. Attempting to process with process_mcp_result.", emoji_key="warning")
418 | synthesis_data_dict = await process_mcp_result(synthesis_result_raw)
419 |
420 |
421 | # Check for errors from the tool call itself or if the synthesis_data_dict is problematic
422 | if not synthesis_data_dict or not isinstance(synthesis_data_dict, dict) or \
423 | synthesis_data_dict.get("success", True) is False or \
424 | (synthesis_data_dict.get("status") == "FAILED" and synthesis_data_dict.get("error_message")):  # Failure if: empty/non-dict, explicit success=False, or FAILED status with a message.
425 |
426 | error_msg = "Unknown error or empty/invalid data from synthesis tool call."
427 | if synthesis_data_dict and isinstance(synthesis_data_dict, dict):
428 | error_msg = synthesis_data_dict.get("error_message", synthesis_data_dict.get("error", error_msg))  # Prefer 'error_message', fall back to 'error', then the generic text.
429 |
430 | logger.error(
431 | f"Single-shot synthesis tool call failed or returned invalid data: {error_msg}", emoji_key="cross_mark"
432 | )
433 | console.print(
434 | f"[bold red]Error from synthesis tool call:[/bold red] {escape(error_msg)}"
435 | )
436 | # Still attempt to display partial data if the structure is somewhat intact
437 | if synthesis_data_dict and isinstance(synthesis_data_dict, dict):
438 | display_single_shot_synthesis_results(synthesis_data_dict, args.prompt, console)
439 | else:
440 | console.print(Panel("[bold red]Received no usable data from the synthesis tool.[/bold red]", border_style="red"))
441 | return 1
442 |
443 | console.print(Rule("[bold green]✔️ Synthesis Process Completed[/bold green]"))
444 | # Pass the original prompt (args.prompt) to the display function
445 | display_single_shot_synthesis_results(synthesis_data_dict, args.prompt, console)
446 |
447 | # Cost tracking
448 | total_metrics = synthesis_data_dict.get("total_metrics", {})
449 | cost = total_metrics.get("total_cost", 0.0)
450 | input_tokens = total_metrics.get("total_input_tokens", 0)
451 | output_tokens = total_metrics.get("total_output_tokens", 0)
452 |
453 | # For tracker, provider/model is ambiguous for the whole operation, use task name
454 | tracker.record_call(
455 | cost=cost,
456 | provider="synthesis_tool_operation",
457 | model=args.name,
458 | input_tokens=input_tokens,
459 | output_tokens=output_tokens
460 | )
461 |
462 | except (ToolError, ProviderError, Exception) as e:  # NOTE(review): Exception already subsumes ToolError/ProviderError; the tuple is redundant but harmless.
463 | logger.error(
464 | f"An error occurred during the single-shot synthesis demo: {e}",
465 | exc_info=True,
466 | emoji_key="error",
467 | )
468 | console.print(f"[bold red]Demo Error:[/bold red] {escape(str(e))}")
469 | return 1
470 | finally:  # Runs on every exit path (success, error, early return 1): always show cost summary.
471 | tracker.display_summary(console)
472 | logger.info("Single-shot synthesis demo finished.", emoji_key="party_popper")
473 | return 0  # Success path; failure paths returned 1 above.
474 |
475 |
476 | async def main_async_sss():  # Entry coroutine: parse args, set up the gateway, run the demo; returns a process exit code.
477 | args = parse_arguments_sss()
478 | tracker = CostTracker()
479 | exit_code = 1  # Pessimistic default: stays 1 unless run_single_shot_demo completes and returns its own code.
480 | try:
481 | await setup_gateway_for_demo_sss()
482 | exit_code = await run_single_shot_demo(tracker, args)
483 | except Exception as e:
484 | console.print(
485 | f"[bold red]Critical error in demo setup or execution:[/bold red] {escape(str(e))}"
486 | )
487 | logger.critical(f"Demo main_async_sss failed: {e}", exc_info=True)
488 | finally:
489 | logger.info("Demo finished.")
490 | return exit_code  # Note: return after finally — exceptions above are swallowed and reported via the exit code.
491 |
492 |
493 | if __name__ == "__main__":  # Script entry point: run the async demo and propagate its exit code to the shell.
494 | try:
495 | final_exit_code = asyncio.run(main_async_sss())
496 | except KeyboardInterrupt:
497 | console.print("\n[bold yellow]Demo interrupted by user.[/bold yellow]")
498 | final_exit_code = 130  # Conventional exit code for Ctrl-C: 128 + SIGINT(2).
499 | sys.exit(final_exit_code)
500 |
```
--------------------------------------------------------------------------------
/examples/python_sandbox_demo.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | """Comprehensive demonstration script for PythonSandbox tools in Ultimate MCP Server."""
3 |
4 | # --- Standard Library Imports ---
5 | import argparse
6 | import asyncio
7 | import sys
8 | import uuid
9 | from pathlib import Path
10 |
11 | # --- Configuration & Path Setup ---
12 | # Add project root to path for imports when running as script
13 | # Adjust this path if your script location relative to the project root differs
14 | try:  # Best-effort project-root discovery so 'ultimate_mcp_server' imports when this file runs as a script.
15 | PROJECT_ROOT = Path(__file__).resolve().parent.parent
16 | if not (PROJECT_ROOT / "ultimate_mcp_server").is_dir(): # Check for the actual package dir
17 | # Fallback if running from a different structure (e.g., examples dir directly)
18 | PROJECT_ROOT = (
19 | Path(__file__).resolve().parent.parent.parent
20 | ) # Go up two levels if in examples
21 | if not (PROJECT_ROOT / "ultimate_mcp_server").is_dir():
22 | print(
23 | "Error: Could not reliably determine project root. Make sure 'ultimate_mcp_server' is importable.",
24 | file=sys.stderr,
25 | )
26 | sys.exit(1)  # Abort early: the rest of the script cannot import the package.
27 | sys.path.insert(0, str(PROJECT_ROOT))  # Prepend so the local checkout takes precedence over any installed copy.
28 | print(f"DEBUG: Added '{PROJECT_ROOT}' to sys.path")
29 | except Exception as e:
30 | print(f"Error during initial path setup: {e}", file=sys.stderr)
31 | sys.exit(1)
32 |
33 | # --- IMPORTANT: Playwright Check FIRST ---
34 | # The sandbox relies heavily on Playwright. Check availability early.
35 | try:  # Fail-fast dependency probe: the sandbox tool requires Playwright.
36 | import playwright.async_api as pw # noqa F401 - Check import
37 |
38 | PLAYWRIGHT_AVAILABLE_DEMO = True
39 | except ImportError:
40 | PLAYWRIGHT_AVAILABLE_DEMO = False  # NOTE(review): never observed by later code — sys.exit(1) below ends the process first.
41 | print(
42 | "[ERROR] Playwright library not found. Please install it (`pip install playwright && playwright install chromium`).",
43 | file=sys.stderr,
44 | )
45 | # Exit immediately if Playwright is crucial for the demo's purpose
46 | sys.exit(1)
47 |
48 | # --- Defer ultimate_mcp_server imports AFTER path setup ---
49 | # Import Rich components
50 | from rich.markup import escape
51 | from rich.panel import Panel
52 | from rich.rule import Rule
53 | from rich.traceback import install as install_rich_traceback
54 |
55 | # Import necessary tool functions and exceptions
56 | from ultimate_mcp_server.exceptions import ProviderError, ToolError, ToolInputError
57 | from ultimate_mcp_server.tools.python_sandbox import (
58 | _close_all_sandboxes, # Import cleanup function
59 | display_sandbox_result,
60 | execute_python,
61 | repl_python,
62 | )
63 | from ultimate_mcp_server.utils import get_logger
64 |
65 | # Use the generic display helper and make a sandbox-specific one
66 | from ultimate_mcp_server.utils.display import safe_tool_call
67 | from ultimate_mcp_server.utils.logging.console import console
68 |
69 | # --- Logger and Constants ---
70 | logger = get_logger("example.python_sandbox")  # Module-level logger for this demo script.
71 | # Use a unique session ID for REPL tests
72 | REPL_SESSION_HANDLE = f"demo-repl-{uuid.uuid4().hex[:8]}"  # Random per-run handle so REPL state never collides across runs.
73 |
74 | # Install rich tracebacks for better error display
75 | install_rich_traceback(show_locals=False, width=console.width)
76 |
77 | # --- Enhanced Display Helper (from older script) ---
78 |
79 |
80 |
81 | # --- Argument Parsing ---
82 | def parse_arguments():
83 | """Parse command line arguments for the demo and return the populated argparse.Namespace."""
84 | parser = argparse.ArgumentParser(
85 | description="Python Sandbox Demo for Ultimate MCP Server Tools",
86 | formatter_class=argparse.RawDescriptionHelpFormatter,
87 | epilog="""Available demos:
88 | all - Run all demos (default)
89 | basic - Basic execution, stdout/stderr, result capture
90 | errors - Syntax and Runtime error handling
91 | timeout - Execution timeout handling
92 | packages - Package loading (numpy, pandas)
93 | wheels - Wheel loading via micropip (requires --allow-network)
94 | repl - Persistent REPL state and reset functionality
95 | security - Network and filesystem access controls
96 | visualization - Data visualization using matplotlib (requires package)
97 | """,
98 | )
99 |
100 | parser.add_argument(  # Optional positional selector for which demo section to run.
101 | "demo",
102 | nargs="?",
103 | default="all",
104 | choices=[
105 | "all",
106 | "basic",
107 | "errors",
108 | "timeout",
109 | "packages",
110 | "wheels",
111 | "repl",
112 | "security",
113 | "visualization",
114 | ],
115 | help="Specific demo to run (default: all)",
116 | )
117 |
118 | parser.add_argument(  # Opt-in: sandbox network access (needed by the 'wheels' demo and allowed-network tests).
119 | "--allow-network",
120 | action="store_true",
121 | help="Enable network access within the sandbox for demos requiring it (e.g., wheel loading)",
122 | )
123 |
124 | parser.add_argument(  # Opt-in: mcpfs filesystem bridge used by the 'security' demo.
125 | "--allow-fs",
126 | action="store_true",
127 | help="Enable filesystem access bridge (mcpfs) within the sandbox. Requires filesystem tool to be configured.",
128 | )
129 |
130 | parser.add_argument(
131 | "-v",
132 | "--verbose",
133 | action="store_true",
134 | help="Increase output verbosity (Note: internal tool logging is often DEBUG level)",
135 | )
136 |
137 | return parser.parse_args()
138 |
139 |
140 | # --- Demo Functions ---
141 |
142 |
143 | async def demonstrate_basic_execution(args):
144 | """Demonstrate basic code execution, I/O capture, results."""
145 | console.print(Rule("[bold cyan]1. Basic Execution & I/O[/bold cyan]", style="cyan"))
146 | logger.info("Demonstrating basic execution...", emoji_key="code")
147 |
148 | # --- Simple Execution with Result ---
149 | code_simple = """
150 | result = 40 + 2
151 | print("Calculation done.")
152 | """
153 | result = await safe_tool_call(  # Sandbox convention shown here: the snippet's 'result' variable is the returned value.
154 | execute_python,
155 | {"code": code_simple},
156 | description="Executing simple addition (result = 40 + 2)",
157 | )
158 | display_sandbox_result("Basic Addition", result, code_simple)
159 |
160 | # --- Stdout/Stderr Capture ---
161 | code_io = """
162 | import sys
163 | print("Hello to stdout!")
164 | print("This is line 2 on stdout.")
165 | print("Error message to stderr!", file=sys.stderr)
166 | print("Another error line!", file=sys.stderr)
167 | result = "IO test complete"
168 | """
169 | result = await safe_tool_call(
170 | execute_python, {"code": code_io}, description="Capturing stdout and stderr"
171 | )
172 | display_sandbox_result("Stdout/Stderr Capture", result, code_io)
173 |
174 | # --- No 'result' variable assigned ---
175 | code_no_result = """
176 | x = 10
177 | y = 20
178 | print(f"x + y = {x+y}")
179 | # 'result' variable is not assigned
180 | """
181 | result = await safe_tool_call(  # Shows how the sandbox reports a run that binds no 'result'.
182 | execute_python,
183 | {"code": code_no_result},
184 | description="Executing code without assigning to 'result'",
185 | )
186 | display_sandbox_result("No 'result' Variable Assigned", result, code_no_result)
187 |
188 |
189 | async def demonstrate_error_handling(args):
190 | """Demonstrate handling of syntax and runtime errors."""
191 | console.print(Rule("[bold cyan]2. Error Handling Demo[/bold cyan]", style="cyan"))
192 | logger.info("Starting error handling demo")
193 |
194 | # --- Syntax Error ---
195 | code_syntax_error = "result = 1 +" # Missing operand
196 | result = await safe_tool_call(  # Expected to fail; safe_tool_call captures the error for display rather than raising.
197 | execute_python,
198 | {"code": code_syntax_error},
199 | description="Executing code with SyntaxError (should fail)",
200 | )
201 | display_sandbox_result("Syntax Error Handling", result, code_syntax_error)
202 |
203 | # --- Runtime Error ---
204 | code_runtime_error = """
205 | def divide(a, b):
206 | return a / b
207 | result = divide(10, 0) # ZeroDivisionError
208 | """
209 | result = await safe_tool_call(
210 | execute_python,
211 | {"code": code_runtime_error},
212 | description="Executing code with ZeroDivisionError (should fail)",
213 | )
214 | display_sandbox_result("Runtime Error Handling (ZeroDivisionError)", result, code_runtime_error)
215 |
216 | # --- Name Error ---
217 | code_name_error = "result = undefined_variable + 5"
218 | result = await safe_tool_call(
219 | execute_python,
220 | {"code": code_name_error},
221 | description="Executing code with NameError (should fail)",
222 | )
223 | display_sandbox_result("Runtime Error Handling (NameError)", result, code_name_error)
224 |
225 |
226 | async def demonstrate_timeout(args):
227 | """Demonstrate timeout handling."""
228 | console.print(Rule("[bold cyan]3. Timeout Handling Demo[/bold cyan]", style="cyan"))
229 | logger.info("Starting timeout handling demo")
230 |
231 | code_timeout = """
232 | import time
233 | print("Starting computation that will time out...")
234 | time.sleep(5) # Sleep for 5 seconds
235 | print("This line should not be reached due to timeout")
236 | result = "Completed successfully despite timeout request?" # Should not happen
237 | """
238 | # Use a short timeout (3 seconds) to trigger the error
239 | result = await safe_tool_call(
240 | execute_python,
241 | {"code": code_timeout, "timeout_ms": 3000},  # 3s limit vs. the snippet's 5s sleep guarantees a timeout.
242 | description="Executing code that exceeds timeout (3s)",
243 | )
244 | display_sandbox_result("Timeout Handling (3s Timeout)", result, code_timeout)
245 |
246 |
247 | async def demonstrate_packages(args):
248 | """Demonstrate loading Python packages."""
249 | console.print(
250 | Rule("[bold cyan]4. Package Loading Demo (NumPy & Pandas)[/bold cyan]", style="cyan")
251 | )
252 | logger.info("Starting package loading demo")
253 |
254 | # NumPy example
255 | numpy_code = """
256 | import numpy as np
257 | a = np.array([[1, 2], [3, 4]])
258 | result = {
259 | 'shape': a.shape,
260 | 'mean': np.mean(a).item(), # Use .item() for scalar
261 | 'determinant': np.linalg.det(a).item() if a.shape == (2, 2) else 'N/A'
262 | }
263 | print(f"Array:\\n{a}")
264 | """
265 | result = await safe_tool_call(
266 | execute_python,
267 | {"code": numpy_code, "packages": ["numpy"], "timeout_ms": 15000},  # Generous timeout: package loading can dominate runtime.
268 | description="Using numpy package",
269 | )
270 | display_sandbox_result("NumPy Package Demo", result, numpy_code)
271 |
272 | # Pandas example (depends on numpy)
273 | pandas_code = """
274 | import pandas as pd
275 | data = {'col1': [1, 2, 3], 'col2': [4, 5, 6]}
276 | df = pd.DataFrame(data)
277 | print("DataFrame Head:")
278 | print(df.head())
279 | result = df.describe().to_dict() # Return summary stats as dict
280 | """
281 | result = await safe_tool_call(
282 | execute_python,
283 | {"code": pandas_code, "packages": ["pandas"], "timeout_ms": 20000},  # Even longer: pandas pulls in numpy too.
284 | description="Using pandas package",
285 | )
286 | display_sandbox_result("Pandas Package Demo", result, pandas_code)
287 |
288 |
289 | async def demonstrate_wheels(args):
290 | """Demonstrate loading wheels (requires network)."""
291 | console.print(Rule("[bold cyan]5. Wheel Loading Demo (httpx)[/bold cyan]", style="cyan"))
292 | logger.info("Starting wheel loading demo")
293 |
294 | if not args.allow_network:  # Hard skip: installing wheels is impossible without sandbox network access.
295 | console.print(
296 | Panel(
297 | "Skipping wheel loading demo.\n"
298 | "Network access is required to install wheels from URLs or PyPI.\n"
299 | "Rerun with the [yellow]--allow-network[/yellow] flag to include this test.",
300 | title="Network Access Disabled",
301 | border_style="yellow",
302 | expand=False,
303 | )
304 | )
305 | return
306 |
307 | code_wheel = """
308 | try:
309 | import httpx
310 | print(f"httpx version: {httpx.__version__}")
311 | # Make a simple request to test network access
312 | response = httpx.get('https://httpbin.org/get?demo=wheel', timeout=10)
313 | response.raise_for_status()
314 | data = response.json()
315 | result = f"Successfully fetched URL via httpx. Origin IP: {data.get('origin', 'Unknown')}"
316 | except Exception as e:
317 | # Raising an exception shows up nicely in stderr display
318 | raise RuntimeError(f"Error using httpx: {e}") from e
319 | """
320 | # Specify package 'httpx'. Micropip should handle fetching it if not preloaded.
321 | result = await safe_tool_call(
322 | execute_python,
323 | {"code": code_wheel, "packages": ["httpx"], "allow_network": True, "timeout_ms": 25000},  # allow_network=True is required both to fetch the wheel and for the test request.
324 | description="Loading 'httpx' package/wheel (requires network)",
325 | )
326 | display_sandbox_result("Wheel Loading Demo (httpx)", result, code_wheel)
327 |
328 |
329 | async def demonstrate_repl(args):
330 | """Demonstrate persistent REPL sessions and reset."""
331 | console.print(Rule("[bold cyan]6. Persistent REPL Sessions[/bold cyan]", style="cyan"))
332 | logger.info("Demonstrating REPL functionality...", emoji_key="repl")
333 |
334 | repl_handle = REPL_SESSION_HANDLE # Use a consistent handle for the demo
335 |
336 | # --- Call 1: Define Variable & Function ---
337 | code1 = """
338 | x = 100
339 | def double(val):
340 | return val * 2
341 | print(f"Defined x = {x} and function double()")
342 | result = "Setup complete"
343 | """
344 | result1 = await safe_tool_call(
345 | repl_python,
346 | {"code": code1, "handle": repl_handle},
347 | description=f"REPL Call 1 (Handle: {repl_handle[-8:]}): Define x and double()",
348 | )
349 | display_sandbox_result(f"REPL Step 1 (Handle: ...{repl_handle[-8:]})", result1, code1)
350 | if (
351 | not result1
352 | or not result1.get("success")
353 | or result1.get("result", {}).get("handle") != repl_handle
354 | ):  # Bail out unless call 1 succeeded and echoed back our session handle.
355 | console.print(
356 | "[bold red]Error:[/bold red] Failed to get handle from first REPL call. Aborting REPL demo."
357 | )
358 | return
359 |
360 | # --- Call 2: Use Variable & Function ---
361 | code2 = "result = double(x) # Uses x and double() from previous call"
362 | result2 = await safe_tool_call(  # Same handle: state from call 1 must still be present.
363 | repl_python,
364 | {"code": code2, "handle": repl_handle},
365 | description=f"REPL Call 2 (Handle: {repl_handle[-8:]}): Call double(x)",
366 | )
367 | display_sandbox_result(f"REPL Step 2 (Handle: ...{repl_handle[-8:]})", result2, code2)
368 |
369 | # --- Call 3: Import and Use ---
370 | code3 = """
371 | import math
372 | result = math.sqrt(x) # Use x again
373 | print(f"Square root of x ({x}) is {result}")
374 | """
375 | result3 = await safe_tool_call(
376 | repl_python,
377 | {
378 | "code": code3,
379 | "handle": repl_handle,
380 | "packages": [],
381 | },
382 | description=f"REPL Call 3 (Handle: {repl_handle[-8:]}): Import math and use x",
383 | )
384 | display_sandbox_result(f"REPL Step 3 (Handle: ...{repl_handle[-8:]})", result3, code3)
385 |
386 | # --- Call 4: Reset Session ---
387 | # Code is empty, only resetting
388 | result4 = await safe_tool_call(
389 | repl_python,
390 | {"code": "", "handle": repl_handle, "reset": True},  # reset=True discards the session state; empty code means "reset only".
391 | description=f"REPL Call 4 (Handle: {repl_handle[-8:]}): Resetting the session",
392 | )
393 | display_sandbox_result(
394 | f"REPL Step 4 - Reset (Handle: ...{repl_handle[-8:]})",
395 | result4,
396 | "# Resetting the REPL state",
397 | )
398 |
399 | # --- Call 5: Try Using Variable After Reset (should fail) ---
400 | code5 = """
401 | try:
402 | result = double(x) # Should fail as x and double are gone
403 | except NameError as e:
404 | print(f"Caught expected error: {e}")
405 | result = f"Caught expected NameError: {e}"
406 | """
407 | result5 = await safe_tool_call(
408 | repl_python,
409 | {"code": code5, "handle": repl_handle},
410 | description=f"REPL Call 5 (Handle: {repl_handle[-8:]}): Using state after reset (should fail/catch NameError)",
411 | )
412 | display_sandbox_result(
413 | f"REPL Step 5 - Post-Reset (Handle: ...{repl_handle[-8:]})", result5, code5
414 | )
415 |
416 |
417 | async def demonstrate_security(args):
418 | """Demonstrate network and filesystem access controls."""
419 | console.print(Rule("[bold cyan]7. Security Controls[/bold cyan]", style="cyan"))
420 | logger.info("Demonstrating security controls...", emoji_key="security")
421 |
422 | # --- Network Access Control ---
423 | console.print(Rule("Network Access", style="dim"))
424 | code_network = """
425 | import httpx
426 | try:
427 | # Use httpx which was potentially loaded in wheel demo
428 | print("Attempting network request to httpbin...")
429 | response = httpx.get('https://httpbin.org/get?demo=network_security', timeout=5)
430 | response.raise_for_status()
431 | result = f"Network access successful. Status: {response.status_code}"
432 | except Exception as e:
433 | # Use print instead of raise to see the output in the demo result
434 | print(f"Network request failed: {type(e).__name__}: {e}")
435 | result = f"Network request failed as expected (or httpx not loaded)."
436 | """
437 | # Attempt without network access (should fail within sandbox)
438 | console.print(
439 | Panel(
440 | "Attempting network access with [red]allow_network=False[/red] (expected failure or httpx import error)",
441 | title="Network Test 1",
442 | )
443 | )
444 | result_net_denied = await safe_tool_call(  # Same snippet run twice: once denied, once (optionally) allowed.
445 | execute_python,
446 | {"code": code_network, "packages": ["httpx"], "allow_network": False},
447 | description="Network access with allow_network=False",
448 | )
449 | display_sandbox_result("Network Access Denied", result_net_denied, code_network)
450 |
451 | # Attempt with network access (should succeed IF network flag is passed)
452 | console.print(
453 | Panel(
454 | "Attempting network access with [green]allow_network=True[/green]",
455 | title="Network Test 2",
456 | )
457 | )
458 | if args.allow_network:  # The "allowed" case only runs when the demo itself was granted network via --allow-network.
459 | result_net_allowed = await safe_tool_call(
460 | execute_python,
461 | {"code": code_network, "packages": ["httpx"], "allow_network": True},
462 | description="Network access with allow_network=True",
463 | )
464 | display_sandbox_result("Network Access Allowed", result_net_allowed, code_network)
465 | else:
466 | console.print(
467 | "[yellow]Skipped:[/yellow] Rerun demo with --allow-network flag to test allowed network access."
468 | )
469 |
470 | # --- Filesystem Access Control ---
471 | console.print(Rule("Filesystem Access (via mcpfs bridge)", style="dim"))
472 | code_fs_list = """
473 | try:
474 | import mcpfs
475 | print("Attempting to list directory '.' via mcpfs...")
476 | # Note: Path inside sandbox needs to map to an allowed host path
477 | target_path = '.' # Represents the sandbox's current dir
478 | listing = mcpfs.listdir(target_path)
479 | result = f"Successfully listed '{target_path}': {len(listing)} entries found via mcpfs."
480 | print(f"Listing result: {listing}")
481 | except ModuleNotFoundError:
482 | print("mcpfs module not available (allow_fs=False?)")
483 | result = "mcpfs module not found (expected failure)"
484 | except Exception as e:
485 | print(f"Filesystem access failed: {type(e).__name__}: {e}")
486 | result = f"Filesystem access failed: {e}"
487 | """
488 | # Attempt without FS access (should fail - ModuleNotFoundError)
489 | console.print(
490 | Panel(
491 | "Attempting filesystem access with [red]allow_fs=False[/red] (expected ModuleNotFoundError)",
492 | title="Filesystem Test 1",
493 | )
494 | )
495 | result_fs_denied = await safe_tool_call(
496 | execute_python,
497 | {"code": code_fs_list, "allow_fs": False},
498 | description="Filesystem access with allow_fs=False",
499 | )
500 | display_sandbox_result("Filesystem Access Denied (mcpfs)", result_fs_denied, code_fs_list)
501 |
502 | # Attempt with FS access (should succeed IF FS flag is passed AND FS tool configured on host)
503 | console.print(
504 | Panel(
505 | "Attempting filesystem access with [green]allow_fs=True[/green]",
506 | title="Filesystem Test 2",
507 | )
508 | )
509 | if args.allow_fs:  # mcpfs bridge test only when explicitly enabled via --allow-fs.
510 | console.print(
511 | "[yellow]Note:[/yellow] Success requires the host Filesystem tool to be configured with allowed directories."
512 | )
513 | result_fs_allowed = await safe_tool_call(
514 | execute_python,
515 | {"code": code_fs_list, "allow_fs": True},
516 | description="Filesystem access with allow_fs=True",
517 | )
518 | display_sandbox_result("Filesystem Access Allowed (mcpfs)", result_fs_allowed, code_fs_list)
519 | else:
520 | console.print(
521 | "[yellow]Skipped:[/yellow] Rerun demo with --allow-fs flag to test allowed filesystem access bridge."
522 | )
523 | console.print(
524 | "[dim](Also ensure the host Filesystem tool is configured with allowed directories.)[/dim]"
525 | )
526 |
527 |
528 | async def demonstrate_visualization(args):
529 | """Demonstrate data visualization capabilities."""
530 | console.print(
531 | Rule("[bold cyan]8. Data Visualization Demo (Matplotlib)[/bold cyan]", style="cyan")
532 | )
533 | logger.info("Starting data visualization demo")
534 |
535 | matplotlib_code = """
536 | # Ensure backend is non-interactive
537 | import matplotlib
538 | matplotlib.use('Agg')
539 | import matplotlib.pyplot as plt
540 | import numpy as np
541 | from io import BytesIO
542 | import base64
543 |
544 | try:
545 | print("Generating plot...")
546 | # Generate data
547 | x = np.linspace(-np.pi, np.pi, 200)
548 | y_sin = np.sin(x)
549 | y_cos = np.cos(x)
550 |
551 | # Create plot
552 | fig, ax = plt.subplots(figsize=(8, 5)) # Use fig, ax pattern
553 | ax.plot(x, y_sin, label='sin(x)')
554 | ax.plot(x, y_cos, label='cos(x)', linestyle='--')
555 | ax.set_title('Sine and Cosine Waves')
556 | ax.set_xlabel('Radians')
557 | ax.set_ylabel('Value')
558 | ax.grid(True)
559 | ax.legend()
560 | plt.tight_layout() # Adjust layout
561 |
562 | # Save plot to base64
563 | buffer = BytesIO()
564 | fig.savefig(buffer, format='png', dpi=90) # Save the figure object
565 | buffer.seek(0)
566 | img_str = base64.b64encode(buffer.read()).decode('utf-8')
567 | plt.close(fig) # Close the figure to free memory
568 |
569 | print(f"Generated plot as base64 string (Length: {len(img_str)} chars)")
570 | result = f"data:image/png;base64,{img_str}"
571 | except Exception as e:
572 | print(f"Error during plotting: {type(e).__name__}: {e}")
573 | import traceback
574 | traceback.print_exc() # Print traceback to stderr for diagnosis
575 | result = f"Plot generation failed: {e}"
576 |
577 | """
578 | # Requires matplotlib and numpy packages
579 | result = await safe_tool_call(
580 | execute_python,
581 | {"code": matplotlib_code, "packages": ["numpy", "matplotlib"], "timeout_ms": 25000},  # Long timeout: matplotlib is a heavy package to load.
582 | description="Generating plot with Matplotlib",
583 | )
584 |
585 | # Display result, summarizing the base64 string
586 | result_display = result.copy()  # NOTE(review): .copy() is shallow — the nested assignment below also mutates 'result'; harmless here since 'result' is not reused, but a deep copy would be safer.
587 | if result_display.get("success") and "result" in result_display.get("result", {}):
588 | res_value = result_display["result"]["result"]
589 | if isinstance(res_value, str) and res_value.startswith("data:image/png;base64,"):
590 | result_display["result"]["result"] = f"[Base64 image data - {len(res_value)} chars]"  # Truncate for console display only; the raw data URI is still in 'result'.
591 |
592 | display_sandbox_result("Matplotlib Visualization", result_display, matplotlib_code)
593 | console.print(
594 | Panel(
595 | "[yellow]Note:[/] The 'result' contains base64 image data. In a web UI, this could be displayed using an `<img>` tag.",
596 | border_style="yellow",
597 | )
598 | )
599 |
600 |
601 | async def main():
602 | """Run the Python Sandbox tools demonstration."""
603 | args = parse_arguments()
604 | exit_code = 0
605 |
606 | console.print(Rule("[bold magenta]Python Sandbox Tools Demo[/bold magenta]", style="white"))
607 |
608 | # Explicitly check for Playwright availability
609 | if not PLAYWRIGHT_AVAILABLE_DEMO:
610 | console.print(
611 | "[bold red]Error:[/bold red] Playwright is required for the Python Sandbox tool but is not installed or importable."
612 | )
613 | console.print(
614 | "Please install it via: [cyan]pip install playwright && playwright install chromium[/]"
615 | )
616 | return 1 # Exit if core dependency is missing
617 |
618 | logger.info("Starting Python Sandbox demonstration", emoji_key="start")
619 |
620 | try:
621 | # --- Display Demo Options ---
622 | if args.demo == "all":
623 | console.print(
624 | Panel(
625 | "Running all demo sections.\n"
626 | "Use command-line arguments to run specific sections (e.g., `python examples/python_sandbox_demo.py repl`).\n"
627 | "Use `--allow-network` or `--allow-fs` to enable those features for relevant tests.",
628 | title="Demo Options",
629 | border_style="cyan",
630 | expand=False,
631 | )
632 | )
633 |
634 | # --- Run Selected Demonstrations ---
635 | run_all = args.demo == "all"
636 |
637 | if run_all or args.demo == "basic":
638 | await demonstrate_basic_execution(args)
639 | console.print()
640 |
641 | if run_all or args.demo == "errors":
642 | await demonstrate_error_handling(args)
643 | console.print()
644 |
645 | if run_all or args.demo == "timeout":
646 | await demonstrate_timeout(args)
647 | console.print()
648 |
649 | if run_all or args.demo == "packages":
650 | await demonstrate_packages(args)
651 | console.print()
652 |
653 | if run_all or args.demo == "wheels":
654 | await demonstrate_wheels(args)
655 | console.print()
656 |
657 | if run_all or args.demo == "repl":
658 | await demonstrate_repl(args)
659 | console.print()
660 |
661 | if run_all or args.demo == "security":
662 | await demonstrate_security(args)
663 | console.print()
664 |
665 | if run_all or args.demo == "visualization":
666 | await demonstrate_visualization(args)
667 | console.print()
668 |
669 | logger.success(f"Python Sandbox Demo(s) completed: {args.demo}", emoji_key="complete")
670 | console.print(Rule("[bold green]Demo Complete[/bold green]", style="green"))
671 |
672 | except (ToolInputError, ToolError, ProviderError) as e:
673 | logger.error(f"Tool Error during demo: {e}", emoji_key="error", exc_info=True)
674 | console.print(f"\n[bold red]TOOL ERROR:[/bold red] {escape(str(e))}")
675 | if hasattr(e, "details") and e.details:
676 | console.print("[bold]Details:[/bold]")
677 | console.print(escape(str(e.details)))
678 | exit_code = 1
679 | except Exception as e:
680 | logger.critical(f"Demo crashed unexpectedly: {str(e)}", emoji_key="critical", exc_info=True)
681 | console.print(f"\n[bold red]CRITICAL ERROR:[/bold red] {escape(str(e))}")
682 | console.print_exception(show_locals=False)
683 | exit_code = 1
684 | finally:
685 | # --- Cleanup ---
686 | console.print(Rule("Cleanup", style="dim"))
687 | try:
688 | # Explicitly call the sandbox cleanup function
689 | await _close_all_sandboxes()
690 | logger.info("Sandbox cleanup completed.", emoji_key="cleanup")
691 | console.print("Sandbox cleanup finished.")
692 | except Exception as e:
693 | logger.error(f"Error during sandbox cleanup: {e}", emoji_key="error")
694 | console.print(f"[bold red]Error during sandbox cleanup:[/bold red] {escape(str(e))}")
695 |
696 | return exit_code
697 |
698 |
699 | if __name__ == "__main__":
700 | # Run the demo
701 | final_exit_code = asyncio.run(main())
702 | sys.exit(final_exit_code)
703 |
```
--------------------------------------------------------------------------------
/ultimate_mcp_server/services/analytics/metrics.py:
--------------------------------------------------------------------------------
```python
1 | """Metrics collection and monitoring for Ultimate MCP Server."""
2 | import json
3 | import os
4 | import time
5 | from collections import defaultdict
6 | from datetime import datetime, timedelta
7 | from pathlib import Path
8 | from typing import Any, Dict, Optional, Union
9 |
10 | from ultimate_mcp_server.utils import get_logger
11 |
12 | logger = get_logger(__name__)
13 |
14 | try:
15 | import aiofiles
16 | AIOFILES_AVAILABLE = True
17 | except ImportError:
18 | AIOFILES_AVAILABLE = False
19 |
20 | try:
21 | from prometheus_client import Counter, Histogram
22 | PROMETHEUS_AVAILABLE = True
23 | except ImportError:
24 | PROMETHEUS_AVAILABLE = False
25 |
26 |
class MetricsTracker:
    """Comprehensive metrics tracking and monitoring system for Ultimate MCP Server.

    The MetricsTracker is a singleton class that collects, processes, and persists
    operational metrics related to LLM usage, costs, performance, and errors. It provides
    the data foundation for analytics reporting and monitoring tools.

    Key features:
    - Singleton design pattern ensures consistent metrics across application
    - Persistent storage with automatic serialization to JSON
    - Tracks usage by provider, model, and time periods (hourly, daily)
    - Records request counts, token usage, costs, errors, and performance metrics
    - Cache efficiency monitoring (hits, misses, cost savings)
    - Optional Prometheus integration for external monitoring systems
    - Asynchronous persistence to minimize performance impact
    - Automatic data retention policies to prevent memory bloat

    The metrics are automatically persisted to disk and can be loaded on startup,
    providing continuity across server restarts. Time-series data is maintained
    for historical analysis and trend visualization.

    Usage:
        # Get the singleton instance
        metrics = get_metrics_tracker()

        # Record a request
        metrics.record_request(
            provider="anthropic",
            model="claude-3-opus",
            input_tokens=150,
            output_tokens=500,
            cost=0.0325,
            duration=2.5
        )

        # Record cache operations
        metrics.record_cache_hit(cost_saved=0.015)
        metrics.record_cache_miss()

        # Get current statistics
        stats = metrics.get_stats()

        # Manually trigger persistence (usually automatic)
        metrics.save_metrics()
    """

    # Singleton storage; populated on first instantiation.
    _instance = None

    def __new__(cls, *args, **kwargs):
        """Create a singleton instance."""
        if cls._instance is None:
            cls._instance = super(MetricsTracker, cls).__new__(cls)
            # Guard flag so __init__ only runs its body once.
            cls._instance._initialized = False
        return cls._instance

    def __init__(
        self,
        metrics_dir: Optional[Union[str, Path]] = None,
        enable_prometheus: bool = False,
        reset_on_start: bool = False
    ):
        """Initialize the metrics tracker.

        Args:
            metrics_dir: Directory for metrics storage (defaults to ~/.ultimate/metrics)
            enable_prometheus: Whether to enable Prometheus metrics (requires
                prometheus_client to be importable)
            reset_on_start: Whether to reset metrics on startup instead of
                loading any previously persisted state
        """
        # Only initialize once for singleton
        if self._initialized:
            return

        # Set metrics directory
        if metrics_dir:
            self.metrics_dir = Path(metrics_dir)
        else:
            self.metrics_dir = Path.home() / ".ultimate" / "metrics"

        # Create metrics directory if it doesn't exist
        self.metrics_dir.mkdir(parents=True, exist_ok=True)

        # Prometheus is only enabled when both requested and importable.
        self.enable_prometheus = enable_prometheus and PROMETHEUS_AVAILABLE

        # Initialize metrics data (fresh or from the persisted JSON file)
        if reset_on_start:
            self._reset_metrics()
        else:
            self._load_metrics()

        # Initialize Prometheus metrics if enabled
        if self.enable_prometheus:
            self._init_prometheus_metrics()

        self._initialized = True

        logger.info(
            f"Metrics tracker initialized (dir: {self.metrics_dir}, prometheus: {self.enable_prometheus})",
            emoji_key="analytics"
        )

    def _reset_metrics(self):
        """Reset all in-memory metrics data to zeroed/empty state."""
        # General stats
        self.start_time = time.time()
        self.requests_total = 0
        self.tokens_total = 0
        self.cost_total = 0.0

        # Provider-specific stats
        self.provider_requests = defaultdict(int)
        self.provider_tokens = defaultdict(int)
        self.provider_costs = defaultdict(float)

        # Model-specific stats
        self.model_requests = defaultdict(int)
        self.model_tokens = defaultdict(int)
        self.model_costs = defaultdict(float)

        # Request timing stats (bounded to last 1000 samples each, see record_request)
        self.request_times = []
        self.request_times_by_provider = defaultdict(list)
        self.request_times_by_model = defaultdict(list)

        # Error stats
        self.errors_total = 0
        self.errors_by_provider = defaultdict(int)
        self.errors_by_model = defaultdict(int)

        # Token usage by time period (keys: "YYYY-MM-DD-HH" / "YYYY-MM-DD")
        self.hourly_tokens = defaultdict(int)
        self.daily_tokens = defaultdict(int)

        # Request counts by time period
        self.hourly_requests = defaultdict(int)
        self.daily_requests = defaultdict(int)

        # Cost by time period
        self.hourly_costs = defaultdict(float)
        self.daily_costs = defaultdict(float)

        # Cache stats
        self.cache_hits = 0
        self.cache_misses = 0
        self.cache_saved_cost = 0.0

    def _load_metrics(self):
        """Load metrics from disk; fall back to a reset on any failure."""
        metrics_file = self.metrics_dir / "metrics.json"

        if metrics_file.exists():
            try:
                with open(metrics_file, "r") as f:
                    data = json.load(f)

                # Load general stats
                self.start_time = data.get("start_time", time.time())
                self.requests_total = data.get("requests_total", 0)
                self.tokens_total = data.get("tokens_total", 0)
                self.cost_total = data.get("cost_total", 0.0)

                # Load provider stats
                self.provider_requests = defaultdict(int, data.get("provider_requests", {}))
                self.provider_tokens = defaultdict(int, data.get("provider_tokens", {}))
                self.provider_costs = defaultdict(float, data.get("provider_costs", {}))

                # Load model stats
                self.model_requests = defaultdict(int, data.get("model_requests", {}))
                self.model_tokens = defaultdict(int, data.get("model_tokens", {}))
                self.model_costs = defaultdict(float, data.get("model_costs", {}))

                # Load timing stats (limited to last 1000 for memory)
                self.request_times = data.get("request_times", [])[-1000:]
                self.request_times_by_provider = defaultdict(list)
                for provider, times in data.get("request_times_by_provider", {}).items():
                    self.request_times_by_provider[provider] = times[-1000:]

                self.request_times_by_model = defaultdict(list)
                for model, times in data.get("request_times_by_model", {}).items():
                    self.request_times_by_model[model] = times[-1000:]

                # Load error stats
                self.errors_total = data.get("errors_total", 0)
                self.errors_by_provider = defaultdict(int, data.get("errors_by_provider", {}))
                self.errors_by_model = defaultdict(int, data.get("errors_by_model", {}))

                # Load time period stats
                self.hourly_tokens = defaultdict(int, data.get("hourly_tokens", {}))
                self.daily_tokens = defaultdict(int, data.get("daily_tokens", {}))
                self.hourly_costs = defaultdict(float, data.get("hourly_costs", {}))
                self.daily_costs = defaultdict(float, data.get("daily_costs", {}))
                self.hourly_requests = defaultdict(int, data.get("hourly_requests", {}))
                self.daily_requests = defaultdict(int, data.get("daily_requests", {}))

                # Load cache stats
                self.cache_hits = data.get("cache_hits", 0)
                self.cache_misses = data.get("cache_misses", 0)
                self.cache_saved_cost = data.get("cache_saved_cost", 0.0)

                logger.info(
                    f"Loaded metrics from {metrics_file}",
                    emoji_key="analytics"
                )

            except Exception as e:
                # Corrupt/unreadable file: log and start from a clean slate.
                logger.error(
                    f"Failed to load metrics: {str(e)}",
                    emoji_key="error"
                )
                self._reset_metrics()
        else:
            self._reset_metrics()

    def _init_prometheus_metrics(self):
        """Initialize Prometheus counters/histograms (no-op if library missing)."""
        if not PROMETHEUS_AVAILABLE:
            return

        # Request metrics
        self.prom_requests_total = Counter(
            "ultimate_requests_total",
            "Total number of requests",
            ["provider", "model"]
        )

        # Token metrics
        self.prom_tokens_total = Counter(
            "ultimate_tokens_total",
            "Total number of tokens",
            ["provider", "model", "type"]  # type: input or output
        )

        # Cost metrics
        self.prom_cost_total = Counter(
            "ultimate_cost_total",
            "Total cost in USD",
            ["provider", "model"]
        )

        # Timing metrics
        self.prom_request_duration = Histogram(
            "ultimate_request_duration_seconds",
            "Request duration in seconds",
            ["provider", "model"],
            buckets=(0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0)
        )

        # Error metrics
        self.prom_errors_total = Counter(
            "ultimate_errors_total",
            "Total number of errors",
            ["provider", "model"]
        )

        # Cache metrics
        self.prom_cache_hits = Counter(
            "ultimate_cache_hits_total",
            "Total number of cache hits"
        )
        self.prom_cache_misses = Counter(
            "ultimate_cache_misses_total",
            "Total number of cache misses"
        )
        self.prom_cache_saved_cost = Counter(
            "ultimate_cache_saved_cost_total",
            "Total cost saved by cache in USD"
        )

    def _snapshot(self) -> Dict[str, Any]:
        """Build a JSON-serializable snapshot of all current metrics.

        Shared by the async and sync persistence paths so the on-disk
        schema is defined in exactly one place.

        Returns:
            Dictionary ready to be passed to ``json.dump``.
        """
        return {
            "start_time": self.start_time,
            "requests_total": self.requests_total,
            "tokens_total": self.tokens_total,
            "cost_total": self.cost_total,
            "provider_requests": dict(self.provider_requests),
            "provider_tokens": dict(self.provider_tokens),
            "provider_costs": dict(self.provider_costs),
            "model_requests": dict(self.model_requests),
            "model_tokens": dict(self.model_tokens),
            "model_costs": dict(self.model_costs),
            "request_times": self.request_times,
            "request_times_by_provider": dict(self.request_times_by_provider),
            "request_times_by_model": dict(self.request_times_by_model),
            "errors_total": self.errors_total,
            "errors_by_provider": dict(self.errors_by_provider),
            "errors_by_model": dict(self.errors_by_model),
            "hourly_tokens": dict(self.hourly_tokens),
            "daily_tokens": dict(self.daily_tokens),
            "hourly_costs": dict(self.hourly_costs),
            "daily_costs": dict(self.daily_costs),
            "hourly_requests": dict(self.hourly_requests),
            "daily_requests": dict(self.daily_requests),
            "cache_hits": self.cache_hits,
            "cache_misses": self.cache_misses,
            "cache_saved_cost": self.cache_saved_cost,
            "last_updated": time.time()
        }

    async def _save_metrics_async(self):
        """Save metrics to disk asynchronously.

        Writes to a temp file and atomically renames it over the real file.
        Falls back to the synchronous writer when aiofiles is unavailable.
        """
        if not AIOFILES_AVAILABLE:
            # Fix: previously this returned without saving at all, so metrics
            # scheduled from record_request were silently never persisted
            # when aiofiles was missing. Fall back to the sync path instead.
            self.save_metrics()
            return

        metrics_file = self.metrics_dir / "metrics.json"
        temp_file = metrics_file.with_suffix(".tmp")

        try:
            # Save to temp file
            async with aiofiles.open(temp_file, "w") as f:
                await f.write(json.dumps(self._snapshot(), indent=2))

            # Rename temp file to actual file (atomic on POSIX)
            os.replace(temp_file, metrics_file)

        except Exception as e:
            logger.error(
                f"Failed to save metrics: {str(e)}",
                emoji_key="error"
            )

    def save_metrics(self):
        """Save metrics to disk synchronously via temp-file-then-rename."""
        metrics_file = self.metrics_dir / "metrics.json"
        temp_file = metrics_file.with_suffix(".tmp")

        try:
            # Save to temp file
            with open(temp_file, "w") as f:
                json.dump(self._snapshot(), f, indent=2)

            # Rename temp file to actual file (atomic on POSIX)
            os.replace(temp_file, metrics_file)

        except Exception as e:
            logger.error(
                f"Failed to save metrics: {str(e)}",
                emoji_key="error"
            )

    def record_request(
        self,
        provider: str,
        model: str,
        input_tokens: int,
        output_tokens: int,
        cost: float,
        duration: float,
        success: bool = True
    ):
        """Record metrics for a request.

        Args:
            provider: Provider name
            model: Model name
            input_tokens: Number of input tokens
            output_tokens: Number of output tokens
            cost: Cost of the request
            duration: Duration of the request in seconds
            success: Whether the request was successful
        """
        # Update general stats
        self.requests_total += 1
        total_tokens = input_tokens + output_tokens
        self.tokens_total += total_tokens
        self.cost_total += cost

        # Update provider stats
        self.provider_requests[provider] += 1
        self.provider_tokens[provider] += total_tokens
        self.provider_costs[provider] += cost

        # Update model stats
        self.model_requests[model] += 1
        self.model_tokens[model] += total_tokens
        self.model_costs[model] += cost

        # Update timing stats, bounding each series to the last 1000 samples
        # to keep memory usage and persisted-file size flat.
        self.request_times.append(duration)
        if len(self.request_times) > 1000:
            self.request_times = self.request_times[-1000:]

        self.request_times_by_provider[provider].append(duration)
        if len(self.request_times_by_provider[provider]) > 1000:
            self.request_times_by_provider[provider] = self.request_times_by_provider[provider][-1000:]

        self.request_times_by_model[model].append(duration)
        if len(self.request_times_by_model[model]) > 1000:
            self.request_times_by_model[model] = self.request_times_by_model[model][-1000:]

        # Update error stats if request failed
        if not success:
            self.errors_total += 1
            self.errors_by_provider[provider] += 1
            self.errors_by_model[model] += 1

        # Update time period stats
        current_time = time.time()
        hour_key = datetime.fromtimestamp(current_time).strftime("%Y-%m-%d-%H")
        day_key = datetime.fromtimestamp(current_time).strftime("%Y-%m-%d")

        self.hourly_tokens[hour_key] += total_tokens
        self.daily_tokens[day_key] += total_tokens
        self.hourly_costs[hour_key] += cost
        self.daily_costs[day_key] += cost
        self.hourly_requests[hour_key] += 1
        self.daily_requests[day_key] += 1

        # Update Prometheus metrics if enabled
        if self.enable_prometheus:
            self.prom_requests_total.labels(provider=provider, model=model).inc()
            self.prom_tokens_total.labels(provider=provider, model=model, type="input").inc(input_tokens)
            self.prom_tokens_total.labels(provider=provider, model=model, type="output").inc(output_tokens)
            self.prom_cost_total.labels(provider=provider, model=model).inc(cost)
            self.prom_request_duration.labels(provider=provider, model=model).observe(duration)

            if not success:
                self.prom_errors_total.labels(provider=provider, model=model).inc()

        # Schedule metrics saving
        try:
            import asyncio
            asyncio.create_task(self._save_metrics_async())
        except (ImportError, RuntimeError):
            # Fall back to synchronous saving when no event loop is running
            self.save_metrics()

    def record_cache_hit(self, cost_saved: float = 0.0):
        """Record a cache hit.

        Args:
            cost_saved: Cost saved by the cache hit
        """
        self.cache_hits += 1
        self.cache_saved_cost += cost_saved

        # Update Prometheus metrics if enabled
        if self.enable_prometheus:
            self.prom_cache_hits.inc()
            self.prom_cache_saved_cost.inc(cost_saved)

    def record_cache_miss(self):
        """Record a cache miss."""
        self.cache_misses += 1

        # Update Prometheus metrics if enabled
        if self.enable_prometheus:
            self.prom_cache_misses.inc()

    @staticmethod
    def _avg(values) -> float:
        """Arithmetic mean of a sequence, or 0 when it is empty."""
        return sum(values) / len(values) if values else 0

    def get_stats(self) -> Dict[str, Any]:
        """Get current metrics.

        Returns:
            Dictionary of metrics with "general", "providers", "models",
            "cache", "top_providers", "top_models", "daily_usage", and
            "today" sections.
        """
        # Calculate uptime
        uptime = time.time() - self.start_time

        # Calculate request rate (per minute)
        request_rate = self.requests_total / (uptime / 60) if uptime > 0 else 0

        # Calculate average response time
        avg_response_time = self._avg(self.request_times)

        # Calculate cache hit ratio
        cache_total = self.cache_hits + self.cache_misses
        cache_hit_ratio = self.cache_hits / cache_total if cache_total > 0 else 0

        # Get top providers by usage
        top_providers = sorted(
            self.provider_tokens.items(),
            key=lambda x: x[1],
            reverse=True
        )[:5]

        # Get top models by usage
        top_models = sorted(
            self.model_tokens.items(),
            key=lambda x: x[1],
            reverse=True
        )[:5]

        # Get daily token usage for last 7 days (most recent first)
        today = datetime.now().strftime("%Y-%m-%d")
        daily_usage = []
        for i in range(7):
            day = (datetime.now() - timedelta(days=i)).strftime("%Y-%m-%d")
            daily_usage.append((
                day,
                self.daily_tokens.get(day, 0),
                self.daily_costs.get(day, 0.0),
                self.daily_requests.get(day, 0)
            ))

        # Compile stats
        return {
            "general": {
                "uptime": uptime,
                "uptime_human": self._format_duration(uptime),
                "requests_total": self.requests_total,
                "tokens_total": self.tokens_total,
                "cost_total": self.cost_total,
                "request_rate": request_rate,
                "avg_response_time": avg_response_time,
                "errors_total": self.errors_total,
                "error_rate": self.errors_total / self.requests_total if self.requests_total > 0 else 0,
            },
            "providers": {
                provider: {
                    "requests": count,
                    "tokens": self.provider_tokens.get(provider, 0),
                    "cost": self.provider_costs.get(provider, 0.0),
                    "avg_response_time": self._avg(self.request_times_by_provider.get(provider, [])),
                    "errors": self.errors_by_provider.get(provider, 0),
                }
                for provider, count in self.provider_requests.items()
            },
            "models": {
                model: {
                    "requests": count,
                    "tokens": self.model_tokens.get(model, 0),
                    "cost": self.model_costs.get(model, 0.0),
                    "avg_response_time": self._avg(self.request_times_by_model.get(model, [])),
                    "errors": self.errors_by_model.get(model, 0),
                }
                for model, count in self.model_requests.items()
            },
            "cache": {
                "hits": self.cache_hits,
                "misses": self.cache_misses,
                "hit_ratio": cache_hit_ratio,
                "saved_cost": self.cache_saved_cost,
            },
            "top_providers": [
                {
                    "provider": provider,
                    "tokens": tokens,
                    "percentage": tokens / self.tokens_total if self.tokens_total > 0 else 0,
                }
                for provider, tokens in top_providers
            ],
            "top_models": [
                {
                    "model": model,
                    "tokens": tokens,
                    "percentage": tokens / self.tokens_total if self.tokens_total > 0 else 0,
                }
                for model, tokens in top_models
            ],
            "daily_usage": [
                {
                    "date": date,
                    "tokens": tokens,
                    "cost": cost,
                    "requests": requests
                }
                for date, tokens, cost, requests in daily_usage
            ],
            "today": {
                "tokens": self.daily_tokens.get(today, 0),
                "cost": self.daily_costs.get(today, 0.0),
                "requests": self.daily_requests.get(today, 0)
            }
        }

    def _format_duration(self, seconds: float) -> str:
        """Format duration in a human-readable format.

        Args:
            seconds: Duration in seconds

        Returns:
            Formatted duration string (seconds, minutes, hours, or days)
        """
        if seconds < 60:
            return f"{seconds:.1f} seconds"
        elif seconds < 3600:
            minutes = seconds / 60
            return f"{minutes:.1f} minutes"
        elif seconds < 86400:
            hours = seconds / 3600
            return f"{hours:.1f} hours"
        else:
            days = seconds / 86400
            return f"{days:.1f} days"

    def reset(self):
        """Reset all metrics."""
        self._reset_metrics()
        logger.info(
            "Metrics reset",
            emoji_key="analytics"
        )
649 |
650 |
651 | # Singleton instance getter
def get_metrics_tracker(
    metrics_dir: Optional[Union[str, Path]] = None,
    enable_prometheus: bool = False,
    reset_on_start: bool = False
) -> MetricsTracker:
    """Return the process-wide MetricsTracker singleton.

    Because MetricsTracker initializes itself only once, the arguments are
    honored on the first call; subsequent calls return the existing instance.

    Args:
        metrics_dir: Directory for metrics storage
        enable_prometheus: Whether to enable Prometheus metrics
        reset_on_start: Whether to reset metrics on startup

    Returns:
        MetricsTracker singleton instance
    """
    return MetricsTracker(
        metrics_dir,
        enable_prometheus,
        reset_on_start,
    )
```
--------------------------------------------------------------------------------
/examples/web_automation_instruction_packs.py:
--------------------------------------------------------------------------------
```python
1 | # examples/web_automation_instruction_packs.py
2 |
3 | """
4 | Instruction Packs for the Abstract `find_and_download_pdfs` Tool.
5 |
6 | This file contains pre-defined instruction dictionaries that configure the behavior
7 | of the generic `find_and_download_pdfs` tool for specific tasks.
8 | """
9 |
10 | # --- Instruction Pack 1: Academic Papers (arXiv) ---
# Instruction pack configuring `find_and_download_pdfs` to fetch research
# papers from arXiv. The single-brace tokens ({topic}, {search_term},
# {search_results_summary}, {context}) are str.format placeholders filled in
# by the tool at runtime; doubled braces ({{...}}) render as literal JSON.
ACADEMIC_PAPER_INSTRUCTIONS = {
    # Phase 1: how to search the web and pick a target URL.
    "search_phase": {
        "search_query_template": "site:arxiv.org {topic} filetype:pdf",
        # LLM prompt that must yield {"target_url": ...} as strict JSON.
        "target_site_identification_prompt": """Analyze the search results summary for "{search_term}".
Identify the URL that is most likely a direct PDF link (ending in .pdf) or a specific relevant paper's abstract page on arXiv.org. PRIORITIZE direct PDF links over abstract pages. Always choose direct PDF links when available.

Search Results Summary:
---
{search_results_summary}
---

Respond ONLY with a valid JSON object: {{"target_url": "URL_or_null"}}.""",
        "search_engine": "google", # Changed from duckduckgo to google which is more stable
        "num_search_results_per_query": 8 # Increased number of results to find more PDFs
    },
    # Phase 2: how the agent explores the target site to find PDF links.
    "exploration_phase": {
        "exploration_goal_prompt": """You are an AI assistant tasked with finding and downloading PDF research papers from arXiv related to '{topic}'.

Your goal is to find PDF download links for relevant papers. Look for links labeled 'PDF', 'Download PDF', or links ending in .pdf within the page.

IMPORTANT: When you find a PDF link, you MUST use the "download_pdf" action with the full PDF URL to download it. Do not try to summarize the content - downloading the PDF is the primary goal.

Please follow these guidelines:
1. If the current page is a direct PDF or has PDF in the URL, use "download_pdf" action immediately
2. If you're on an abstract page, look for PDF links and use "download_pdf" action
3. If you're on a search results page, look for relevant paper links and click them
4. Use "scroll" action to see more results if needed
5. If you can't find relevant papers after multiple steps, use "goal_impossible"
6. If you successfully download at least one PDF, use "goal_achieved"

Remember: Your PRIMARY goal is to DOWNLOAD PDFs using the "download_pdf" action, not just navigate or summarize.""",
        # Link-text keywords that mark links worth following during navigation.
        "navigation_keywords": ["abstract", "pdf", "view", "download", "related", "version", "year", "author", "subject", "search"],
        # Link-text keywords that mark likely PDF download links.
        "pdf_keywords": ["pdf", "download pdf", "download"],
        # Regexes matched against URLs to recognize PDF endpoints.
        "pdf_url_patterns": [r'/pdf/\d+\.\d+v?\d*', r'\.pdf$'], # Updated arXiv pattern
        # Hard cap on exploration steps before the agent must conclude.
        "max_steps": 10,
        "valid_actions": ["click", "scroll", "download_pdf", "go_back", "goal_achieved", "goal_impossible"]
    },
    # Phase 3: metadata extraction and filename construction for downloads.
    "download_phase": {
        "metadata_extraction_prompt": """Based on the context below (URL, surrounding text/elements, often from an arXiv abstract page) for the PDF link below, extract the paper's TITLE, the primary AUTHOR's last name, and the YEAR of publication (YYYY).

Context:
---
{context}
---

Respond ONLY with a valid JSON object: {{"title": "...", "author_lastname": "...", "year": "YYYY"}}. Use "Unknown" or the current year if a value cannot be found.""",
        # Placeholders here are filled from the extracted metadata plus {topic}.
        "filename_template": "{year}_{author_lastname}_{topic}_{title}",
        "required_metadata": ["title", "author_lastname", "year"]
    }
}
61 |
62 | # --- Instruction Pack 2: Government Reports ---
# Instruction pack configuring `find_and_download_pdfs` to fetch official
# government reports. Single-brace tokens are str.format placeholders filled
# by the tool; doubled braces ({{...}}) render as literal JSON braces.
GOVERNMENT_REPORT_INSTRUCTIONS = {
    # Phase 1: web search and target-site selection.
    "search_phase": {
        "search_query_template": '"{topic}" official report site:gov.uk OR site:*.gov OR site:*.gov.au OR site:*.gc.ca', # Added more gov domains
        "target_site_identification_prompt": """Analyze the search results summary for "{search_term}".
Identify the single most promising URL pointing to an official government webpage (e.g., *.gov.uk, *.gov, *.gc.ca, *.gov.au) or official agency site likely hosting the definitive report or publication page for the topic. Avoid news articles or commentary sites.

Search Results Summary:
---
{search_results_summary}
---

Respond ONLY with a valid JSON object: {{"target_url": "URL_or_null"}}.""",
        "search_engine": "google"
    },
    # Phase 2: on-site exploration heuristics.
    "exploration_phase": {
        "exploration_goal_prompt": "Explore the official government website related to '{topic}' to find and download the primary official report(s) or policy document(s) in PDF format.",
        "navigation_keywords": ["publication", "report", "document", "research", "policy", "consultation", "guidance", "download", "library", "archive", "statistics", "data"],
        "pdf_keywords": ["pdf", "download", "full report", "final report", "publication", "document", "read", "view", "annex", "appendix", "data"],
        "pdf_url_patterns": [r'\.pdf(\?|$)', r'/assets/', r'/download/', r'/file/', r'/media/'],
        "max_steps": 15
    },
    # Phase 3: metadata extraction and filename construction.
    "download_phase": {
        # Fix: the previous template embedded {datetime.now().strftime('%Y-%m-%d')}
        # as a bare single-brace placeholder, so str.format(context=...) raised
        # KeyError: 'datetime'. The example is now plain text the LLM can follow.
        "metadata_extraction_prompt": """Based on the context (URL, surrounding text/elements) for the government document PDF link below, determine the PUBLICATION_DATE (format YYYY-MM-DD, or YYYY-MM, or just YYYY if only year is available) and a concise DOCUMENT_TYPE (e.g., 'Policy Paper', 'Impact Assessment', 'Consultation Response', 'Research Report', 'Official Guidance', 'Statistics Release').

Context:
---
{context}
---

Respond ONLY with a valid JSON object: {{"date": "...", "document_type": "..."}}. Use best guess (e.g., today's date in YYYY-MM-DD format, 'Report') if a value cannot be reliably determined.""",
        # Placeholders filled from extracted metadata plus {topic}.
        "filename_template": "{date}_GovReport_{topic}_{document_type}",
        "required_metadata": ["date", "document_type"]
    }
}
97 |
# --- Instruction Pack 3: Product Manuals/Datasheets ---
# Configuration for a search -> explore -> download pipeline that locates the
# official manufacturer manual or datasheet PDF for a product ({topic}).
# Placeholders ({topic}, {search_term}, {search_results_summary}, {context})
# are presumably substituted via str.format by the consuming tool — note the
# doubled {{...}} braces used to emit literal JSON; confirm against the caller.
PRODUCT_MANUAL_INSTRUCTIONS = {
    "search_phase": {
        "search_query_template": "{topic} official manual OR datasheet OR support download PDF", # Broadened query slightly
        # LLM picks the single best manufacturer URL from the results summary
        # and must answer with a strict JSON object.
        "target_site_identification_prompt": """Analyze the search results summary for "{search_term}".
Identify the single URL most likely leading to the official manufacturer's product support, downloads, or manual page for the specified product. Prioritize the manufacturer's own domain. Avoid retailer sites (like Amazon, BestBuy) or general review sites.

Search Results Summary:
---
{search_results_summary}
---

Respond ONLY with a valid JSON object: {{"target_url": "URL_or_null"}}.""",
        "search_engine": "google"
    },
    "exploration_phase": {
        "exploration_goal_prompt": "Explore the manufacturer's website for the product '{topic}' to find the primary user manual, user guide, or technical datasheet available as a PDF download. Look in 'Support', 'Downloads', or 'Documentation' sections.",
        # Anchor words that steer navigation toward documentation pages.
        "navigation_keywords": ["support", "download", "manual", "documentation", "guide", "specification", "datasheet", "product", "resource", "driver", "software", "firmware"],
        # Words expected near/within links that point at the PDF itself.
        "pdf_keywords": ["manual", "guide", "datasheet", "specification", "pdf", "download", "instructions", "service manual", "user guide"],
        # URL regexes used to recognize manual/datasheet PDF downloads.
        "pdf_url_patterns": [r'manual.*\.pdf', r'datasheet.*\.pdf', r'\.pdf(\?|$)', r'guide.*\.pdf', r'spec.*\.pdf'],
        "max_steps": 12  # Upper bound on exploration actions
    },
    "download_phase": {
        # Extracts document type and language used by filename_template below.
        "metadata_extraction_prompt": """Based on the context (URL, link text, surrounding elements) for the PDF link below, determine the DOCUMENT_TYPE (e.g., 'User Manual', 'Quick Start Guide', 'Datasheet', 'Specifications', 'Service Manual') and LANGUAGE (e.g., 'EN', 'DE', 'FR', 'Multi', if obvious, otherwise default to 'EN').

Context:
---
{context}
---

Respond ONLY with a valid JSON object: {{"document_type": "...", "language": "..."}}. Use 'Manual' and 'EN' as defaults if unsure.""",
        "filename_template": "{topic}_{document_type}_{language}",
        "required_metadata": ["document_type"] # Language is helpful but not strictly required
    }
}
133 |
# --- Instruction Pack 4: Finding Specific Legal Documents (Example - Requires careful prompting) ---
# NOTE: Legal document searches can be complex due to jurisdiction, specific courts, etc.
# This is a simplified example.
# Same three-phase shape as the other packs: search -> explore -> download.
# Placeholders are filled via str.format by the consuming tool; literal JSON
# in prompts is escaped with doubled {{...}} braces.
LEGAL_DOCUMENT_INSTRUCTIONS = {
    "search_phase": {
        "search_query_template": '"{topic}" court filing OR legal document OR case text PDF',
        # LLM selects one official/repository URL from the results summary.
        "target_site_identification_prompt": """Analyze the search results summary for "{search_term}".
Identify a URL likely pointing to an official court website (e.g., *.uscourts.gov), legal repository (like CourtListener, RECAP), or official government archive hosting the specific legal case document or docket. Avoid news summaries or law firm analyses unless they directly link to the official document PDF.

Search Results Summary:
---
{search_results_summary}
---

Respond ONLY with JSON: {{"target_url": "URL_or_null"}}.""",
        "search_engine": "google"
    },
    "exploration_phase": {
        "exploration_goal_prompt": "Explore the legal resource website for '{topic}'. Identify and download the relevant court filing, judgment, or legal document PDF.",
        # Anchor words that steer navigation toward docket/filing pages.
        "navigation_keywords": ["document", "filing", "opinion", "judgment", "docket", "case", "pdf", "download", "view", "attachment", "exhibit"],
        "pdf_keywords": ["document", "filing", "opinion", "judgment", "pdf", "download", "attachment", "exhibit", "order"],
        # URL regexes for direct PDF or document-viewer download endpoints.
        "pdf_url_patterns": [r'\.pdf(\?|$)', r'/downloadDoc', r'/viewDoc'],
        "max_steps": 15  # Upper bound on exploration actions
    },
    "download_phase": {
        # Extracts filing date and category used by filename_template below.
        "metadata_extraction_prompt": """Based on the context for the legal document PDF link below, extract the approximate FILING_DATE (YYYY-MM-DD or YYYY) and a short DOCUMENT_CATEGORY (e.g., 'Complaint', 'Motion', 'Opinion', 'Judgment', 'Order', 'Exhibit').

Context:
---
{context}
---

Respond ONLY with JSON: {{"date": "...", "document_category": "..."}}. Use current date or 'Filing' if unknown.""",
        "filename_template": "{date}_{topic}_{document_category}",
        "required_metadata": ["date", "document_category"]
    }
}
171 |
172 | # ____________________________________________________________________________________________________________________________________________________________________________________________
173 |
# --- Instruction Pack 5: Simple Search Summary ---
# Lightweight "search, then summarize each hit" configuration.
# {query} and {page_content} are presumably filled per result page by the
# consuming tool via str.format — confirm against the caller.
SIMPLE_SEARCH_SUMMARY_INSTRUCTIONS = {
    "search_params": {
        "engines": ["google", "duckduckgo"], # Which engines to use
        "num_results_per_engine": 3 # How many results to fetch from each
    },
    # Prompt for the LLM that will summarize each page's content
    "summarization_prompt": """Concisely summarize the key information from the following web page content, focusing on its relevance to the search query '{query}'. Output a brief 2-3 sentence summary only.

Page Content:
---
{page_content}
---

Concise Summary:""",
    # Optional: Add filters if needed
    # "url_filter_keywords": [], # e.g., ["blog", "news"] to only summarize blog/news
    # "min_content_length_for_summary": 150 # e.g., skip very short pages
}
193 |
# --- Instruction Pack 6: Technical Search Summary ---
# Variant of the simple search-summary pack tuned for technical topics:
# more results per engine, source filtering, and a longer-content threshold.
TECHNICAL_SEARCH_SUMMARY_INSTRUCTIONS = {
    "search_params": {
        "engines": ["google", "bing"], # Maybe Bing is better for some technical queries
        "num_results_per_engine": 5 # Get more results for technical topics
    },
    # Per-page summarization prompt; {query} and {page_content} are filled in.
    "summarization_prompt": """Analyze the following web page content related to the technical search query '{query}'. Extract and summarize the core technical concepts, definitions, or conclusions presented. Focus on accuracy and specific details if available. Keep the summary to 3-4 sentences.

Page Content:
---
{page_content}
---

Technical Summary:""",
    "url_filter_keywords": ["docs", "tutorial", "research", "arxiv", "github", "developer"], # Prioritize technical sources
    "min_content_length_for_summary": 300 # Expect longer content
}
211 |
212 | # ____________________________________________________________________________________________________________________________________________________________________________________________
213 |
214 |
# --- Instruction Pack 7: Extract Job Posting Details ---
# Configuration for a crawl-and-extract flow: discover job-posting URLs by
# crawling search results, then have an LLM pull structured fields per page.
# CSS selectors here target live third-party markup and may break without
# notice (the inline comments already flag this).
JOB_POSTING_EXTRACTION_INSTRUCTIONS = {
    "data_source": {
        "source_type": "dynamic_crawl", # Find URLs by crawling
        "crawl_config": {
            "start_url": "https://www.google.com/search?q=software+engineer+jobs+remote", # Example search
            "list_item_selector": "a[href*='/jobs/']", # Adjust selector based on actual job board/search results
            "next_page_selector": "#pnnext", # Google's next page link ID (may change)
            "max_pages_to_crawl": 3, # Limit crawl depth
            "max_urls_limit": 20 # Limit total jobs to process
        }
        # Alternatively, provide a list directly:
        # "source_type": "list",
        # "urls": ["https://example-job-board.com/job/123", "https://example-job-board.com/job/456"]
    },
    "extraction_details": {
        # Prompt asking LLM to extract specific fields
        # ({page_content} is substituted per page; the response must be JSON).
        "schema_or_prompt": """From the provided job posting web page content, extract the following details:
- job_title: The official title of the position.
- company_name: The name of the hiring company.
- location: The primary location(s) listed (e.g., "Remote", "New York, NY").
- salary_range: Any mentioned salary range or compensation details (e.g., "$120k - $150k", "Competitive").
- key_skills: A list of the top 3-5 required technical skills or qualifications mentioned.

Web Page Content Context:
---
{page_content}
---

Respond ONLY with a valid JSON object containing these keys. If a field is not found, use null or an empty list for key_skills.""",
        "extraction_llm_model": "openai/gpt-4.1-mini" # Specify model for extraction
    },
    "output_config": {
        "format": "json_list", # Output as a list of JSON objects
        "error_handling": "include_error" # Include URLs that failed in the errors dict
    }
}
252 |
# --- Instruction Pack 8: Extract Product Details (Schema Example) ---
# Demonstrates the schema-based variant of extraction: instead of a free-form
# prompt, "schema_or_prompt" is a JSON Schema object describing the fields the
# LLM must return for each product page.
ECOMMERCE_PRODUCT_EXTRACTION_INSTRUCTIONS = {
    "data_source": {
        "source_type": "list",
        # URLs would be provided by the calling code/agent based on what products to check
        "urls": [
            # Example URLs (replace with actual ones for testing)
            # "https://www.amazon.com/dp/B08H75RTZ8/", # Example Kindle Paperwhite
            # "https://www.bestbuy.com/site/sony-wh1000xm5-wireless-noise-cancelling-over-the-ear-headphones-black/6505725.p?skuId=6505725"
        ]
    },
    "extraction_details": {
        # Using a JSON schema to define desired output
        "schema_or_prompt": {
            "type": "object",
            "properties": {
                "product_name": {"type": "string", "description": "The main name or title of the product."},
                "price": {"type": "string", "description": "The current listed price, including currency symbol (e.g., '$149.99')."},
                "rating": {"type": "number", "description": "The average customer rating (e.g., 4.7). Null if not found."},
                "num_reviews": {"type": "integer", "description": "The total number of customer reviews. Null if not found."},
                "availability": {"type": "string", "description": "Stock status (e.g., 'In Stock', 'Out of Stock', 'Available for Pre-order')."}
            },
            "required": ["product_name", "price", "availability"]
        },
        "extraction_llm_model": "openai/gpt-4.1-mini" # Use a capable model
    },
    "output_config": {
        "format": "csv_string", # Output as CSV text
        "error_handling": "skip" # Skip pages that fail
    }
}
284 |
285 |
286 | # ____________________________________________________________________________________________________________________________________________________________________________________________
287 |
288 |
289 |
# --- Instruction Pack 9: Login and Check Order Status ---
# LLM-guided browser workflow: log in to a demo site, navigate to the secure
# area, and read the success banner. Credentials arrive at call time in
# input_data and are exposed to the workflow via input_data_mapping.
ORDER_STATUS_WORKFLOW_INSTRUCTIONS = {
    "start_url": "https://the-internet.herokuapp.com/login", # Example login page
    "workflow_goal_prompt": "Log in using the provided 'username' and 'password', navigate to the secure area, and read the text content of the success message banner.",
    # Action vocabulary the guiding LLM may choose from at each step.
    "available_actions": ["type", "click", "read_value", "finish_success", "finish_failure"],
    "llm_model": "openai/gpt-4.1-mini", # Model for guidance
    "max_steps": 8,
    "input_data_mapping": { # Maps abstract names to keys in input_data passed to the tool
        "user": "username",
        "pass": "password",
    },
    # Free-text hints helping the element finder locate page elements.
    "element_finding_hints": ["username field", "password field", "login button", "success message", "logout link"],
    # success_condition_prompt could be added for more complex checks
    # step_prompts are likely not needed for this simple login example
}
305 |
# --- Instruction Pack 10: Submit a Simple Contact Form ---
# LLM-guided browser workflow: fill the Selenium demo web form from
# input_data (via input_data_mapping), submit it, and verify success by
# the resulting page title.
CONTACT_FORM_WORKFLOW_INSTRUCTIONS = {
    "start_url": "https://www.selenium.dev/selenium/web/web-form.html", # Example form page
    "workflow_goal_prompt": "Fill out the web form using the provided 'name', 'email', and 'message'. Then click the submit button and confirm submission by checking if the page title changes to 'Web form processed'.",
    # Action vocabulary the guiding LLM may choose from at each step.
    "available_actions": ["type", "click", "finish_success", "finish_failure"],
    "llm_model": "openai/gpt-4.1-mini",
    "max_steps": 10,
    "input_data_mapping": {
        "contact_name": "name",
        "contact_email": "email", # Assuming input_data has key "email"
        "contact_message": "message"
    },
    # Hints reference the demo form's element names (my-text, my-password, ...).
    "element_finding_hints": ["text input field (my-text)", "password input (my-password)", "textarea (my-textarea)", "submit button"],
    # This workflow implicitly checks success via title change, but an explicit prompt could be added:
    # "success_condition_prompt": "Does the current page title indicate the form was processed successfully (e.g., contains 'processed')?"
}
322 |
323 | # ____________________________________________________________________________________________________________________________________________________________________________________________
324 |
325 |
# --- Instruction Pack 11: Monitor Product Price and Availability ---
# Watches product pages and evaluates one condition per data point:
# 'changed' (compare to previous value), 'contains' (substring check), or
# 'llm_eval' (LLM judges the extracted value). CSS selectors target live
# retailer markup and may break without notice (flagged inline).
PRODUCT_MONITORING_INSTRUCTIONS = {
    "monitoring_targets": [
        {
            "url": "https://www.bestbuy.com/site/sony-wh1000xm5-wireless-noise-cancelling-over-the-ear-headphones-black/6505725.p?skuId=6505725", # Example URL
            "data_points": [
                {
                    "name": "price",
                    "identifier": ".priceView-hero-price span[aria-hidden='true']", # CSS selector for price element (INSPECT CAREFULLY!)
                    "extraction_method": "selector",
                    "condition": "changed" # Alert if price changes from previous_values
                },
                {
                    "name": "availability",
                    "identifier": "button[data-button-state='ADD_TO_CART']", # Selector for Add to Cart button
                    "extraction_method": "selector", # We just check existence/text
                    # Condition check via LLM
                    "condition": "llm_eval",
                    # NOTE(review): this prompt contains literal JSON braces; it appears
                    # to be used verbatim (it has no {placeholder} fields), but if the
                    # consumer ever runs str.format on it the braces must be escaped —
                    # confirm against the caller.
                    "llm_condition_prompt": "Based on the extracted text/presence of the element ('Current Value'), is the product currently available for purchase? Respond {\"condition_met\": true} if available, {\"condition_met\": false} otherwise."
                    # If extraction returns text like "Add to Cart", LLM should say true. If "Sold Out" or None, should say false.
                },
                {
                    "name": "product_title", # Example LLM extraction
                    "identifier": "Extract the main product title from the page content.",
                    "extraction_method": "llm",
                    "condition": "contains", # Check if title contains expected keyword
                    "condition_value": "WH-1000XM5"
                }
            ]
        },
        # Add more target product URLs here...
        # { "url": "https://...", "data_points": [...] }
    ],
    "llm_config": {
        "model": "openai/gpt-4.1-mini" # Model for LLM extraction/evaluation
    },
    "concurrency": {
        "max_concurrent_pages": 2 # Limit concurrency for scraping politeness
    },
    "browser_options": {
        "headless": True
    }
}
369 |
# --- Instruction Pack 12: Monitor Website Content Section ---
# Selector-only monitoring example: alert whenever either of the first two
# headlines on a Google News section page changes. The class-based selectors
# target auto-generated markup and are fragile (flagged inline).
WEBSITE_SECTION_MONITORING_INSTRUCTIONS = {
    "monitoring_targets": [
        {
            "url": "https://news.google.com/topics/CAAqJggKIiBDQkFTRWdvSUwyMHZNRGx6TVdZU0FtVnVHZ0pKVGlnQVAB?hl=en-US&gl=US&ceid=US%3Aen", # Example Google News AI section
            "data_points": [
                {
                    "name": "top_headline_text",
                    "identifier": "h3 > a.gPFEn", # Selector for top headline link text (INSPECT CAREFULLY!)
                    "extraction_method": "selector",
                    "condition": "changed" # Alert if the top headline changes
                },
                {
                    "name": "second_headline_text",
                    "identifier": "article:nth-of-type(2) h3 > a.gPFEn", # Selector for second headline
                    "extraction_method": "selector",
                    "condition": "changed"
                }
            ]
        }
    ],
    "llm_config": {
        "model": "openai/gpt-4.1-mini" # Not strictly needed if only using selectors
    },
    "concurrency": { "max_concurrent_pages": 3 },
    "browser_options": { "headless": True }
}
397 |
398 |
399 | # ____________________________________________________________________________________________________________________________________________________________________________________________
400 |
401 |
# --- Instruction Pack 13: Market Trend Summary ---
# Four-phase research pipeline: search -> select sites -> extract findings
# -> synthesize a summary paragraph. Placeholders ({topic}, {max_urls},
# {search_results_context}, {page_content}, {extracted_information_context})
# are filled via str.format; literal JSON is escaped with doubled braces.
MARKET_TREND_RESEARCH_INSTRUCTIONS = {
    "research_goal_prompt": "Generate a brief summary of the current market trends for {topic}, based on recent news articles and analysis reports.",
    "search_phase": {
        "search_queries": [
            "{topic} market trends 2024",
            "latest news {topic} industry",
            "{topic} market analysis report"
        ],
        "search_engine": "google",
        "num_search_results_per_query": 5 # Get a few results per query
    },
    "site_selection_phase": {
        # Prompt to select relevant news/analysis sites
        "selection_prompt": """From the search results for '{topic}', select up to {max_urls} URLs that appear to be recent (within the last year if possible) news articles, market analysis reports, or reputable industry blogs discussing market trends. Avoid forum discussions, product pages, or very old content.

Search Results Context:
---
{search_results_context}
---

Respond ONLY with JSON: {{"selected_urls": ["url1", ...]}}""",
        "max_sites_to_visit": 5 # Limit how many articles are processed
    },
    "extraction_phase": {
        # Prompt to extract key points related to trends
        "extraction_prompt_or_schema": """Extract the main points, key findings, or trend descriptions related to '{topic}' from the provided web page content. Focus on statements about market direction, growth, challenges, or notable events. Output as a JSON object with a key "key_findings" containing a list of strings (each string is a finding/point).

Web Page Content Context:
---
{page_content}
---

Extracted JSON Data:""",
        "extraction_llm_model": "openai/gpt-4.1-mini" # Model for extraction
    },
    "synthesis_phase": {
        # Prompt to synthesize the findings into a paragraph
        "synthesis_prompt": """Based on the extracted key findings below regarding '{topic}', write a concise paragraph (3-5 sentences) summarizing the major market trends discussed.

Extracted Information Context:
---
{extracted_information_context}
---

Synthesized Market Trend Summary:""",
        "synthesis_llm_model": "openai/gpt-4.1-mini", # Model for synthesis
        "report_format_description": "A single paragraph summarizing market trends."
    }
}
452 |
# --- Instruction Pack 14: Competitive Analysis Snippets ---
# Four-phase research pipeline mirroring the market-trend pack: search ->
# select sites -> extract competitor mentions -> synthesize a markdown list.
# FIX: the extraction prompt previously contained UNescaped literal JSON
# braces ({"name": ...} / {"competitors": []}) in a template that is
# formatted with {topic}/{page_content}; str.format would raise KeyError.
# They are now escaped with doubled braces, matching every other prompt here.
COMPETITIVE_ANALYSIS_INSTRUCTIONS = {
    "research_goal_prompt": "Gather brief summaries of direct competitors mentioned for the product/service '{topic}' from recent reviews or comparison articles.",
    "search_phase": {
        "search_queries": [
            "{topic} vs competitors",
            "{topic} alternatives review",
            "comparison {topic}"
        ],
        "search_engine": "google",
        "num_search_results_per_query": 8
    },
    "site_selection_phase": {
        # Prompt to select review/comparison sites from the search results.
        "selection_prompt": """From the search results for '{topic}', select up to {max_urls} URLs that seem to be review sites, comparison articles, or tech news discussing competitors or alternatives to {topic}. Prioritize recent results if possible.

Search Results Context:
---
{search_results_context}
---

Respond ONLY with JSON: {{"selected_urls": ["url1", ...]}}""",
        "max_sites_to_visit": 4
    },
    "extraction_phase": {
        # Per-page extraction prompt; literal JSON examples use doubled braces
        # so the template survives str.format substitution.
        "extraction_prompt_or_schema": """Identify any direct competitors to '{topic}' mentioned in the provided web page content. For each competitor found, extract its NAME and a brief (1-sentence) summary of how it's compared to {topic} or its key differentiator mentioned.

Web Page Content Context:
---
{page_content}
---

Respond ONLY with a valid JSON object with a key "competitors", where the value is a list of objects, each like {{"name": "...", "comparison_summary": "..."}}. If no competitors are mentioned, return {{"competitors": []}}.""",
        "extraction_llm_model": "openai/gpt-4.1-mini"
    },
    "synthesis_phase": {
        # Consolidates all per-page findings into one markdown list.
        "synthesis_prompt": """Consolidate the extracted competitor information related to '{topic}' into a markdown list. For each competitor found across the sources, list its name and a bullet point summary of the comparison points mentioned. Group findings by competitor name.

Extracted Information Context:
---
{extracted_information_context}
---

Consolidated Competitor Markdown List:""",
        "synthesis_llm_model": "openai/gpt-4.1-mini",
        "report_format_description": "A markdown list summarizing mentioned competitors and comparison points."
    }
}
```
--------------------------------------------------------------------------------
/examples/workflow_delegation_demo.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | """Workflow delegation example using Ultimate MCP Server."""
3 | import asyncio
4 | import json
5 | import sys
6 | import time
7 | from collections import namedtuple # Import namedtuple
8 | from pathlib import Path
9 | from typing import Any, Dict, List, Optional
10 |
11 | # Add project root to path for imports when running as script
12 | sys.path.insert(0, str(Path(__file__).parent.parent))
13 |
14 | from fastmcp import FastMCP
15 | from rich import box
16 | from rich.markup import escape
17 | from rich.panel import Panel
18 | from rich.rule import Rule
19 | from rich.syntax import Syntax
20 | from rich.table import Table
21 |
22 | from ultimate_mcp_server.constants import Provider
23 | from ultimate_mcp_server.core.providers.base import get_provider
24 | from ultimate_mcp_server.core.server import Gateway
25 | from ultimate_mcp_server.exceptions import ToolExecutionError
26 | from ultimate_mcp_server.utils import get_logger, process_mcp_result
27 |
28 | # --- Add Display Utils Import ---
29 | from ultimate_mcp_server.utils.display import CostTracker, _display_stats # Import CostTracker
30 |
31 | # --- Add Rich Imports ---
32 | from ultimate_mcp_server.utils.logging.console import console
33 |
34 | # --- Import Tools Needed ---
35 | # Import tool functions directly if not registering them all
36 | # from ultimate_mcp_server.tools.optimization import recommend_model, execute_optimized_workflow # No, call via MCP
37 | # from ultimate_mcp_server.tools.completion import generate_completion # Call via MCP
38 | # -------------------------
39 |
# Initialize logger
# Named logger used throughout this demo for structured, emoji-tagged output.
logger = get_logger("example.workflow_delegation")

# Create a simple structure for cost tracking from dict
# Lightweight record shape consumed by CostTracker when tool results arrive
# as plain dicts rather than rich result objects.
TrackableResult = namedtuple("TrackableResult", ["cost", "input_tokens", "output_tokens", "provider", "model", "processing_time"])

# Initialize FastMCP server
# Local MCP server instance; the tools defined below register against it
# via the @mcp.tool() decorator.
mcp = FastMCP("Workflow Delegation Demo")
48 |
# Mock provider initialization function (replace with actual if needed)
async def initialize_providers():
    """Initialize providers via the Gateway and report which required ones are usable.

    Creates a Gateway instance (which performs the actual provider setup),
    then probes each required provider name and logs/prints a warning listing
    any that could not be obtained.
    """
    logger.info("Initializing required providers...", emoji_key="provider")

    # Delegate provider setup to the Gateway rather than wiring it by hand.
    gateway = Gateway("workflow-delegation-demo", register_tools=False)
    await gateway._initialize_providers()

    # Probe each required provider; collect the ones that are unavailable.
    unavailable = []
    for name in ("openai", "anthropic", "gemini"):
        try:
            if await get_provider(name):
                logger.info(f"Provider {name} is available", emoji_key="success")
            else:
                unavailable.append(name)
        except Exception:
            unavailable.append(name)

    if unavailable:
        joined = ', '.join(unavailable)
        logger.warning(f"Missing providers: {joined}. Some demos might fail.", emoji_key="warning")
        console.print(f"[yellow]Warning:[/yellow] Missing providers: {joined}")
    else:
        logger.info("All required providers are available", emoji_key="success")
76 |
# Keep execute_workflow as a locally defined tool demonstrating the concept
@mcp.tool()
async def execute_workflow(
    workflow_steps: List[Dict[str, Any]],
    initial_input: Optional[str] = None, # Make initial_input optional
    max_concurrency: int = 1, # Kept for API compatibility; this demo runs steps sequentially
    ctx = None # Keep ctx for potential use by called tools
) -> Dict[str, Any]:
    """Execute a multi-step workflow by calling registered project tools.

    Each entry of ``workflow_steps`` is a dict with:
      - ``id``: unique step identifier (required).
      - ``operation``: short operation name, mapped to a registered tool (required).
      - ``parameters``: extra keyword arguments for the target tool (optional).
      - ``input_from``: id of an earlier step whose output feeds this step (optional).
      - ``output_as``: key under which this step's output is stored (defaults to ``id``).

    Returns:
        Dict with ``outputs`` (step results keyed by ``output_as``),
        ``processing_time`` (seconds), ``total_cost`` (sum of any per-step
        ``cost`` values), and ``success`` (True when all steps completed).

    Raises:
        ValueError: for a missing step id, an unmapped operation, or an
            ``input_from`` reference to a step that has not run.
        ToolExecutionError: when an underlying tool call fails.
    """
    start_time = time.time()
    total_cost = 0.0
    step_results: Dict[str, Any] = {} # Store results keyed by step_id

    # Mapping from simple operation names to actual tool names
    operation_to_tool_map = {
        "summarize": "summarize_document",
        "extract_entities": "extract_entities",
        "generate_questions": "generate_qa_pairs", # Correct tool name
        "chunk": "chunk_document",
        # Add mappings for other tools as needed
        "completion": "generate_completion",
        "chat": "chat_completion",
        "retrieve": "retrieve_context",
        "rag_generate": "generate_with_rag",
    }

    current_input_value = initial_input
    logger.info(f"Starting workflow execution with {len(workflow_steps)} steps.")

    for i, step in enumerate(workflow_steps):
        step_id = step.get("id")
        operation = step.get("operation")
        tool_name = operation_to_tool_map.get(operation)
        parameters = step.get("parameters", {}).copy() # Copy so per-step mutations don't leak back into the caller's dict
        input_from_step = step.get("input_from") # ID of previous step for input
        output_as = step.get("output_as", step_id) # Key to store output under

        if not step_id:
            raise ValueError(f"Workflow step {i} is missing required 'id' key.")
        if not tool_name:
            raise ValueError(f"Unsupported operation '{operation}' in workflow step '{step_id}'. Mapped tool name not found.")

        logger.info(f"Executing workflow step {i+1}/{len(workflow_steps)}: ID='{step_id}', Tool='{tool_name}'")

        # Resolve input: Use previous step output or initial input
        step_input_data = None
        if input_from_step:
            if input_from_step not in step_results:
                raise ValueError(f"Input for step '{step_id}' requires output from '{input_from_step}', which has not run or failed.")
            # Decide which part of the previous result to use
            # This needs a more robust mechanism (e.g., specifying the key)
            # For now, assume the primary output is needed (e.g., 'text', 'summary', 'chunks', etc.)
            prev_result = step_results[input_from_step]
            # Simple logic: look for common output keys
            if isinstance(prev_result, dict):
                if 'summary' in prev_result:
                    step_input_data = prev_result['summary']
                elif 'text' in prev_result:
                    step_input_data = prev_result['text']
                elif 'chunks' in prev_result:
                    step_input_data = prev_result['chunks'] # May need specific handling
                elif 'result' in prev_result:
                    step_input_data = prev_result['result'] # From DocumentResponse
                else:
                    step_input_data = prev_result # Pass the whole dict?
            else:
                step_input_data = prev_result # Pass raw output
            logger.debug(f"Using output from step '{input_from_step}' as input.")
        else:
            step_input_data = current_input_value # Use input from previous step or initial
            logger.debug("Using input from previous step/initial input.")

        # --- Construct parameters for the target tool ---
        # This needs mapping based on the target tool's expected signature
        # Example: If tool is 'summarize_document', map step_input_data to 'document' param
        if tool_name == "summarize_document" and isinstance(step_input_data, str):
            parameters["document"] = step_input_data
        elif tool_name == "extract_entities" and isinstance(step_input_data, str):
            parameters["document"] = step_input_data
            # Ensure entity_types is a list
            if "entity_types" not in parameters or not isinstance(parameters["entity_types"], list):
                parameters["entity_types"] = ["organization", "person", "concept"] # Default
        elif tool_name == "generate_qa_pairs" and isinstance(step_input_data, str):
            parameters["document"] = step_input_data
            # Map parameter name, REMOVING the original key so the target tool
            # is not handed an unexpected 'num_questions' keyword argument.
            parameters["num_pairs"] = parameters.pop("num_questions", None) or 5
        elif tool_name in ["generate_completion", "chat_completion"] and isinstance(step_input_data, str):
            if "prompt" not in parameters:
                parameters["prompt"] = step_input_data # Assume input is the prompt if not specified
        # Add more mappings as needed for other tools...
        else:
            # Fallback: pass the input data under a generic key if not handled?
            # Or maybe the tool parameter should explicitly name the input field?
            # For now, we assume the tool can handle the input directly if not mapped.
            # This requires careful workflow definition.
            # Maybe add 'input_arg_name' to workflow step definition?
            logger.warning(f"Input mapping for tool '{tool_name}' not explicitly defined. Passing raw input.")
            # Decide how to pass step_input_data if no specific mapping exists
            # Example: parameters['input_data'] = step_input_data

        # --- Call the actual tool via MCP ---
        try:
            logger.debug(f"Calling tool '{tool_name}' with params: {parameters}")
            tool_result = await mcp.call_tool(tool_name, parameters)
            # Process result to handle potential list format from MCP
            step_output = process_mcp_result(tool_result)
            logger.debug(f"Tool '{tool_name}' returned: {step_output}")

            if isinstance(step_output, dict) and step_output.get("error"):
                raise ToolExecutionError(f"Tool '{tool_name}' failed: {step_output['error']}")

            # Store the successful result
            step_results[output_as] = step_output
            # Update current_input_value for the next step (assuming primary output is desired)
            # This logic might need refinement based on tool outputs
            if isinstance(step_output, dict):
                current_input_value = step_output.get("text") or step_output.get("summary") or step_output.get("result") or step_output
            else:
                current_input_value = step_output

            # Accumulate cost if available
            if isinstance(step_output, dict) and "cost" in step_output:
                total_cost += float(step_output["cost"])

        except Exception as e:
            logger.error(f"Error executing step '{step_id}' (Tool: {tool_name}): {e}", exc_info=True)
            # Propagate exception to fail the workflow
            raise ToolExecutionError(f"Workflow failed at step '{step_id}': {e}") from e

    # Workflow completed successfully
    processing_time = time.time() - start_time
    logger.success(f"Workflow completed successfully in {processing_time:.2f}s")
    return {
        "outputs": step_results,
        "processing_time": processing_time,
        "total_cost": total_cost,
        "success": True # Indicate overall success
    }
214 |
215 | # Enhanced display function for workflow demos
def display_workflow_result(title: str, result: Any):
    """Render a workflow result to the console.

    Prints each named step output as its own panel, or a single text
    payload when no outputs dict exists, then the execution statistics.
    """
    console.print(Rule(f"[bold blue]{escape(title)}[/bold blue]"))

    # Normalize MCP responses, which can arrive as either a list or a dict.
    result = process_mcp_result(result)

    outputs = result["outputs"] if "outputs" in result else None
    if outputs:
        # One panel per named step output.
        for name, text in outputs.items():
            panel = Panel(
                escape(str(text).strip()),
                title=f"[bold magenta]Output: {escape(name)}[/bold magenta]",
                border_style="magenta",
                expand=False,
            )
            console.print(panel)
    elif "text" in result:
        # Fall back to the single text payload when there is no outputs dict.
        console.print(Panel(
            escape(result["text"].strip()),
            title="[bold magenta]Result[/bold magenta]",
            border_style="magenta",
            expand=False,
        ))

    # Shared helper: timing / cost / token statistics.
    _display_stats(result, console)
243 |
244 | # Enhanced display function for task analysis
def display_task_analysis(title: str, result: Any):
    """Render a task-analysis result to the console.

    Shows the task type and required features, an optional features
    explanation, any model recommendations, and the execution statistics.
    """
    console.print(Rule(f"[bold blue]{escape(title)}[/bold blue]"))

    # Normalize MCP responses, which can arrive as either a list or a dict.
    result = process_mcp_result(result)

    # Summary table: task type and required features.
    summary = Table(box=box.SIMPLE, show_header=False)
    summary.add_column("Metric", style="cyan")
    summary.add_column("Value", style="white")
    summary.add_row("Task Type", escape(result.get("task_type", "N/A")))
    summary.add_row("Required Features", escape(str(result.get("required_features", []))))
    console.print(summary)

    # Optional prose explanation of the feature requirements.
    if "features_explanation" in result:
        console.print(Panel(
            escape(result["features_explanation"]),
            title="[bold]Features Explanation[/bold]",
            border_style="dim blue",
            expand=False,
        ))

    # Recommendations table, one row per recommended model.
    if result.get("recommendations"):
        rec_table = Table(title="[bold]Model Recommendations[/bold]", box=box.ROUNDED, show_header=True)
        for header, style in (("Provider", "magenta"), ("Model", "blue"), ("Explanation", "white")):
            rec_table.add_column(header, style=style)
        for rec in result["recommendations"]:
            rec_table.add_row(
                *(escape(rec.get(field, "N/A")) for field in ("provider", "model", "explanation"))
            )
        console.print(rec_table)

    # Shared helper: timing / cost / token statistics.
    _display_stats(result, console)
285 |
286 | # Move _get_provider_for_model above run_delegate_task_demo
287 | def _get_provider_for_model(model_name: str) -> str:
288 | """Helper to determine provider from model name."""
289 | # Accept both 'provider/model' and legacy short names
290 | model_lower = model_name.lower()
291 | if '/' in model_lower:
292 | # e.g., 'gemini/gemini-2.0-flash' or 'anthropic/claude-3-7-sonnet-20250219'
293 | return model_lower.split('/')[0]
294 | elif ':' in model_lower:
295 | return model_lower.split(':')[0]
296 | elif model_lower.startswith("gpt-"):
297 | return Provider.OPENAI.value
298 | elif model_lower.startswith("claude-"):
299 | return Provider.ANTHROPIC.value
300 | elif model_lower.startswith("gemini-"):
301 | return Provider.GEMINI.value
302 | elif model_lower.startswith("deepseek-"):
303 | return "deepseek"
304 | elif model_lower.startswith("grok-"):
305 | return "grok"
306 | elif model_lower.startswith("o1-") or model_lower.startswith("o3-"):
307 | return Provider.OPENAI.value
308 | else:
309 | raise ValueError(f"Unknown model prefix for model: {model_name}")
310 |
311 | # --- Demo Functions ---
312 |
async def run_analyze_task_demo():
    """Demonstrate task analysis via the recommend_model tool."""
    console.print(Rule("[bold blue]Analyze Task Demo[/bold blue]"))
    logger.info("Running analyze_task demo...", emoji_key="start")

    task_description = "Summarize the provided technical document about AI advancements and extract key entities."
    console.print(f"[cyan]Task Description:[/cyan] {escape(task_description)}")

    try:
        # Rough demo-only size estimates for recommend_model: assume the task
        # needs ~10x its description length as input context, plus a short output.
        estimated_input_chars = len(task_description) * 10
        estimated_output_chars = 200

        result = await mcp.call_tool("recommend_model", {
            "task_type": "summarization",  # Required argument for the tool
            "expected_input_length": estimated_input_chars,
            "expected_output_length": estimated_output_chars,
            # Other recommend_model params (required_capabilities, max_cost) could be added here
        })

        # Render with the shared task-analysis formatter.
        display_task_analysis("Analysis Results", result)

    except Exception as e:
        logger.error(f"Error in analyze_task demo: {e}", emoji_key="error", exc_info=True)
        console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
    console.print()
342 |
343 |
async def run_delegate_task_demo(tracker: CostTracker) -> None:
    """Demonstrate task delegation: recommend a model, then run the completion.

    For each priority ("balanced", "cost", "quality"), this:
      1. Asks the recommend_model tool for a ranked list of models.
      2. Runs generate_completion with the top recommendation.
      3. Records the completion's cost/tokens in *tracker* when available.

    Args:
        tracker: Accumulates cost/token usage across the delegated calls.
    """
    console.print(Rule("[bold blue]Delegate Task Demo[/bold blue]"))
    logger.info("Running task delegation demo (using recommend_model + completion)...", emoji_key="start")

    task_description = "Generate a short marketing blurb for a new AI-powered writing assistant."
    prompt = "Write a catchy, 2-sentence marketing blurb for 'AI Writer Pro', a tool that helps users write faster and better."
    console.print(f"[cyan]Task Description:[/cyan] {escape(task_description)}")
    console.print(f"[cyan]Prompt:[/cyan] {escape(prompt)}")

    priorities = ["balanced", "cost", "quality"]

    for priority in priorities:
        console.print(Rule(f"[yellow]Delegating with Priority: {priority}[/yellow]"))
        logger.info(f"Delegating task with priority: {priority}", emoji_key="processing")
        try:
            # 1. Get a model recommendation for this priority.
            recommendation_result_raw = await mcp.call_tool("recommend_model", {
                "task_type": "creative_writing",  # Inferred task type for the prompt above
                "expected_input_length": len(prompt),
                "expected_output_length": 100,  # Rough blurb-length estimate
                "priority": priority
            })
            recommendation_result = process_mcp_result(recommendation_result_raw)

            # Skip this priority (rather than abort the whole demo) when no
            # usable recommendation came back.
            if "error" in recommendation_result or not recommendation_result.get("recommendations"):
                logger.error(f"Could not get recommendation for priority '{priority}'.")
                console.print(f"[red]Error getting recommendation for '{priority}'.[/red]")
                continue

            # 2. Execute the prompt with the top-ranked model.
            top_rec = recommendation_result["recommendations"][0]
            rec_provider = _get_provider_for_model(top_rec["model"])
            rec_model = top_rec["model"]
            logger.info(f"Recommendation for '{priority}': Use {rec_provider}/{rec_model}")

            # Call generate_completion tool with the recommended provider/model.
            completion_result_raw = await mcp.call_tool("generate_completion", {
                "prompt": prompt,
                "provider": rec_provider,
                "model": rec_model,
                "max_tokens": 100
            })

            # 3. Track cost when the result carries all required accounting fields.
            completion_result = process_mcp_result(completion_result_raw)
            if isinstance(completion_result, dict) and all(k in completion_result for k in ["cost", "provider", "model"]) and "tokens" in completion_result:
                try:
                    trackable = TrackableResult(
                        cost=completion_result.get("cost", 0.0),
                        input_tokens=completion_result.get("tokens", {}).get("input", 0),
                        output_tokens=completion_result.get("tokens", {}).get("output", 0),
                        provider=completion_result.get("provider", rec_provider),  # Use known provider as fallback
                        model=completion_result.get("model", rec_model),  # Use known model as fallback
                        processing_time=completion_result.get("processing_time", 0.0)
                    )
                    tracker.add_call(trackable)
                except Exception as track_err:
                    # Cost tracking is best-effort; never fail the demo over it.
                    logger.warning(f"Could not track cost for delegated task ({priority}): {track_err}", exc_info=False)

            # Display the completion result (or its error).
            if "error" in completion_result:
                logger.error(f"Completion failed for recommended model {rec_model}: {completion_result['error']}")
                console.print(f"[red]Completion failed for {rec_model}: {completion_result['error']}[/red]")
            else:
                console.print(Panel(
                    escape(completion_result.get("text", "").strip()),
                    title=f"[bold green]Delegated Result ({escape(priority)} -> {escape(rec_model)})[/bold green]",
                    border_style="green",
                    expand=False
                ))
                _display_stats(completion_result, console)  # Display stats from completion

        except Exception as e:
            logger.error(f"Error delegating task with priority {priority}: {e}", emoji_key="error", exc_info=True)
            console.print(f"[bold red]Error ({escape(priority)}):[/bold red] {escape(str(e))}")
    console.print()
421 |
422 |
async def run_workflow_demo():
    """Run the execute_workflow tool over a small three-step pipeline.

    The pipeline summarizes a text, extracts entities from the original
    text, then generates questions from the summary, and renders the
    combined results.
    """
    console.print(Rule("[bold blue]Execute Workflow Demo[/bold blue]"))
    logger.info("Running execute_workflow demo...", emoji_key="start")

    initial_text = """
    Artificial intelligence (AI) is rapidly transforming various sectors.
    In healthcare, AI algorithms analyze medical images with remarkable accuracy,
    aiding radiologists like Dr. Evelyn Reed. Pharmaceutical companies, such as InnovatePharma,
    use AI to accelerate drug discovery. Meanwhile, financial institutions leverage AI
    for fraud detection and algorithmic trading. The field continues to evolve,
    driven by researchers like Kenji Tanaka and advancements in machine learning.
    """

    # Step 2 reads the initial input (input_from=None); step 3 chains off
    # step 1's summary (input_from="summary").
    workflow = [
        {
            "id": "step1_summarize",
            "operation": "summarize",
            "provider": Provider.ANTHROPIC.value,
            "model": "claude-3-5-haiku-20241022",
            "parameters": {"format": "Provide a 2-sentence summary"},
            "output_as": "summary"
        },
        {
            "id": "step2_extract",
            "operation": "extract_entities",
            "provider": Provider.OPENAI.value,
            "model": "gpt-4.1-mini",
            "parameters": {"entity_types": ["person", "organization", "field"]},
            "input_from": None,  # Use initial_input
            "output_as": "entities"
        },
        {
            "id": "step3_questions",
            "operation": "generate_questions",
            "provider": Provider.GEMINI.value,
            "model": "gemini-2.0-flash-lite",
            "parameters": {"question_count": 2, "question_type": "insightful"},
            "input_from": "summary",  # Use output from step 1
            "output_as": "questions"
        }
    ]

    console.print("[cyan]Initial Input Text:[/cyan]")
    console.print(Panel(escape(initial_text.strip()), border_style="dim blue", expand=False))
    console.print("[cyan]Workflow Definition:[/cyan]")

    def _jsonable(obj):
        # Serialize Provider enums by value; fall back to str() for anything else.
        return obj.value if isinstance(obj, Provider) else str(obj)

    try:
        rendered = json.dumps(workflow, indent=2, default=_jsonable)
        console.print(Panel(
            Syntax(rendered, "json", theme="default", line_numbers=True, word_wrap=True),
            title="[bold]Workflow Steps[/bold]",
            border_style="blue",
            expand=False,
        ))
    except Exception as json_err:
        console.print(f"[red]Could not display workflow definition: {escape(str(json_err))}[/red]")

    logger.info(f"Executing workflow with {len(workflow)} steps...", emoji_key="processing")
    try:
        result = await mcp.call_tool("execute_workflow", {
            "workflow_steps": workflow,
            "initial_input": initial_text
        })

        # Render with the shared workflow-result formatter.
        display_workflow_result("Workflow Results", result)

    except Exception as e:
        logger.error(f"Error executing workflow: {e}", emoji_key="error", exc_info=True)
        console.print(f"[bold red]Workflow Execution Error:[/bold red] {escape(str(e))}")
    console.print()
494 |
495 |
async def run_prompt_optimization_demo():
    """Demonstrate the optimize_prompt tool.

    Optimizes a short prompt for a target model (using a different provider
    to perform the optimization) and prints the optimized prompt plus
    execution statistics.
    """
    console.print(Rule("[bold blue]Prompt Optimization Demo[/bold blue]"))
    logger.info("Running optimize_prompt demo...", emoji_key="start")

    original_prompt = "Tell me about Large Language Models."
    target_model = "claude-3-opus-20240229"
    optimization_type = "detailed_response"  # e.g., conciseness, detailed_response, specific_format

    console.print(f"[cyan]Original Prompt:[/cyan] {escape(original_prompt)}")
    console.print(f"[cyan]Target Model:[/cyan] {escape(target_model)}")
    console.print(f"[cyan]Optimization Type:[/cyan] {escape(optimization_type)}")

    logger.info(f"Optimizing prompt for {target_model}...", emoji_key="processing")
    try:
        result = await mcp.call_tool("optimize_prompt", {
            "prompt": original_prompt,
            "target_model": target_model,
            "optimization_type": optimization_type,
            "provider": Provider.OPENAI.value  # Using OpenAI to optimize for Claude
        })

        # Process result to handle list or dict format
        result = process_mcp_result(result)

        # Prefer the dedicated field, falling back to the generic "text" payload.
        # Fix: the old `hasattr(result, 'text')` check was always False for the
        # dict returned by process_mcp_result, so the fallback never fired.
        optimized_prompt = result.get("optimized_prompt") or result.get("text", "")

        console.print(Panel(
            escape(optimized_prompt.strip() if optimized_prompt else "[red]Optimization failed[/red]"),
            title="[bold green]Optimized Prompt[/bold green]",
            border_style="green",
            expand=False
        ))

        # Display execution stats
        _display_stats(result, console)

    except Exception as e:
        logger.error(f"Error optimizing prompt: {e}", emoji_key="error", exc_info=True)
        console.print(f"[bold red]Prompt Optimization Error:[/bold red] {escape(str(e))}")
    console.print()
540 |
541 |
async def main():
    """Run the workflow delegation demo suite; return a process exit code."""
    console.print(Rule("[bold magenta]Workflow Delegation Demo Suite[/bold magenta]"))
    tracker = CostTracker()  # Shared cost tracker across demos

    try:
        # Providers must be ready (API keys checked) before any tool call.
        await initialize_providers()
        console.print(Rule("[bold magenta]Workflow & Delegation Demos Starting[/bold magenta]"))

        # --- Register Necessary Tools ---
        # The demos call these tools, so register them on the MCP instance.
        from ultimate_mcp_server.tools.completion import generate_completion
        from ultimate_mcp_server.tools.document import (
            extract_entities,
            generate_qa_pairs,
            summarize_document,
        )
        from ultimate_mcp_server.tools.optimization import recommend_model

        for tool_fn in (
            recommend_model,
            generate_completion,
            summarize_document,
            extract_entities,
            generate_qa_pairs,
        ):
            mcp.tool()(tool_fn)
        logger.info("Manually registered recommend_model, completion, and document tools.")
        # --------------------------------

        await run_analyze_task_demo()

        # Only the delegate demo records costs into the tracker.
        await run_delegate_task_demo(tracker)

        await run_workflow_demo()
        # await run_prompt_optimization_demo() # Add back if needed

        # Final cost summary across all tracked calls.
        tracker.display_summary(console)

        logger.success("Workflow Delegation Demo Finished Successfully!", emoji_key="complete")
        console.print(Rule("[bold magenta]Workflow Delegation Demos Complete[/bold magenta]"))
        return 0

    except Exception as e:
        logger.critical(f"Workflow demo failed: {str(e)}", emoji_key="critical", exc_info=True)
        console.print(f"[bold red]Critical Demo Error:[/bold red] {escape(str(e))}")
        return 1
589 |
590 |
if __name__ == "__main__":
    # Run the async demo suite and propagate its exit code to the shell.
    sys.exit(asyncio.run(main()))
```