This is page 38 of 45. Use http://codebase.md/dicklesworthstone/llm_gateway_mcp_server?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .cursorignore
├── .env.example
├── .envrc
├── .gitignore
├── additional_features.md
├── check_api_keys.py
├── completion_support.py
├── comprehensive_test.py
├── docker-compose.yml
├── Dockerfile
├── empirically_measured_model_speeds.json
├── error_handling.py
├── example_structured_tool.py
├── examples
│ ├── __init__.py
│ ├── advanced_agent_flows_using_unified_memory_system_demo.py
│ ├── advanced_extraction_demo.py
│ ├── advanced_unified_memory_system_demo.py
│ ├── advanced_vector_search_demo.py
│ ├── analytics_reporting_demo.py
│ ├── audio_transcription_demo.py
│ ├── basic_completion_demo.py
│ ├── cache_demo.py
│ ├── claude_integration_demo.py
│ ├── compare_synthesize_demo.py
│ ├── cost_optimization.py
│ ├── data
│ │ ├── sample_event.txt
│ │ ├── Steve_Jobs_Introducing_The_iPhone_compressed.md
│ │ └── Steve_Jobs_Introducing_The_iPhone_compressed.mp3
│ ├── docstring_refiner_demo.py
│ ├── document_conversion_and_processing_demo.py
│ ├── entity_relation_graph_demo.py
│ ├── filesystem_operations_demo.py
│ ├── grok_integration_demo.py
│ ├── local_text_tools_demo.py
│ ├── marqo_fused_search_demo.py
│ ├── measure_model_speeds.py
│ ├── meta_api_demo.py
│ ├── multi_provider_demo.py
│ ├── ollama_integration_demo.py
│ ├── prompt_templates_demo.py
│ ├── python_sandbox_demo.py
│ ├── rag_example.py
│ ├── research_workflow_demo.py
│ ├── sample
│ │ ├── article.txt
│ │ ├── backprop_paper.pdf
│ │ ├── buffett.pdf
│ │ ├── contract_link.txt
│ │ ├── legal_contract.txt
│ │ ├── medical_case.txt
│ │ ├── northwind.db
│ │ ├── research_paper.txt
│ │ ├── sample_data.json
│ │ └── text_classification_samples
│ │ ├── email_classification.txt
│ │ ├── news_samples.txt
│ │ ├── product_reviews.txt
│ │ └── support_tickets.txt
│ ├── sample_docs
│ │ └── downloaded
│ │ └── attention_is_all_you_need.pdf
│ ├── sentiment_analysis_demo.py
│ ├── simple_completion_demo.py
│ ├── single_shot_synthesis_demo.py
│ ├── smart_browser_demo.py
│ ├── sql_database_demo.py
│ ├── sse_client_demo.py
│ ├── test_code_extraction.py
│ ├── test_content_detection.py
│ ├── test_ollama.py
│ ├── text_classification_demo.py
│ ├── text_redline_demo.py
│ ├── tool_composition_examples.py
│ ├── tournament_code_demo.py
│ ├── tournament_text_demo.py
│ ├── unified_memory_system_demo.py
│ ├── vector_search_demo.py
│ ├── web_automation_instruction_packs.py
│ └── workflow_delegation_demo.py
├── LICENSE
├── list_models.py
├── marqo_index_config.json.example
├── mcp_protocol_schema_2025-03-25_version.json
├── mcp_python_lib_docs.md
├── mcp_tool_context_estimator.py
├── model_preferences.py
├── pyproject.toml
├── quick_test.py
├── README.md
├── resource_annotations.py
├── run_all_demo_scripts_and_check_for_errors.py
├── storage
│ └── smart_browser_internal
│ ├── locator_cache.db
│ ├── readability.js
│ └── storage_state.enc
├── test_client.py
├── test_connection.py
├── TEST_README.md
├── test_sse_client.py
├── test_stdio_client.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── integration
│ │ ├── __init__.py
│ │ └── test_server.py
│ ├── manual
│ │ ├── test_extraction_advanced.py
│ │ └── test_extraction.py
│ └── unit
│ ├── __init__.py
│ ├── test_cache.py
│ ├── test_providers.py
│ └── test_tools.py
├── TODO.md
├── tool_annotations.py
├── tools_list.json
├── ultimate_mcp_banner.webp
├── ultimate_mcp_logo.webp
├── ultimate_mcp_server
│ ├── __init__.py
│ ├── __main__.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── commands.py
│ │ ├── helpers.py
│ │ └── typer_cli.py
│ ├── clients
│ │ ├── __init__.py
│ │ ├── completion_client.py
│ │ └── rag_client.py
│ ├── config
│ │ └── examples
│ │ └── filesystem_config.yaml
│ ├── config.py
│ ├── constants.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── evaluation
│ │ │ ├── base.py
│ │ │ └── evaluators.py
│ │ ├── providers
│ │ │ ├── __init__.py
│ │ │ ├── anthropic.py
│ │ │ ├── base.py
│ │ │ ├── deepseek.py
│ │ │ ├── gemini.py
│ │ │ ├── grok.py
│ │ │ ├── ollama.py
│ │ │ ├── openai.py
│ │ │ └── openrouter.py
│ │ ├── server.py
│ │ ├── state_store.py
│ │ ├── tournaments
│ │ │ ├── manager.py
│ │ │ ├── tasks.py
│ │ │ └── utils.py
│ │ └── ums_api
│ │ ├── __init__.py
│ │ ├── ums_database.py
│ │ ├── ums_endpoints.py
│ │ ├── ums_models.py
│ │ └── ums_services.py
│ ├── exceptions.py
│ ├── graceful_shutdown.py
│ ├── services
│ │ ├── __init__.py
│ │ ├── analytics
│ │ │ ├── __init__.py
│ │ │ ├── metrics.py
│ │ │ └── reporting.py
│ │ ├── cache
│ │ │ ├── __init__.py
│ │ │ ├── cache_service.py
│ │ │ ├── persistence.py
│ │ │ ├── strategies.py
│ │ │ └── utils.py
│ │ ├── cache.py
│ │ ├── document.py
│ │ ├── knowledge_base
│ │ │ ├── __init__.py
│ │ │ ├── feedback.py
│ │ │ ├── manager.py
│ │ │ ├── rag_engine.py
│ │ │ ├── retriever.py
│ │ │ └── utils.py
│ │ ├── prompts
│ │ │ ├── __init__.py
│ │ │ ├── repository.py
│ │ │ └── templates.py
│ │ ├── prompts.py
│ │ └── vector
│ │ ├── __init__.py
│ │ ├── embeddings.py
│ │ └── vector_service.py
│ ├── tool_token_counter.py
│ ├── tools
│ │ ├── __init__.py
│ │ ├── audio_transcription.py
│ │ ├── base.py
│ │ ├── completion.py
│ │ ├── docstring_refiner.py
│ │ ├── document_conversion_and_processing.py
│ │ ├── enhanced-ums-lookbook.html
│ │ ├── entity_relation_graph.py
│ │ ├── excel_spreadsheet_automation.py
│ │ ├── extraction.py
│ │ ├── filesystem.py
│ │ ├── html_to_markdown.py
│ │ ├── local_text_tools.py
│ │ ├── marqo_fused_search.py
│ │ ├── meta_api_tool.py
│ │ ├── ocr_tools.py
│ │ ├── optimization.py
│ │ ├── provider.py
│ │ ├── pyodide_boot_template.html
│ │ ├── python_sandbox.py
│ │ ├── rag.py
│ │ ├── redline-compiled.css
│ │ ├── sentiment_analysis.py
│ │ ├── single_shot_synthesis.py
│ │ ├── smart_browser.py
│ │ ├── sql_databases.py
│ │ ├── text_classification.py
│ │ ├── text_redline_tools.py
│ │ ├── tournament.py
│ │ ├── ums_explorer.html
│ │ └── unified_memory_system.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── async_utils.py
│ │ ├── display.py
│ │ ├── logging
│ │ │ ├── __init__.py
│ │ │ ├── console.py
│ │ │ ├── emojis.py
│ │ │ ├── formatter.py
│ │ │ ├── logger.py
│ │ │ ├── panels.py
│ │ │ ├── progress.py
│ │ │ └── themes.py
│ │ ├── parse_yaml.py
│ │ ├── parsing.py
│ │ ├── security.py
│ │ └── text.py
│ └── working_memory_api.py
├── unified_memory_system_technical_analysis.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/examples/docstring_refiner_demo.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | """
3 | Advanced Docstring Refiner Demo for Ultimate MCP Server.
4 |
5 | This script demonstrates the autonomous documentation refinement tool that analyzes, tests, and improves
6 | documentation (descriptions, schemas, examples) for MCP tools, enhancing their usability with LLM agents.
7 | The demo showcases multiple refinement approaches and visualization techniques while providing
8 | comprehensive performance metrics and cost analysis.
9 |
10 | Features:
11 | - Single and multi-tool refinement demonstrations
12 | - Custom test generation strategy configuration
13 | - Provider fallbacks and model selection optimization
14 | - Visual diffs of documentation improvements
15 | - Cost estimation and optimization techniques
16 | - Schema-focused refinement capabilities
17 | - Model comparison and performance analysis
18 | - Practical testing with intentionally flawed tools
19 | - Adaptive refinement based on tool complexity
20 |
21 | Command-line Arguments:
22 | --demo {all,single,multi,custom-testing,optimize,all-tools,schema-focus,practical,model-comparison}:
23 | Specific demo to run (default: all)
24 |
25 | --tool TOOL:
26 | Specify a specific tool to refine (bypasses automatic selection)
27 |
28 | --iterations N:
29 | Number of refinement iterations to run
30 |
31 | --model MODEL:
32 | Specify a model to use for refinement (e.g., gpt-4.1-mini, claude-3-5-haiku)
33 |
34 | --provider PROVIDER:
35 | Specify a provider to use for refinement (e.g., openai, anthropic)
36 |
37 | --visualize {minimal,standard,full}:
38 | Control visualization detail level (default: standard)
39 |
40 | --cost-limit FLOAT:
41 | Maximum cost limit in USD (default: 5.0)
42 |
43 | --output-dir DIR:
44 | Directory to save results
45 |
46 | --save-results:
47 | Save refinement results to files
48 |
49 | --verbose, -v:
50 | Increase output verbosity
51 |
52 | --create-flawed:
53 | Create flawed example tools for practical testing
54 |
55 | Demo Modes:
56 | single:
57 | Demonstrates refining a single tool with detailed progress tracking
58 | and visualization of description, schema, and example improvements.
59 |
60 | multi:
61 | Demonstrates refining multiple tools simultaneously, showcasing parallel
62 | processing and cross-tool analysis of documentation patterns.
63 |
64 | custom-testing:
65 | Demonstrates advanced test generation strategies with fine-grained control
66 | over the types and quantities of test cases.
67 |
68 | optimize:
69 | Showcases cost optimization techniques for large-scale refinement,
70 | comparing standard and cost-optimized approaches.
71 |
72 | all-tools:
73 | Demonstrates the capability to refine all available tools in a single run,
74 | with resource management and prioritization features.
75 |
76 | schema-focus:
77 | Focuses specifically on schema improvements, with detailed visualization
78 | of JSON schema patches and validation improvements.
79 |
80 | practical:
81 | Creates and refines intentionally flawed example tools to demonstrate
82 | the system's ability to identify and fix common documentation issues.
83 |
84 | model-comparison:
85 | Compares the performance of different LLM models for refinement tasks,
86 | with detailed metrics on success rates, cost, and processing time.
87 |
88 | Dependencies:
89 | - ultimate_mcp_server: Core framework for interfacing with LLMs and tools
90 | - rich: For beautiful console output and visualizations
91 | - asyncio: For asynchronous processing of refinement operations
92 | - Required API keys for providers (OPENAI_API_KEY, ANTHROPIC_API_KEY, etc.)
93 |
94 | Usage Examples:
95 | # Run all demos with standard visualization
96 | python docstring_refiner_demo.py
97 |
98 | # Run just the single tool refinement demo with a specific tool
99 | python docstring_refiner_demo.py --demo single --tool generate_completion
100 |
101 | # Run the model comparison demo with full visualization and save results
102 | python docstring_refiner_demo.py --demo model-comparison --visualize full --save-results
103 |
104 | # Run the multi-tool demo with a specific model and cost limit
105 | python docstring_refiner_demo.py --demo multi --model gpt-4.1-mini --cost-limit 2.5
106 |
107 | # Create and test flawed example tools
108 | python docstring_refiner_demo.py --demo practical --create-flawed
109 |
110 | Return Values:
111 | The script returns exit code 0 on successful completion, or exit code 1 if
112 | critical errors occur during execution.
113 |
114 | Methods:
115 | The script contains various helper functions and demo methods:
116 |
117 | setup_gateway_and_tools(): Initializes the Gateway and ensures required tools are available
118 |
119 | get_suitable_tools(): Finds appropriate tools for demonstrations based on complexity
120 |
121 | display_refinement_progress(): Callback for tracking refinement progress events
122 |
123 | create_text_diff(), create_side_by_side_diff(): Generate visual diffs of documentation changes
124 |
125 | display_refinement_result(): Formats and displays refinement results with appropriate detail level
126 |
127 | create_flawed_example_tools(): Creates example tools with intentional documentation flaws
128 |
129 | Demo functions (demo_*): Implement specific demonstration scenarios
130 |
131 | Implementation Notes:
132 | - The script uses the global MCP instance from the Gateway for all tool operations
133 | - Refinement operations are tracked through a CostTracker instance for budget management
134 | - All demonstrations include graceful fallbacks for providers and models
135 | - Progress updates are displayed using Rich's Progress components
136 | - Results can be saved to files for later analysis or integration
137 |
138 | Author:
139 | Ultimate MCP Server Team
140 |
141 | Version:
142 | 1.0.0
143 | """
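# Illustrative programmatic sketch (assumes only the demo's own helpers defined below;
# run inside an async context, not at module import time):
#   gateway = await setup_gateway_and_tools()
#   result = await gateway.mcp.call_tool(
#       "refine_tool_documentation",
#       {"tool_names": ["generate_completion"], "max_iterations": 1},
#   )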
144 |
145 | import argparse
146 | import asyncio
147 | import datetime
148 | import difflib
149 | import json
150 | import random
151 | import sys
152 | import tempfile
153 | import time
154 | from pathlib import Path
155 | from typing import Dict, List, Optional
156 |
157 | # Add project root to path for imports when running as script
158 | sys.path.insert(0, str(Path(__file__).parent.parent))
159 |
160 | # Rich for beautiful console output
161 | from rich import box
162 | from rich.console import Console, Group
163 | from rich.markup import escape
164 | from rich.panel import Panel
165 | from rich.progress import (
166 | BarColumn,
167 | Progress,
168 | SpinnerColumn,
169 | TaskProgressColumn,
170 | TextColumn,
171 | TimeElapsedColumn,
172 | TimeRemainingColumn,
173 | )
174 | from rich.rule import Rule
175 | from rich.syntax import Syntax
176 | from rich.table import Table
177 | from rich.tree import Tree
178 |
179 | # Project imports
180 | from ultimate_mcp_server.constants import Provider
181 | from ultimate_mcp_server.core.server import Gateway
182 | from ultimate_mcp_server.tools.base import with_error_handling
183 | from ultimate_mcp_server.tools.docstring_refiner import (
184 | RefinementProgressEvent,
185 | )
186 | from ultimate_mcp_server.utils import get_logger
187 | from ultimate_mcp_server.utils.display import CostTracker
188 | from ultimate_mcp_server.utils.logging.console import console
189 |
190 | # Initialize logger
191 | logger = get_logger("example.docstring_refiner")
192 |
193 | # Create a separate console for detailed output
194 | detail_console = Console(highlight=False)
195 |
196 | # Global MCP instance (will be populated from Gateway)
197 | mcp = None
198 |
199 | # Global settings that can be modified by command line args
200 | SETTINGS = {
201 | "output_dir": None,
202 | "visualization_level": "standard", # "minimal", "standard", "full"
203 | "cost_limit": 5.0, # USD
204 | "preferred_providers": [Provider.OPENAI.value, Provider.ANTHROPIC.value, Provider.GEMINI.value],
205 | "fallback_providers": [Provider.DEEPSEEK.value, Provider.GROK.value],
206 | "save_results": False,
207 | "verbose": False,
208 | }
209 |
210 |
211 | def parse_arguments():
212 | """Parse command line arguments for the demo."""
213 | parser = argparse.ArgumentParser(
214 | description="Advanced Docstring Refiner Demo for Ultimate MCP Server",
215 | formatter_class=argparse.RawDescriptionHelpFormatter,
216 | epilog="""Available demos:
217 | all - Run all demos (default)
218 | single - Single tool refinement
219 | multi - Multi-tool refinement
220 | custom-testing - Custom test generation strategies
221 | optimize - Cost optimization techniques
222 | all-tools - Refine all available tools
223 | schema-focus - Focus on schema improvements
224 | practical - Practical testing with flawed tools
225 | model-comparison - Compare different LLM models for refinement
226 | """
227 | )
228 |
229 | # Demo selection
230 | parser.add_argument(
231 | "--demo",
232 | default="all",
233 | choices=[
234 | "all", "single", "multi", "custom-testing", "optimize",
235 | "all-tools", "schema-focus", "practical",
236 | "model-comparison"
237 | ],
238 | help="Specific demo to run (default: all)"
239 | )
240 |
241 | # Tool selection
242 | parser.add_argument(
243 | "--tool",
244 | help="Specify a specific tool to refine (bypasses automatic selection)"
245 | )
246 |
247 | # Iteration control
248 | parser.add_argument(
249 | "--iterations",
250 | type=int,
251 | default=None,
252 | help="Number of refinement iterations to run"
253 | )
254 |
255 | # Model specification
256 | parser.add_argument(
257 | "--model",
258 | default=None,
259 | help="Specify a model to use for refinement (e.g., gpt-4.1-mini, claude-3-5-haiku)"
260 | )
261 |
262 | # Provider specification
263 | parser.add_argument(
264 | "--provider",
265 | default=None,
266 | help=f"Specify a provider to use for refinement (e.g., {Provider.OPENAI.value}, {Provider.ANTHROPIC.value})"
267 | )
268 |
269 | # Visualization options
270 | parser.add_argument(
271 | "--visualize",
272 | choices=["minimal", "standard", "full"],
273 | default="standard",
274 | help="Control visualization detail level"
275 | )
276 |
277 | # Cost limit
278 | parser.add_argument(
279 | "--cost-limit",
280 | type=float,
281 | default=5.0,
282 | help="Maximum cost limit in USD"
283 | )
284 |
285 | # Output directory
286 | parser.add_argument(
287 | "--output-dir",
288 | help="Directory to save results"
289 | )
290 |
291 | # Save results
292 | parser.add_argument(
293 | "--save-results",
294 | action="store_true",
295 | help="Save refinement results to files"
296 | )
297 |
298 | # Verbosity
299 | parser.add_argument(
300 | "-v", "--verbose",
301 | action="store_true",
302 | help="Increase output verbosity"
303 | )
304 |
305 | # Create flawed tools for testing
306 | parser.add_argument(
307 | "--create-flawed",
308 | action="store_true",
309 | help="Create flawed example tools for practical testing"
310 | )
311 |
312 | args = parser.parse_args()
313 |
314 | # Update settings
315 | SETTINGS["visualization_level"] = args.visualize
316 | SETTINGS["cost_limit"] = args.cost_limit
317 | SETTINGS["save_results"] = args.save_results
318 | SETTINGS["verbose"] = args.verbose
319 |
320 | if args.output_dir:
321 | output_dir = Path(args.output_dir)
322 | output_dir.mkdir(parents=True, exist_ok=True)
323 | SETTINGS["output_dir"] = output_dir
324 |
325 | return args
326 |
327 |
328 | async def setup_gateway_and_tools(create_flawed_tools=False):
329 | """Set up the gateway and ensure docstring refiner tool is available."""
330 | global mcp
331 | logger.debug("Initializing Gateway for docstring refiner demo...")
332 | logger.info("Initializing Gateway for docstring refiner demo...", emoji_key="start")
333 |
334 | # Create Gateway instance with all tools
335 | logger.debug("Creating Gateway instance with all tools")
336 | gateway = Gateway("docstring-refiner-demo", register_tools=True) # Register all tools, not just minimal tools
337 |
338 | # Initialize providers (needed for the tool to function)
339 | try:
340 | logger.debug("Initializing providers...")
341 | await gateway._initialize_providers()
342 | logger.success("Successfully initialized providers", emoji_key="success")
343 | logger.debug("Successfully initialized providers")
344 | except Exception as e:
345 | logger.error(f"Error initializing providers: {e}", emoji_key="error", exc_info=True)
346 | logger.exception("Error initializing providers")
347 | console.print(Panel(
348 | f"Error initializing providers: {escape(str(e))}\n\n"
349 | "Check that your API keys are set correctly in environment variables:\n"
350 | "- OPENAI_API_KEY\n"
351 | "- ANTHROPIC_API_KEY\n"
352 | "- GEMINI_API_KEY\n",
353 | title="❌ Provider Initialization Failed",
354 | border_style="red",
355 | expand=False
356 | ))
357 | # Continue anyway, as some providers might still work
358 |
359 | # Store the MCP server instance
360 | mcp = gateway.mcp
361 | logger.debug("Stored MCP server instance")
362 |
363 | # Display available providers with available models
364 | logger.debug("Getting provider information")
365 | provider_tree = Tree("[bold cyan]Available Providers & Models[/bold cyan]")
366 | provider_info = []
367 |
368 | for provider_name, provider in gateway.providers.items():
369 | if provider:
370 | try:
371 | models = await provider.list_models()
372 | provider_branch = provider_tree.add(f"[yellow]{provider_name}[/yellow]")
373 |
374 | # Group models by category/capability
375 | categorized_models = {}
376 | for model in models:
377 | model_id = model.get("id", "unknown")
378 | if "claude" in model_id.lower():
379 | category = "Claude Family"
380 | elif "gemini" in model_id.lower():
381 | category = "Gemini Family"
382 | elif "deepseek" in model_id.lower():
383 | category = "DeepSeek Family"
384 | elif "4" in model_id:
385 | category = "GPT-4 Family"
386 | elif "3" in model_id:
387 | category = "GPT-3 Family"
388 | else:
389 | category = "Other Models"
390 |
391 | if category not in categorized_models:
392 | categorized_models[category] = []
393 | categorized_models[category].append(model_id)
394 |
395 | # Add models to the tree by category
396 | for category, model_list in categorized_models.items():
397 | category_branch = provider_branch.add(f"[cyan]{category}[/cyan]")
398 | for model_id in sorted(model_list):
399 | category_branch.add(f"[green]{model_id}[/green]")
400 |
401 | # Get default model for provider info
402 | default_model = provider.get_default_model()
403 | provider_info.append(f"{provider_name} (default: {default_model})")
404 | except Exception as e:
405 | logger.warning(f"Could not get models for {provider_name}: {e}", emoji_key="warning")
406 | logger.warning(f"Could not get models for {provider_name}: {e}")
407 | provider_info.append(f"{provider_name} (models unavailable)")
408 | provider_branch = provider_tree.add(f"[yellow]{provider_name}[/yellow]")
409 | provider_branch.add(f"[red]Error listing models: {escape(str(e))}[/red]")
410 |
411 | # Display provider info based on visualization level
412 | if SETTINGS["visualization_level"] == "full":
413 | console.print(Panel(provider_tree, border_style="dim cyan", padding=(1, 2)))
414 | else:
415 | console.print(Panel(
416 | f"Available providers: {', '.join(provider_info)}",
417 | title="Provider Configuration",
418 | border_style="cyan",
419 | expand=False
420 | ))
421 |
422 | # Verify the docstring_refiner tool is available
423 | logger.debug("Checking for available tools")
424 | tool_list = await mcp.list_tools()
425 | available_tools = [t.name for t in tool_list]
426 | logger.debug(f"Available tools before registration: {available_tools}")
427 |
428 | # Display all available tools
429 | tool_tree = Tree("[bold cyan]Available MCP Tools[/bold cyan]")
430 |
431 | # Group tools by namespace for better visualization
432 | tool_namespaces = {}
433 | for tool_name in available_tools:
434 | if ":" in tool_name:
435 | namespace, name = tool_name.split(":", 1)
436 | if namespace not in tool_namespaces:
437 | tool_namespaces[namespace] = []
438 | tool_namespaces[namespace].append(name)
439 | else:
440 | if "root" not in tool_namespaces:
441 | tool_namespaces["root"] = []
442 | tool_namespaces["root"].append(tool_name)
443 |
444 | # Add tools to tree with proper grouping
445 | for namespace, tools in tool_namespaces.items():
446 | if namespace == "root":
447 | for tool in sorted(tools):
448 | tool_tree.add(f"[green]{tool}[/green]")
449 | else:
450 | ns_branch = tool_tree.add(f"[yellow]{namespace}[/yellow]")
451 | for tool in sorted(tools):
452 | ns_branch.add(f"[green]{tool}[/green]")
453 |
454 | # Display tool info based on visualization level
455 | if SETTINGS["visualization_level"] in ["standard", "full"]:
456 | console.print(Panel(tool_tree, border_style="dim cyan", padding=(1, 2)))
457 | else:
458 | console.print(f"[cyan]Tools available:[/cyan] {len(available_tools)}")
459 |
460 | # Check if refine_tool_documentation is available
461 | if "refine_tool_documentation" in available_tools:
462 | logger.success("refine_tool_documentation tool available.", emoji_key="success")
463 | else:
464 | logger.warning("refine_tool_documentation tool not found in available tools list.", emoji_key="warning")
465 | console.print(Panel(
466 | "The refine_tool_documentation tool is not registered automatically.\n"
467 | "This demo will attempt to register it manually as a fallback.",
468 | title="⚠️ Tool Availability Notice",
469 | border_style="yellow"
470 | ))
471 |
472 | # Manually register the refine_tool_documentation tool as a fallback
473 | # Note: This should no longer be necessary since the tool is now included in STANDALONE_TOOL_FUNCTIONS
474 | # in ultimate_mcp_server/tools/__init__.py, but we keep it as a fallback in case of issues
475 | try:
476 | print("Attempting to manually register refine_tool_documentation tool as fallback...")
477 | from ultimate_mcp_server.tools.docstring_refiner import refine_tool_documentation
478 | print("Imported refine_tool_documentation successfully")
479 |
480 | # Create a simplified wrapper to avoid Pydantic validation issues
481 | @with_error_handling
482 | async def docstring_refiner_wrapper(
483 | tool_names=None,
484 | refine_all_available=False,
485 | max_iterations=1,
486 | ctx=None
487 | ):
488 | """
489 | Refine the documentation of MCP tools.
490 |
491 | Args:
492 | tool_names: List of tools to refine, or None to use refine_all_available
493 | refine_all_available: Whether to refine all available tools
494 | max_iterations: Maximum number of refinement iterations
495 | ctx: MCP context
496 |
497 | Returns:
498 | Refinement results
499 | """
500 | print(f"Wrapper called with tool_names={tool_names}, refine_all_available={refine_all_available}")
501 | # Simply pass through to the actual implementation
502 | return await refine_tool_documentation(
503 | tool_names=tool_names,
504 | refine_all_available=refine_all_available,
505 | max_iterations=max_iterations,
506 | ctx=ctx
507 | )
508 |
509 | # Register our simplified wrapper instead
510 | mcp.tool(name="refine_tool_documentation")(docstring_refiner_wrapper)
511 | print("Registered fallback wrapper tool successfully")
512 | logger.success("Successfully registered fallback wrapper for refine_tool_documentation tool", emoji_key="success")
513 | except Exception as e:
514 | logger.error(f"Failed to register fallback refine_tool_documentation tool: {e}", emoji_key="error", exc_info=True)
515 | print(f"Error registering fallback tool: {type(e).__name__}: {str(e)}")
516 | import traceback
517 | print("Stack trace:")
518 | traceback.print_exc()
519 | console.print(Panel(
520 | f"Error registering the fallback refine_tool_documentation tool: {escape(str(e))}\n\n"
521 | "This demo requires the docstring_refiner tool to be properly registered.",
522 | title="❌ Registration Failed",
523 | border_style="red",
524 | expand=False
525 | ))
526 | console.print(Panel(
527 | "This demo requires the docstring_refiner tool to be properly registered.\n"
528 | "Check that you have the correct version of the Ultimate MCP Server and dependencies installed.",
529 | title="⚠️ Demo Requirements Not Met",
530 | border_style="red",
531 | expand=False
532 | ))
533 | return gateway
534 |
535 | # Create flawed example tools if requested
536 | if create_flawed_tools:
537 | created_tools = await create_flawed_example_tools(mcp)
538 | if created_tools:
539 | console.print(Panel(
540 | f"Created {len(created_tools)} flawed example tools for testing:\n" +
541 | "\n".join([f"- [cyan]{name}[/cyan]" for name in created_tools]),
542 | title="🛠️ Flawed Tools Created",
543 | border_style="yellow",
544 | expand=False
545 | ))
546 |
547 | return gateway
548 |
549 |
550 | async def create_flawed_example_tools(mcp_instance):
551 | """Create flawed example tools for demonstration purposes."""
552 | created_tools = []
553 |
554 | try:
555 | # Create a temporary directory to store any needed files
556 | temp_dir = tempfile.mkdtemp(prefix="docstring_refiner_flawed_tools_")
557 | logger.info(f"Created temporary directory for flawed tools: {temp_dir}", emoji_key="setup")
558 |
559 | # Define several flawed tools with various issues
560 |
561 | # Tool 1: Ambiguous Description
562 | @mcp_instance.tool()
563 | async def flawed_process_text(text: str, mode: str = "simple", include_metadata: bool = False):
564 | """Process the given text.
565 |
566 | This tool does processing on text.
567 |
568 | Args:
569 | text: Text to process
570 | mode: Processing mode (simple, advanced, expert)
571 | include_metadata: Whether to include metadata in result
572 | """
573 | # Actual implementation doesn't matter for the demo
574 | result = {"processed": text[::-1]} # Just reverse the text
575 | if include_metadata:
576 | result["metadata"] = {"length": len(text), "mode": mode}
577 | return result
578 |
579 | created_tools.append("flawed_process_text")
580 |
581 | # Tool 2: Missing Parameter Descriptions
582 | @mcp_instance.tool()
583 | async def flawed_scrape_website(url, depth=1, extract_links=True, timeout=30.0):
584 | """Website scraper tool.
585 |
586 | Extracts content from websites.
587 | """
588 | # Simulate scraping
589 | return {
590 | "title": f"Page at {url}",
591 | "content": f"Scraped content with depth {depth}",
592 | "links": ["https://example.com/1", "https://example.com/2"] if extract_links else []
593 | }
594 |
595 | created_tools.append("flawed_scrape_website")
596 |
597 | # Tool 3: Confusing Schema & Inconsistent Description
598 | @mcp_instance.tool()
599 | async def flawed_data_processor(config, inputs, format="json"):
600 | """Processes data.
601 |
602 | The analyzer takes configuration and processes input data.
603 | The system allows different engine versions and parameters.
604 | """
605 | # Just return dummy data
606 | return {
607 | "outputs": [f"Processed: {i}" for i in inputs],
608 | "engine_used": config.get("engine", "v1"),
609 | "format": format
610 | }
611 |
612 | created_tools.append("flawed_data_processor")
613 |
614 | # Tool 4: Misleading Examples in Description but no schema examples
615 | @mcp_instance.tool()
616 | async def flawed_product_search(query, filters=None, sort="rating", page=1, per_page=20):
617 | """Search for products in the database.
618 |
619 | Example usage:
620 | ```
621 | search_products("laptop", {"category": "electronics", "min_price": 500}, sort_by="newest")
622 | ```
623 |
624 | The search function allows querying for items along with filtering and sorting options.
625 | """
626 | # Return dummy results
627 | return {
628 | "results": [{"id": i, "name": f"{query} product {i}", "price": random.randint(10, 1000)} for i in range(1, 6)],
629 | "total": 243,
630 | "page": page,
631 | "per_page": per_page
632 | }
633 |
634 | created_tools.append("flawed_product_search")
635 |
636 | # Tool 5: Schema with type issues (number vs integer conflicts)
637 | @mcp_instance.tool()
638 | async def flawed_calculator(values, operation, precision=2, scale_factor=1.0):
639 | """Statistical calculator.
640 |
641 | Calculate statistics on a set of values. The operation determines which
642 | statistic to calculate. Valid operations are:
643 |
644 | - sum: Calculate the sum of all values
645 | - average: Calculate the mean of the values
646 | - max: Find the maximum value
647 | - min: Find the minimum value
648 |
649 | The precision parameter must be an integer between 0 and 10.
650 |
651 | After calculation, the result is multiplied by the scale_factor.
652 | """
653 | # Perform the calculation
654 | if operation == "sum":
655 | result = sum(values)
656 | elif operation == "average":
657 | result = sum(values) / len(values) if values else 0
658 | elif operation == "max":
659 | result = max(values) if values else None
660 | elif operation == "min":
661 | result = min(values) if values else None
662 | else:
663 | result = None
664 |
665 | # Apply scale and precision
666 | if result is not None:
667 | result = round(result * scale_factor, precision)
668 |
669 | return {"result": result}
670 |
671 | created_tools.append("flawed_calculator")
672 |
673 | logger.success(f"Successfully created {len(created_tools)} flawed example tools", emoji_key="success")
674 | return created_tools
675 |
676 | except Exception as e:
677 | logger.error(f"Error creating flawed example tools: {e}", emoji_key="error", exc_info=True)
678 | console.print(f"[bold red]Error creating flawed example tools:[/bold red] {escape(str(e))}")
679 | return []
680 |
681 |
682 | async def display_refinement_progress(event: RefinementProgressEvent):
683 | """Handle progress events from the refinement process."""
684 | # Create a formatted message based on the event type
685 | if event.stage == "starting_iteration":
686 | message = f"[bold cyan]Starting iteration {event.iteration}/{event.total_iterations} for {event.tool_name}[/bold cyan]"
687 | elif event.stage == "agent_simulation":
688 | message = f"[blue]Simulating agent usage for {event.tool_name}...[/blue]"
689 | elif event.stage == "test_generation":
690 | message = f"[blue]Generating test cases for {event.tool_name}...[/blue]"
691 | elif event.stage == "test_execution_start":
692 | message = f"[blue]Executing tests for {event.tool_name}...[/blue]"
693 | elif event.stage == "test_execution_progress":
694 | message = f"[blue]Test execution progress: {event.progress_pct:.1f}%[/blue]"
695 | elif event.stage == "test_execution_end":
696 | success_rate = event.details.get("success_rate") if event.details else None
697 | if success_rate is not None:
698 | message = f"[green]Tests completed for {event.tool_name} - Success rate: {success_rate:.1%}[/green]"
699 | else:
700 | message = f"[green]Tests completed for {event.tool_name}[/green]"
701 | elif event.stage == "analysis_start":
702 | message = f"[blue]Analyzing results for {event.tool_name}...[/blue]"
703 | elif event.stage == "analysis_end":
704 | message = f"[green]Analysis completed for {event.tool_name}[/green]"
705 | elif event.stage == "schema_patching":
706 | message = f"[blue]Applying schema patches for {event.tool_name}...[/blue]"
707 | elif event.stage == "winnowing":
708 | message = f"[blue]Optimizing documentation for {event.tool_name}...[/blue]"
709 | elif event.stage == "iteration_complete":
710 | message = f"[bold green]Iteration {event.iteration} complete for {event.tool_name}[/bold green]"
711 | elif event.stage == "tool_complete":
712 | message = f"[bold magenta]Refinement complete for {event.tool_name}[/bold magenta]"
713 | elif event.stage == "error":
714 | message = f"[bold red]Error during refinement for {event.tool_name}: {event.message}[/bold red]"
715 | else:
716 | message = f"[dim]{event.message}[/dim]"
717 |
718 | # Print the message
719 | detail_console.print(message)
720 |
721 | # Print additional details if in verbose mode
722 | if SETTINGS["verbose"] and event.details:
723 | try:
724 | detail_console.print(f"[dim cyan]Details: {json.dumps(event.details, default=str)}[/dim cyan]")
725 | except Exception:
726 | detail_console.print(f"[dim cyan]Details: {event.details}[/dim cyan]")
727 |
728 | # Return True to confirm the callback was processed
729 | return True
730 |
731 |
732 | def create_text_diff(original: str, improved: str) -> Panel:
733 | """Create a colorized diff between original and improved text."""
734 | diff = difflib.unified_diff(
735 | original.splitlines(),
736 | improved.splitlines(),
737 | lineterm='',
738 | n=3 # Context lines
739 | )
740 |
741 | # Convert diff to rich text with colors
742 | rich_diff = []
743 | for line in diff:
744 | if line.startswith('+'):
745 | rich_diff.append(f"[green]{escape(line)}[/green]")
746 | elif line.startswith('-'):
747 | rich_diff.append(f"[red]{escape(line)}[/red]")
748 | elif line.startswith('@@'):
749 | rich_diff.append(f"[cyan]{escape(line)}[/cyan]")
750 | else:
751 | rich_diff.append(escape(line))
752 |
753 | # Return as panel
754 | if rich_diff:
755 | diff_panel = Panel(
756 | "\n".join(rich_diff),
757 | title="Documentation Changes (Diff)",
758 | border_style="yellow",
759 | expand=False
760 | )
761 | return diff_panel
762 | else:
763 | return Panel(
764 | "[dim italic]No differences found[/dim italic]",
765 | title="Documentation Changes (Diff)",
766 | border_style="dim",
767 | expand=False
768 | )
769 |
770 |
771 | def create_side_by_side_diff(original: str, improved: str, title: str = "Documentation Comparison") -> Panel:
772 | """Create a side-by-side comparison of original and improved text."""
773 | # Wrap in panels with highlighting
774 | original_panel = Panel(
775 | escape(original),
776 | title="Original",
777 | border_style="dim red",
778 | expand=True
779 | )
780 |
781 | improved_panel = Panel(
782 | escape(improved),
783 | title="Improved",
784 | border_style="green",
785 | expand=True
786 | )
787 |
788 | # Group the before/after panels (Rich Group renders them stacked vertically under the rule)
789 | comparison = Group(
790 | Rule("Before / After"),
791 | Group(
792 | original_panel,
793 | improved_panel
794 | )
795 | )
796 |
797 | return Panel(
798 | comparison,
799 | title=title,
800 | border_style="cyan",
801 | expand=False
802 | )
803 |
804 |
805 | def display_refinement_result(
806 | result: Dict,
807 | console: Console = console,
808 | visualization_level: str = "standard",
809 | save_to_file: bool = False,
810 | output_dir: Optional[Path] = None
811 | ):
812 | """Display the results of the docstring refinement process."""
813 | console.print(Rule("[bold green]Refinement Results[/bold green]", style="green"))
814 |
815 | # Summary statistics
816 | stats_table = Table(title="[bold]Summary Statistics[/bold]", box=box.ROUNDED, show_header=False, expand=False)
817 | stats_table.add_column("Metric", style="cyan", no_wrap=True)
818 | stats_table.add_column("Value", style="white")
819 | stats_table.add_row("Total Tools Refined", str(len(result.get("refined_tools", []))))
820 | stats_table.add_row("Total Iterations", str(result.get("total_iterations_run", 0)))
821 | stats_table.add_row("Total Tests Executed", str(result.get("total_test_calls_attempted", 0)))
822 | stats_table.add_row("Total Test Failures", str(result.get("total_test_calls_failed", 0)))
823 | stats_table.add_row("Total Validation Failures", str(result.get("total_schema_validation_failures", 0)))
824 | stats_table.add_row("Total Processing Time", f"{result.get('total_processing_time', 0.0):.2f}s")
825 | stats_table.add_row("Total Cost", f"${result.get('total_refinement_cost', 0.0):.6f}")
826 | console.print(stats_table)
827 |
828 | # Save results to file if requested
829 | if save_to_file and output_dir:
830 | timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
831 | result_file = output_dir / f"refinement_results_{timestamp}.json"
832 | try:
833 | with open(result_file, 'w') as f:
834 | json.dump(result, f, indent=2, default=str)
835 | console.print(f"[green]Results saved to:[/green] {result_file}")
836 | except Exception as e:
837 | console.print(f"[red]Error saving results to file:[/red] {e}")
838 |
839 | # Tools refined
840 | refined_tools = result.get("refined_tools", [])
841 | if refined_tools:
842 | console.print("\n[bold]Tools Refined:[/bold]")
843 |
844 | # Results tallying
845 | total_description_improvements = 0
846 | total_schema_improvements = 0
847 | total_example_improvements = 0
848 | flaw_categories_observed = {}
849 |
850 | for i, tool in enumerate(refined_tools):
851 | tool_name = tool.get("tool_name", "Unknown tool")
852 | initial_success_rate = tool.get("initial_success_rate", 0.0)
853 | final_success_rate = tool.get("final_success_rate", 0.0)
854 | improvement_factor = tool.get("improvement_factor", 0.0)
855 |
856 | # Decide on panel color based on improvement
857 | if improvement_factor > 0.5:
858 | border_style = "green"
859 | elif improvement_factor > 0:
860 | border_style = "blue"
861 | else:
862 | border_style = "yellow"
863 |
864 | # Create a panel for each tool
865 | success_change = (final_success_rate - initial_success_rate) * 100
866 | success_change_str = (
867 | f"[green]+{success_change:.1f}%[/green]" if success_change > 0 else
868 | f"[red]{success_change:.1f}%[/red]" if success_change < 0 else
869 | "[yellow]No change[/yellow]"
870 | )
871 |
872 | tool_panel_content = [
873 | f"Initial Success Rate: [yellow]{initial_success_rate:.1%}[/yellow]",
874 | f"Final Success Rate: [green]{final_success_rate:.1%}[/green]",
875 | f"Change: {success_change_str}",
876 | f"Improvement Factor: [cyan]{improvement_factor:.2f}x[/cyan]"
877 | ]
878 |
879 | console.print(Panel(
880 | Group(*tool_panel_content),
881 | title=f"[bold]{i+1}. {tool_name}[/bold]",
882 | border_style=border_style,
883 | expand=False
884 | ))
885 |
886 | # Display the final proposed changes
887 | final_changes = tool.get("final_proposed_changes", {})
888 | iterations = tool.get("iterations", [])
889 |
890 | if final_changes:
891 | # Check if description was improved
892 | original_desc = None
893 | for iter_data in iterations:
894 | if iter_data.get("iteration") == 1:
895 | # Get the original description from the first iteration
896 | original_desc = iter_data.get("documentation_used", {}).get("description", "")
897 | break
898 |
899 | final_desc = final_changes.get("description", "")
900 |
901 | # Count this as an improvement if descriptions differ
902 | if original_desc and final_desc and original_desc != final_desc:
903 | total_description_improvements += 1
904 |
905 | # Display description changes based on visualization level
906 | if visualization_level in ["standard", "full"]:
907 | console.print("[bold cyan]Description Changes:[/bold cyan]")
908 |
909 | if visualization_level == "full":
910 | # Show diff view for detailed visualization
911 | console.print(create_text_diff(original_desc, final_desc))
912 |
913 | # Show side-by-side comparison
914 | console.print(create_side_by_side_diff(
915 | original_desc,
916 | final_desc,
917 | title="Description Comparison"
918 | ))
919 |
920 | # Display schema patches if any
921 | schema_patches = tool.get("final_proposed_schema_patches", [])
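# Each patch is expected to follow JSON Patch (RFC 6902) conventions; the shape below is
# illustrative only, not taken from an actual refinement run:
#   [{"op": "add", "path": "/properties/mode/description", "value": "Processing mode"}]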
922 | if schema_patches:
923 | total_schema_improvements += 1
924 |
925 | if visualization_level in ["standard", "full"]:
926 | console.print("[bold cyan]Schema Patches Applied:[/bold cyan]")
927 | console.print(Panel(
928 | Syntax(json.dumps(schema_patches, indent=2), "json", theme="default", line_numbers=False),
929 | title="JSON Patch Operations",
930 | border_style="magenta",
931 | expand=False
932 | ))
933 |
934 | # Display examples
935 | examples = final_changes.get("examples", [])
936 | if examples:
937 | total_example_improvements += len(examples)
938 |
939 | if visualization_level in ["standard", "full"]:
940 | console.print("[bold cyan]Generated Examples:[/bold cyan]")
941 | examples_to_show = examples if visualization_level == "full" else examples[:3]
942 |
943 | for j, example in enumerate(examples_to_show):
944 | args = example.get("args", {})
945 | comment = example.get("comment", "No description")
946 | addresses_failure = example.get("addresses_failure_pattern", "")
947 |
948 | # Add failure pattern as subtitle if present
949 | subtitle = f"Addresses: {addresses_failure}" if addresses_failure else None
950 |
951 | console.print(Panel(
952 | Syntax(json.dumps(args, indent=2), "json", theme="default", line_numbers=False),
953 | title=f"Example {j+1}: {comment}",
954 | subtitle=subtitle,
955 | border_style="dim green",
956 | expand=False
957 | ))
958 |
959 | if len(examples) > 3 and visualization_level == "standard":
960 | console.print(f"[dim]...and {len(examples) - 3} more examples[/dim]")
961 |
962 | # Collect flaw categories if available
963 | for iter_data in iterations:
964 | analysis = iter_data.get("analysis", {})
965 | if analysis:
966 | flaws = analysis.get("identified_flaw_categories", [])
967 | for flaw in flaws:
968 | if flaw not in flaw_categories_observed:
969 | flaw_categories_observed[flaw] = 0
970 | flaw_categories_observed[flaw] += 1
971 |
972 | console.print() # Add spacing between tools
973 |
974 | # Display improvement summary
975 | console.print(Rule("[bold blue]Improvement Summary[/bold blue]", style="blue"))
976 |
977 | improvement_table = Table(box=box.SIMPLE, show_header=True, header_style="bold cyan")
978 | improvement_table.add_column("Improvement Type", style="blue")
979 | improvement_table.add_column("Count", style="cyan")
980 | improvement_table.add_column("Details", style="white")
981 |
982 | improvement_table.add_row(
983 | "Description Improvements",
984 | str(total_description_improvements),
985 | f"{total_description_improvements} of {len(refined_tools)} tools ({total_description_improvements/len(refined_tools)*100:.0f}%)"
986 | )
987 | improvement_table.add_row(
988 | "Schema Improvements",
989 | str(total_schema_improvements),
990 | f"{total_schema_improvements} of {len(refined_tools)} tools ({total_schema_improvements/len(refined_tools)*100:.0f}%)"
991 | )
992 | improvement_table.add_row(
993 | "Example Additions",
994 | str(total_example_improvements),
995 | f"Average {total_example_improvements/len(refined_tools):.1f} examples per tool"
996 | )
997 |
998 | console.print(improvement_table)
999 |
1000 | # Display flaw categories if any were observed
1001 | if flaw_categories_observed and visualization_level in ["standard", "full"]:
1002 | console.print("\n[bold cyan]Documentation Flaws Identified:[/bold cyan]")
1003 |
1004 | flaws_table = Table(box=box.SIMPLE, show_header=True, header_style="bold magenta")
1005 | flaws_table.add_column("Flaw Category", style="magenta")
1006 | flaws_table.add_column("Occurrences", style="cyan")
1007 | flaws_table.add_column("Description", style="white")
1008 |
1009 | # Map flaw categories to descriptions
1010 | flaw_descriptions = {
1011 | "MISSING_DESCRIPTION": "Documentation is missing key information",
1012 | "AMBIGUOUS_DESCRIPTION": "Description is unclear or can be interpreted in multiple ways",
1013 | "INCORRECT_DESCRIPTION": "Description contains incorrect information",
1014 | "MISSING_SCHEMA_CONSTRAINT": "Schema is missing important constraints",
1015 | "INCORRECT_SCHEMA_CONSTRAINT": "Schema contains incorrect constraints",
1016 | "OVERLY_RESTRICTIVE_SCHEMA": "Schema is unnecessarily restrictive",
1017 | "TYPE_CONFUSION": "Parameter types are inconsistent or unclear",
1018 | "MISSING_EXAMPLE": "Documentation lacks necessary examples",
1019 | "MISLEADING_EXAMPLE": "Examples provided are incorrect or misleading",
1020 | "INCOMPLETE_EXAMPLE": "Examples are present but insufficient",
1021 | "PARAMETER_DEPENDENCY_UNCLEAR": "Dependencies between parameters are not explained",
1022 | "CONFLICTING_CONSTRAINTS": "Schema contains contradictory constraints",
1023 | "AGENT_FORMULATION_ERROR": "Documentation hinders LLM agent's ability to use the tool",
1024 | "SCHEMA_PREVALIDATION_FAILURE": "Schema validation issues",
1025 | "TOOL_EXECUTION_ERROR": "Issues with tool execution",
1026 | "UNKNOWN": "Unspecified documentation issue"
1027 | }
1028 |
1029 | # Sort flaws by occurrence count
1030 | sorted_flaws = sorted(flaw_categories_observed.items(), key=lambda x: x[1], reverse=True)
1031 |
1032 | for flaw, count in sorted_flaws:
1033 | flaws_table.add_row(
1034 | flaw,
1035 | str(count),
1036 | flaw_descriptions.get(flaw, "No description available")
1037 | )
1038 |
1039 | console.print(flaws_table)
1040 |
1041 | # Error reporting
1042 | errors = result.get("errors_during_refinement_process", [])
1043 | if errors:
1044 | console.print("[bold red]Errors During Refinement:[/bold red]")
1045 | for error in errors:
1046 | console.print(f"- [red]{escape(error)}[/red]")
1047 |
1048 |
1049 | async def get_suitable_tools(
1050 | mcp_instance,
1051 | count: int = 1,
1052 | complexity: str = "medium",
1053 | exclude_tools: Optional[List[str]] = None
1054 | ) -> List[str]:
1055 | """
1056 | Find suitable tools for refinement based on complexity.
1057 |
1058 | Args:
1059 | mcp_instance: The MCP server instance
1060 | count: Number of tools to return
1061 | complexity: Desired complexity level ("simple", "medium", "complex")
1062 | exclude_tools: List of tool names to exclude
1063 |
1064 | Returns:
1065 | List of suitable tool names
1066 | """
1067 | exclude_tools = exclude_tools or []
1068 |
1069 | # Get all available tools
1070 | tool_list = await mcp_instance.list_tools()
1071 |
1072 | # Filter out excluded tools and refine_tool_documentation itself
1073 | available_tools = [
1074 | t.name for t in tool_list
1075 | if t.name not in exclude_tools and t.name != "refine_tool_documentation"
1076 | ]
1077 |
1078 | if not available_tools:
1079 | return []
1080 |
1081 | # Define complexity criteria based on schema properties
1082 | if complexity == "simple":
1083 | # Simple tools have few required parameters and a flat schema
1084 | preferred_tools = []
1085 | for tool_name in available_tools:
1086 | try:
1087 | tool_def = next((t for t in tool_list if t.name == tool_name), None)
1088 | if not tool_def:
1089 | continue
1090 |
1091 | input_schema = getattr(tool_def, "inputSchema", {})
1092 | if not input_schema:
1093 | continue
1094 |
1095 | properties = input_schema.get("properties", {})
1096 | required = input_schema.get("required", [])
1097 |
1098 | # Simple tools have few properties and required fields
1099 | if len(properties) <= 3 and len(required) <= 1:
1100 | # Check for nested objects which would increase complexity
1101 | has_nested = any(
1102 | isinstance(prop, dict) and prop.get("type") == "object"
1103 | for prop in properties.values()
1104 | )
1105 |
1106 | if not has_nested:
1107 | preferred_tools.append(tool_name)
1108 | except Exception:
1109 | continue
1110 |
1111 | elif complexity == "complex":
1112 | # Complex tools have deep nested structures and many required parameters
1113 | preferred_tools = []
1114 | for tool_name in available_tools:
1115 | try:
1116 | tool_def = next((t for t in tool_list if t.name == tool_name), None)
1117 | if not tool_def:
1118 | continue
1119 |
1120 | input_schema = getattr(tool_def, "inputSchema", {})
1121 | if not input_schema:
1122 | continue
1123 |
1124 | properties = input_schema.get("properties", {})
1125 | required = input_schema.get("required", [])
1126 |
1127 | # Complex tools have many properties or required fields
1128 | if len(properties) >= 5 or len(required) >= 3:
1129 | # Check for nested objects which would increase complexity
1130 | has_nested = any(
1131 | isinstance(prop, dict) and prop.get("type") == "object"
1132 | for prop in properties.values()
1133 | )
1134 |
1135 | if has_nested:
1136 | preferred_tools.append(tool_name)
1137 | except Exception:
1138 | continue
1139 |
1140 | else: # medium complexity (default)
1141 | # Medium tools are somewhere in between
1142 | preferred_tools = []
1143 | for tool_name in available_tools:
1144 | try:
1145 | tool_def = next((t for t in tool_list if t.name == tool_name), None)
1146 | if not tool_def:
1147 | continue
1148 |
1149 | input_schema = getattr(tool_def, "inputSchema", {})
1150 | if not input_schema:
1151 | continue
1152 |
1153 | properties = input_schema.get("properties", {})
1154 |
1155 | # Medium tools have a moderate number of properties
1156 | if 3 <= len(properties) <= 6:
1157 | preferred_tools.append(tool_name)
1158 | except Exception:
1159 | continue
1160 |
1161 | # If we couldn't find tools matching the complexity criteria, fall back to any available tool
1162 | if not preferred_tools:
1163 | preferred_tools = available_tools
1164 |
1165 | # Prioritize tools without namespaces (i.e., not "namespace:tool_name")
1166 | prioritized_tools = [t for t in preferred_tools if ":" not in t]
1167 |
1168 | # If we still need more tools and have prioritized all we could, add namespace tools
1169 | if len(prioritized_tools) < count:
1170 | namespace_tools = [t for t in preferred_tools if ":" in t]
1171 | prioritized_tools.extend(namespace_tools)
1172 |
1173 | # Return the requested number of tools (or fewer if not enough are available)
1174 | return prioritized_tools[:min(count, len(prioritized_tools))]
1175 |
1176 |
1177 | async def demo_single_tool_refinement(
1178 | gateway: Gateway,
1179 | tracker: CostTracker,
1180 | target_tool: Optional[str] = None,
1181 | refinement_provider: Optional[str] = None,
1182 | refinement_model: Optional[str] = None,
1183 | max_iterations: Optional[int] = None
1184 | ):
1185 | """Demonstrate refining documentation for a single tool."""
1186 | console.print(Rule("[bold cyan]Single Tool Refinement[/bold cyan]", style="cyan"))
1187 |
1188 | # Use specified tool or find a suitable one
1189 | selected_tool = None
1190 | if target_tool:
1191 | # Check if specified tool exists
1192 | tool_list = await gateway.mcp.list_tools()
1193 | available_tools = [t.name for t in tool_list]
1194 |
1195 | if target_tool in available_tools:
1196 | selected_tool = target_tool
1197 | else:
1198 | logger.warning(f"Specified tool '{target_tool}' not found", emoji_key="warning")
1199 | console.print(f"[yellow]Warning:[/yellow] Specified tool '{target_tool}' not found. Selecting automatically.")
1200 |
1201 | # Auto-select if needed
1202 | if not selected_tool:
1203 | suitable_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="medium")
1204 |
1205 | if suitable_tools:
1206 | selected_tool = suitable_tools[0]
1207 | else:
1208 | logger.error("No suitable tools found for refinement demo", emoji_key="error")
1209 | console.print("[bold red]Error:[/bold red] No suitable tools found for refinement demo.")
1210 | return
1211 |
1212 | console.print(f"Selected tool for refinement: [cyan]{selected_tool}[/cyan]")
1213 |
1214 | # Determine provider and model
1215 | provider = refinement_provider or Provider.OPENAI.value
1216 |
1217 | # Find best available model if not specified
1218 | if not refinement_model:
1219 | try:
1220 | if provider == Provider.OPENAI.value:
1221 | model = "gpt-4.1" # Prefer this for best results
1222 | # Check if model is available
1223 | provider_instance = gateway.providers.get(provider)
1224 | if provider_instance:
1225 | models = await provider_instance.list_models()
1226 | model_ids = [m.get("id") for m in models]
1227 | if model not in model_ids:
1228 | model = "gpt-4.1-mini" # Fall back to mini
1229 | elif provider == Provider.ANTHROPIC.value:
1230 | model = "claude-3-5-sonnet"
1231 | else:
1232 | # Use default model for other providers
1233 | provider_instance = gateway.providers.get(provider)
1234 | if provider_instance:
1235 | model = provider_instance.get_default_model()
1236 | else:
1237 | model = None
1238 | except Exception as e:
1239 | logger.warning(f"Error determining model for {provider}: {e}", emoji_key="warning")
1240 | model = None
1241 |
1242 | # If we still don't have a model, try a different provider
1243 | if not model:
1244 | for fallback_provider in SETTINGS["fallback_providers"]:
1245 | try:
1246 | provider_instance = gateway.providers.get(fallback_provider)
1247 | if provider_instance:
1248 | model = provider_instance.get_default_model()
1249 | provider = fallback_provider
1250 | break
1251 | except Exception:
1252 | continue
1253 |
1254 | # If still no model, use a reasonable default
1255 | if not model:
1256 | model = "gpt-4.1-mini"
1257 | provider = Provider.OPENAI.value
1258 | else:
1259 | model = refinement_model
1260 |
1261 | # Define refinement parameters
1262 | iterations = max_iterations or 2 # Default to 2 for demo
1263 |
1264 | params = {
1265 | "tool_names": [selected_tool],
1266 | "max_iterations": iterations,
1267 | "refinement_model_config": {
1268 | "provider": provider,
1269 | "model": model,
1270 | "temperature": 0.2,
1271 | },
1272 | "validation_level": "full",
1273 | "enable_winnowing": True,
1274 | "progress_callback": display_refinement_progress,
1275 | }
1276 |
1277 | console.print(Panel(
1278 | Syntax(json.dumps({k: v for k, v in params.items() if k != "progress_callback"}, indent=2), "json"),
1279 | title="Refinement Parameters",
1280 | border_style="dim cyan",
1281 | expand=False
1282 | ))
1283 |
1284 | # Create a progress display
1285 | console.print("\n[bold cyan]Refinement Progress:[/bold cyan]")
1286 | detail_console.print(f"\n[bold]Starting refinement for {selected_tool}...[/bold]")
1287 |
1288 | # Estimate cost
1289 | estimated_cost = 0.03 * iterations # Very rough estimate per iteration
1290 | console.print(f"[cyan]Estimated cost:[/cyan] ${estimated_cost:.2f} USD")
1291 |
1292 | # Check if cost would exceed limit
1293 | if estimated_cost > SETTINGS["cost_limit"]:
1294 | console.print(Panel(
1295 | f"Estimated cost (${estimated_cost:.2f}) exceeds the set limit (${SETTINGS['cost_limit']:.2f}).\n"
1296 | "Adjusting iterations to stay within budget.",
1297 | title="⚠️ Cost Limit Warning",
1298 | border_style="yellow",
1299 | expand=False
1300 | ))
1301 | # Adjust iterations to stay under limit
1302 | adjusted_iterations = max(1, int(SETTINGS["cost_limit"] / 0.03))
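# e.g. the default $5.00 limit with the ~$0.03/iteration estimate above allows at most 166 iterations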
1303 | params["max_iterations"] = adjusted_iterations
1304 | console.print(f"[yellow]Reducing iterations from {iterations} to {adjusted_iterations}[/yellow]")
1305 |
1306 | with Progress(
1307 | TextColumn("[bold blue]{task.description}"),
1308 | BarColumn(complete_style="green", finished_style="green"),
1309 | TaskProgressColumn(),
1310 | TimeElapsedColumn(),
1311 | console=console,
1312 | expand=True
1313 | ) as progress:
1314 | task_id = progress.add_task("[cyan]Refining tool documentation...", total=100)
1315 |
1316 | # Execute the refinement
1317 | start_time = time.time()
1318 | try:
1319 | result = await gateway.mcp.call_tool("refine_tool_documentation", params)
1320 |
1321 | # Cosmetic progress fill: call_tool has already returned at this point, so this loop
1322 | # only animates the bar; live stage updates were streamed via display_refinement_progress
1323 | elapsed = 0
1324 | while progress.tasks[task_id].completed < 100 and elapsed < 60:
1325 | progress.update(task_id, completed=min(95, elapsed * 1.5))
1326 | await asyncio.sleep(0.5)
1327 | elapsed = time.time() - start_time
1328 |
1329 | progress.update(task_id, completed=100)
1330 |
1331 | # Track cost if available
1332 | if isinstance(result, dict) and "total_refinement_cost" in result:
1333 | tracker.add_generic_cost(
1334 | cost=result.get("total_refinement_cost", 0.0),
1335 | description=f"Refinement of {selected_tool}",
1336 | provider=provider,
1337 | model=model
1338 | )
1339 |
1340 | # Display the results
1341 | display_refinement_result(
1342 | result,
1343 | console=console,
1344 | visualization_level=SETTINGS["visualization_level"],
1345 | save_to_file=SETTINGS["save_results"],
1346 | output_dir=SETTINGS["output_dir"]
1347 | )
1348 |
1349 | return result
1350 |
1351 | except Exception as e:
1352 | progress.update(task_id, completed=100, description="[bold red]Refinement failed!")
1353 | logger.error(f"Error during single tool refinement: {e}", emoji_key="error", exc_info=True)
1354 | console.print(f"[bold red]Error during refinement:[/bold red] {escape(str(e))}")
1355 | return None
1356 |
1357 |
1358 | async def demo_multi_tool_refinement(
1359 | gateway: Gateway,
1360 | tracker: CostTracker,
1361 | target_tools: Optional[List[str]] = None,
1362 | refinement_provider: Optional[str] = None,
1363 | refinement_model: Optional[str] = None,
1364 | max_iterations: Optional[int] = None
1365 | ):
1366 | """Demonstrate refining documentation for multiple tools simultaneously."""
1367 | console.print(Rule("[bold cyan]Multi-Tool Refinement[/bold cyan]", style="cyan"))
1368 |
1369 | # Use specified tools or find suitable ones
1370 | selected_tools = []
1371 |
1372 | if target_tools:
1373 | # Check which specified tools exist
1374 | tool_list = await gateway.mcp.list_tools()
1375 | available_tools = [t.name for t in tool_list]
1376 |
1377 | for tool_name in target_tools:
1378 | if tool_name in available_tools:
1379 | selected_tools.append(tool_name)
1380 | else:
1381 | logger.warning(f"Specified tool '{tool_name}' not found", emoji_key="warning")
1382 | console.print(f"[yellow]Warning:[/yellow] Specified tool '{tool_name}' not found. Skipping.")
1383 |
1384 | # Auto-select if needed
1385 | if not selected_tools:
1386 | # Get various complexity levels for a diverse mix
1387 | simple_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="simple")
1388 | medium_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="medium", exclude_tools=simple_tools)
1389 | complex_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="complex", exclude_tools=simple_tools + medium_tools)
1390 |
1391 | selected_tools = simple_tools + medium_tools + complex_tools
1392 |
1393 | if not selected_tools:
1394 | # Fall back to any available tools
1395 | selected_tools = await get_suitable_tools(gateway.mcp, count=3, complexity="medium")
1396 |
1397 | if not selected_tools:
1398 | logger.error("No suitable tools found for multi-tool refinement demo", emoji_key="error")
1399 | console.print("[bold red]Error:[/bold red] No suitable tools found for multi-tool refinement demo.")
1400 | return
1401 |
1402 | console.print(f"Selected tools for refinement: [cyan]{', '.join(selected_tools)}[/cyan]")
1403 |
1404 | # Determine provider and model
1405 | provider = refinement_provider or Provider.OPENAI.value
1406 |
1407 | # Find best available model if not specified
1408 | if not refinement_model:
1409 | try:
1410 | if provider == Provider.OPENAI.value:
1411 | model = "gpt-4.1-mini" # Use mini for multi-tool to save cost
1412 | # Check if model is available
1413 | provider_instance = gateway.providers.get(provider)
1414 | if provider_instance:
1415 | models = await provider_instance.list_models()
1416 | model_ids = [m.get("id") for m in models]
1417 | if model not in model_ids:
1418 | model = provider_instance.get_default_model()
1419 | elif provider == Provider.ANTHROPIC.value:
1420 | model = "claude-3-5-haiku"
1421 | else:
1422 | # Use default model for other providers
1423 | provider_instance = gateway.providers.get(provider)
1424 | if provider_instance:
1425 | model = provider_instance.get_default_model()
1426 | else:
1427 | model = None
1428 | except Exception as e:
1429 | logger.warning(f"Error determining model for {provider}: {e}", emoji_key="warning")
1430 | model = None
1431 |
1432 | # If we still don't have a model, try a different provider
1433 | if not model:
1434 | for fallback_provider in SETTINGS["fallback_providers"]:
1435 | try:
1436 | provider_instance = gateway.providers.get(fallback_provider)
1437 | if provider_instance:
1438 | model = provider_instance.get_default_model()
1439 | provider = fallback_provider
1440 | break
1441 | except Exception:
1442 | continue
1443 |
1444 | # If still no model, use a reasonable default
1445 | if not model:
1446 | model = "gpt-4.1-mini"
1447 | provider = Provider.OPENAI.value
1448 | else:
1449 | model = refinement_model
1450 |
1451 | # Define refinement parameters with variations from the first demo
1452 | iterations = max_iterations or 1 # Default to 1 for multi-tool
1453 |
1454 | params = {
1455 | "tool_names": selected_tools,
1456 | "max_iterations": iterations,
1457 | "refinement_model_config": {
1458 | "provider": provider,
1459 | "model": model,
1460 | "temperature": 0.3,
1461 | },
1462 | # Add an ensemble for better analysis if using full visualization
1463 | "analysis_ensemble_configs": [
1464 | {
1465 | "provider": Provider.ANTHROPIC.value if provider != Provider.ANTHROPIC.value else Provider.OPENAI.value,
1466 | "model": "claude-3-5-haiku" if provider != Provider.ANTHROPIC.value else "gpt-4.1-mini",
1467 | "temperature": 0.2,
1468 | }
1469 | ] if SETTINGS["visualization_level"] == "full" else None,
1470 | "validation_level": "basic", # Use basic validation for speed
1471 | "enable_winnowing": False, # Skip winnowing for demo speed
1472 | "progress_callback": display_refinement_progress,
1473 | }
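    # The ensemble config above pairs the primary refinement model with a model from the other
    # provider (Anthropic vs OpenAI), and only when visualization_level is "full" -- presumably
    # to get a second opinion during analysis.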
1474 |
1475 | console.print(Panel(
1476 | Syntax(json.dumps({k: v for k, v in params.items() if k not in ["progress_callback", "analysis_ensemble_configs"]}, indent=2), "json"),
1477 | title="Multi-Tool Refinement Parameters",
1478 | border_style="dim cyan",
1479 | expand=False
1480 | ))
1481 |
1482 | # Estimate cost - higher with multiple tools
1483 | estimated_cost = 0.02 * iterations * len(selected_tools)
1484 | console.print(f"[cyan]Estimated cost:[/cyan] ${estimated_cost:.2f} USD")
1485 |
1486 | # Check if cost would exceed limit
1487 | if estimated_cost > SETTINGS["cost_limit"]:
1488 | console.print(Panel(
1489 | f"Estimated cost (${estimated_cost:.2f}) exceeds the set limit (${SETTINGS['cost_limit']:.2f}).\n"
1490 | "Reducing tool count to stay within budget.",
1491 | title="⚠️ Cost Limit Warning",
1492 | border_style="yellow",
1493 | expand=False
1494 | ))
1495 | # Reduce the number of tools
1496 | max_tools = max(1, int(SETTINGS["cost_limit"] / (0.02 * iterations)))
1497 | selected_tools = selected_tools[:max_tools]
1498 | params["tool_names"] = selected_tools
1499 | console.print(f"[yellow]Reducing tools to: {', '.join(selected_tools)}[/yellow]")
1500 |
1501 | # Create a progress display
1502 | console.print("\n[bold cyan]Multi-Tool Refinement Progress:[/bold cyan]")
1503 | detail_console.print(f"\n[bold]Starting refinement for {len(selected_tools)} tools...[/bold]")
1504 |
1505 | # We'll create a task for each tool
1506 | with Progress(
1507 | TextColumn("[bold blue]{task.description}"),
1508 | BarColumn(complete_style="green", finished_style="green"),
1509 | TaskProgressColumn(),
1510 | TimeElapsedColumn(),
1511 | console=console,
1512 | expand=True
1513 | ) as progress:
1514 | # Create a task for overall progress
1515 | overall_task = progress.add_task("[cyan]Overall progress...", total=100)
1516 |
1517 | # Execute the refinement
1518 | start_time = time.time()
1519 | try:
1520 | result = await gateway.mcp.call_tool("refine_tool_documentation", params)
1521 |
1522 | # Simulate progress updates
1523 | # The actual progress is displayed through display_refinement_progress
1524 | elapsed = 0
1525 | while progress.tasks[overall_task].completed < 100 and elapsed < 120:
1526 | progress.update(overall_task, completed=min(95, elapsed * 0.8))
1527 | await asyncio.sleep(0.5)
1528 | elapsed = time.time() - start_time
1529 |
1530 | progress.update(overall_task, completed=100)
1531 |
1532 | # Track cost if available
1533 | if isinstance(result, dict) and "total_refinement_cost" in result:
1534 | tracker.add_generic_cost(
1535 | cost=result.get("total_refinement_cost", 0.0),
1536 | description=f"Multi-tool refinement ({len(selected_tools)} tools)",
1537 | provider=provider,
1538 | model=model
1539 | )
1540 |
1541 | # Display the results
1542 | display_refinement_result(
1543 | result,
1544 | console=console,
1545 | visualization_level=SETTINGS["visualization_level"],
1546 | save_to_file=SETTINGS["save_results"],
1547 | output_dir=SETTINGS["output_dir"]
1548 | )
1549 |
1550 | return result
1551 |
1552 | except Exception as e:
1553 | progress.update(overall_task, completed=100, description="[bold red]Refinement failed!")
1554 | logger.error(f"Error during multi-tool refinement: {e}", emoji_key="error", exc_info=True)
1555 | console.print(f"[bold red]Error during multi-tool refinement:[/bold red] {escape(str(e))}")
1556 | return None
1557 |
1558 |
1559 | async def demo_custom_test_generation(
1560 | gateway: Gateway,
1561 | tracker: CostTracker,
1562 | target_tool: Optional[str] = None,
1563 | refinement_provider: Optional[str] = None,
1564 | refinement_model: Optional[str] = None,
1565 | max_iterations: Optional[int] = None
1566 | ):
1567 | """Demonstrate refinement with custom test generation strategies."""
1568 | console.print(Rule("[bold cyan]Custom Test Generation Strategy[/bold cyan]", style="cyan"))
1569 |
1570 | # Choose a single tool to refine
1571 | selected_tool = None
1572 |
1573 | if target_tool:
1574 | # Check if specified tool exists
1575 | tool_list = await gateway.mcp.list_tools()
1576 | available_tools = [t.name for t in tool_list]
1577 |
1578 | if target_tool in available_tools:
1579 | selected_tool = target_tool
1580 | else:
1581 | logger.warning(f"Specified tool '{target_tool}' not found", emoji_key="warning")
1582 | console.print(f"[yellow]Warning:[/yellow] Specified tool '{target_tool}' not found. Selecting automatically.")
1583 |
1584 | # Auto-select if needed (prefer complex tools for custom test demo)
1585 | if not selected_tool:
1586 | complex_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="complex")
1587 |
1588 | if complex_tools:
1589 | selected_tool = complex_tools[0]
1590 | else:
1591 | # Fall back to medium complexity
1592 | medium_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="medium")
1593 |
1594 | if medium_tools:
1595 | selected_tool = medium_tools[0]
1596 | else:
1597 | # Last resort - any tool
1598 | simple_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="simple")
1599 |
1600 | if simple_tools:
1601 | selected_tool = simple_tools[0]
1602 |
1603 | if not selected_tool:
1604 | logger.error("No suitable tools found for custom test generation demo", emoji_key="error")
1605 | console.print("[bold red]Error:[/bold red] No suitable tools found for custom test generation demo.")
1606 | return
1607 |
1608 | console.print(f"Selected tool for custom test generation: [cyan]{selected_tool}[/cyan]")
1609 |
1610 | # Determine provider and model
1611 | provider = refinement_provider or Provider.OPENAI.value
1612 |
1613 | # Find best available model if not specified
1614 | if not refinement_model:
1615 | try:
1616 | if provider == Provider.OPENAI.value:
1617 | model = "gpt-4.1" # Prefer this for best results
1618 | # Check if model is available
1619 | provider_instance = gateway.providers.get(provider)
1620 | if provider_instance:
1621 | models = await provider_instance.list_models()
1622 | model_ids = [m.get("id") for m in models]
1623 | if model not in model_ids:
1624 | model = "gpt-4.1-mini" # Fall back to mini
1625 | elif provider == Provider.ANTHROPIC.value:
1626 | model = "claude-3-5-sonnet"
1627 | else:
1628 | # Use default model for other providers
1629 | provider_instance = gateway.providers.get(provider)
1630 | if provider_instance:
1631 | model = provider_instance.get_default_model()
1632 | else:
1633 | model = None
1634 | except Exception as e:
1635 | logger.warning(f"Error determining model for {provider}: {e}", emoji_key="warning")
1636 | model = None
1637 |
1638 | # If we still don't have a model, try a different provider
1639 | if not model:
1640 | for fallback_provider in SETTINGS["fallback_providers"]:
1641 | try:
1642 | provider_instance = gateway.providers.get(fallback_provider)
1643 | if provider_instance:
1644 | model = provider_instance.get_default_model()
1645 | provider = fallback_provider
1646 | break
1647 | except Exception:
1648 | continue
1649 |
1650 | # If still no model, use a reasonable default
1651 | if not model:
1652 | model = "gpt-4.1-mini"
1653 | provider = Provider.OPENAI.value
1654 | else:
1655 | model = refinement_model
1656 |
1657 | # Define refinement parameters with custom test generation strategy
1658 | iterations = max_iterations or 1
1659 |
1660 | params = {
1661 | "tool_names": [selected_tool],
1662 | "max_iterations": iterations,
1663 | "refinement_model_config": {
1664 | "provider": provider,
1665 | "model": model,
1666 | "temperature": 0.2,
1667 | },
1668 | # Custom test generation strategy
1669 | "generation_config": {
1670 | "positive_required_only": 3, # More tests with just required params
1671 | "positive_optional_mix": 5, # More tests with mixed optional params
1672 | "negative_type": 4, # More type validation checks
1673 | "negative_required": 3, # More tests with missing required params
1674 | "edge_boundary_min": 2, # More tests with boundary values
1675 | "edge_boundary_max": 2,
1676 | "llm_realistic_combo": 5, # More LLM-generated realistic tests
1677 | "llm_ambiguity_probe": 3, # More tests probing ambiguities
1678 | },
1679 | "validation_level": "full",
1680 | "enable_winnowing": True,
1681 | "progress_callback": display_refinement_progress,
1682 | }
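    # Each generation_config key above sets how many test cases of that category the refiner
    # generates; categories not listed presumably fall back to the tool's defaults (assumption).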
1683 |
1684 | console.print(Panel(
1685 | Group(
1686 | Syntax(json.dumps({k: v for k, v in params.items() if k not in ["progress_callback", "generation_config"]}, indent=2), "json"),
1687 | "\n[bold cyan]Custom Test Generation Strategy:[/bold cyan]",
1688 | Syntax(json.dumps(params["generation_config"], indent=2), "json"),
1689 | ),
1690 | title="Custom Test Generation Parameters",
1691 | border_style="dim cyan",
1692 | expand=False
1693 | ))
1694 |
1695 | # Estimate cost (higher due to more test cases)
1696 | estimated_cost = 0.04 * iterations
1697 | console.print(f"[cyan]Estimated cost:[/cyan] ${estimated_cost:.2f} USD")
1698 |
1699 | # Check if cost would exceed limit
1700 | if estimated_cost > SETTINGS["cost_limit"]:
1701 | console.print(Panel(
1702 | f"Estimated cost (${estimated_cost:.2f}) exceeds the set limit (${SETTINGS['cost_limit']:.2f}).\n"
1703 | "Reducing iterations to stay within budget.",
1704 | title="⚠️ Cost Limit Warning",
1705 | border_style="yellow",
1706 | expand=False
1707 | ))
1708 |         # Fall back to the minimum of a single iteration to stay under the limit
1709 | params["max_iterations"] = 1
1710 |
1711 | # Create a progress display
1712 | console.print("\n[bold cyan]Custom Test Generation Progress:[/bold cyan]")
1713 | detail_console.print(f"\n[bold]Starting refinement with custom test strategy for {selected_tool}...[/bold]")
1714 |
1715 | with Progress(
1716 | TextColumn("[bold blue]{task.description}"),
1717 | BarColumn(complete_style="green", finished_style="green"),
1718 | TaskProgressColumn(),
1719 | TimeElapsedColumn(),
1720 | console=console,
1721 | expand=True
1722 | ) as progress:
1723 | task_id = progress.add_task("[cyan]Refining with custom test strategy...", total=100)
1724 |
1725 | # Execute the refinement
1726 | start_time = time.time()
1727 | try:
1728 | result = await gateway.mcp.call_tool("refine_tool_documentation", params)
1729 |
1730 | # Simulate progress updates
1731 | elapsed = 0
1732 | while progress.tasks[task_id].completed < 100 and elapsed < 60:
1733 | progress.update(task_id, completed=min(95, elapsed * 1.5))
1734 | await asyncio.sleep(0.5)
1735 | elapsed = time.time() - start_time
1736 |
1737 | progress.update(task_id, completed=100)
1738 |
1739 | # Track cost if available
1740 | if isinstance(result, dict) and "total_refinement_cost" in result:
1741 | tracker.add_generic_cost(
1742 | cost=result.get("total_refinement_cost", 0.0),
1743 | description=f"Custom test strategy for {selected_tool}",
1744 | provider=provider,
1745 | model=model
1746 | )
1747 |
1748 | # Display the results
1749 | display_refinement_result(
1750 | result,
1751 | console=console,
1752 | visualization_level=SETTINGS["visualization_level"],
1753 | save_to_file=SETTINGS["save_results"],
1754 | output_dir=SETTINGS["output_dir"]
1755 | )
1756 |
1757 | return result
1758 |
1759 | except Exception as e:
1760 | progress.update(task_id, completed=100, description="[bold red]Refinement failed!")
1761 | logger.error(f"Error during custom test generation: {e}", emoji_key="error", exc_info=True)
1762 | console.print(f"[bold red]Error during custom test generation:[/bold red] {escape(str(e))}")
1763 | return None
1764 |
1765 |
1766 | async def demo_all_tools_refinement(
1767 | gateway: Gateway,
1768 | tracker: CostTracker,
1769 | refinement_provider: Optional[str] = None,
1770 | refinement_model: Optional[str] = None,
1771 | max_iterations: Optional[int] = None
1772 | ):
1773 | """Demonstrate refining documentation for all available tools."""
1774 | console.print(Rule("[bold cyan]All Tools Refinement[/bold cyan]", style="cyan"))
1775 |
1776 | # Get all available tools (excluding refine_tool_documentation itself)
1777 | tool_list = await gateway.mcp.list_tools()
1778 | available_tools = [
1779 | t.name for t in tool_list
1780 | if t.name != "refine_tool_documentation"
1781 | ]
1782 |
1783 | if not available_tools:
1784 | logger.error("No tools available for refinement", emoji_key="error")
1785 | console.print("[bold red]Error:[/bold red] No tools available for refinement.")
1786 | return
1787 |
1788 | console.print(f"[cyan]Found {len(available_tools)} tools available for refinement[/cyan]")
1789 |
1790 | # Determine provider and model
1791 | provider = refinement_provider or Provider.OPENAI.value
1792 |
1793 | # Find best available model if not specified
1794 | if not refinement_model:
1795 | try:
1796 | if provider == Provider.OPENAI.value:
1797 | model = "gpt-4.1-mini" # Use mini for multi-tool to save cost
1798 | # Check if model is available
1799 | provider_instance = gateway.providers.get(provider)
1800 | if provider_instance:
1801 | models = await provider_instance.list_models()
1802 | model_ids = [m.get("id") for m in models]
1803 | if model not in model_ids:
1804 | model = provider_instance.get_default_model()
1805 | elif provider == Provider.ANTHROPIC.value:
1806 | model = "claude-3-5-haiku"
1807 | else:
1808 | # Use default model for other providers
1809 | provider_instance = gateway.providers.get(provider)
1810 | if provider_instance:
1811 | model = provider_instance.get_default_model()
1812 | else:
1813 | model = None
1814 | except Exception as e:
1815 | logger.warning(f"Error determining model for {provider}: {e}", emoji_key="warning")
1816 | model = None
1817 |
1818 | # If we still don't have a model, try a different provider
1819 | if not model:
1820 | for fallback_provider in SETTINGS["fallback_providers"]:
1821 | try:
1822 | provider_instance = gateway.providers.get(fallback_provider)
1823 | if provider_instance:
1824 | model = provider_instance.get_default_model()
1825 | provider = fallback_provider
1826 | break
1827 | except Exception:
1828 | continue
1829 |
1830 | # If still no model, use a reasonable default
1831 | if not model:
1832 | model = "gpt-4.1-mini"
1833 | provider = Provider.OPENAI.value
1834 | else:
1835 | model = refinement_model
1836 |
1837 | # Define refinement parameters
1838 | iterations = max_iterations or 1 # Default to 1 for all-tools
1839 |
1840 | params = {
1841 | "refine_all_available": True, # This is the key difference for this demo
1842 | "max_iterations": iterations,
1843 | "refinement_model_config": {
1844 | "provider": provider,
1845 | "model": model,
1846 | "temperature": 0.3,
1847 | },
1848 | "validation_level": "basic", # Use basic validation for speed
1849 | "enable_winnowing": False, # Skip winnowing for demo speed
1850 | "progress_callback": display_refinement_progress,
1851 | }
1852 |
1853 | console.print(Panel(
1854 | Syntax(json.dumps({k: v for k, v in params.items() if k != "progress_callback"}, indent=2), "json"),
1855 | title="All Tools Refinement Parameters",
1856 | border_style="dim cyan",
1857 | expand=False
1858 | ))
1859 |
1860 | # Estimate cost - higher with multiple tools
1861 | estimated_cost = 0.01 * iterations * len(available_tools) # Lower per-tool cost with bulk processing
1862 | console.print(f"[cyan]Estimated cost:[/cyan] ${estimated_cost:.2f} USD")
1863 |
1864 | # Check if cost would exceed limit
1865 | if estimated_cost > SETTINGS["cost_limit"]:
1866 | console.print(Panel(
1867 | f"Estimated cost (${estimated_cost:.2f}) exceeds the set limit (${SETTINGS['cost_limit']:.2f}).\n"
1868 | "Switching to targeted refinement to stay within budget.",
1869 | title="⚠️ Cost Limit Warning",
1870 | border_style="yellow",
1871 | expand=False
1872 | ))
1873 |
1874 | # Switch to using targeted tool_names instead of refine_all_available
1875 | max_tools = max(1, int(SETTINGS["cost_limit"] / (0.02 * iterations)))
1876 | selected_tools = random.sample(available_tools, min(max_tools, len(available_tools)))
1877 |
1878 | params["refine_all_available"] = False
1879 | params["tool_names"] = selected_tools
1880 |
1881 | console.print(f"[yellow]Reducing to {len(selected_tools)} randomly selected tools[/yellow]")
1882 |
1883 | # Create a progress display
1884 | console.print("\n[bold cyan]All Tools Refinement Progress:[/bold cyan]")
1885 | detail_console.print(f"\n[bold]Starting refinement for all {len(available_tools)} tools...[/bold]")
1886 |
1887 | with Progress(
1888 | SpinnerColumn(),
1889 | TextColumn("[bold blue]{task.description}"),
1890 | BarColumn(complete_style="green", finished_style="green"),
1891 | TaskProgressColumn(),
1892 | TimeRemainingColumn(),
1893 | console=console,
1894 | expand=True
1895 | ) as progress:
1896 | task_id = progress.add_task("[cyan]Refining all tools...", total=100)
1897 |
1898 | # Execute the refinement
1899 | start_time = time.time()
1900 | try:
1901 | result = await gateway.mcp.call_tool("refine_tool_documentation", params)
1902 |
1903 | # Simulate progress updates
1904 | elapsed = 0
1905 | while progress.tasks[task_id].completed < 100 and elapsed < 300: # Longer timeout for all tools
1906 | progress.update(task_id, completed=min(95, elapsed * 0.3)) # Slower progress for more tools
1907 | await asyncio.sleep(1.0)
1908 | elapsed = time.time() - start_time
1909 |
1910 | progress.update(task_id, completed=100)
1911 |
1912 | # Track cost if available
1913 | if isinstance(result, dict) and "total_refinement_cost" in result:
1914 | tracker.add_generic_cost(
1915 | cost=result.get("total_refinement_cost", 0.0),
1916 | description=f"All tools refinement ({len(available_tools)} tools)",
1917 | provider=provider,
1918 | model=model
1919 | )
1920 |
1921 | # Display the results
1922 | display_refinement_result(
1923 | result,
1924 | console=console,
1925 | visualization_level=SETTINGS["visualization_level"],
1926 | save_to_file=SETTINGS["save_results"],
1927 | output_dir=SETTINGS["output_dir"]
1928 | )
1929 |
1930 | return result
1931 |
1932 | except Exception as e:
1933 | progress.update(task_id, completed=100, description="[bold red]Refinement failed!")
1934 | logger.error(f"Error during all tools refinement: {e}", emoji_key="error", exc_info=True)
1935 | console.print(f"[bold red]Error during all tools refinement:[/bold red] {escape(str(e))}")
1936 | return None
1937 |
1938 |
1939 | async def demo_schema_focused_refinement(
1940 | gateway: Gateway,
1941 | tracker: CostTracker,
1942 | target_tool: Optional[str] = None,
1943 | refinement_provider: Optional[str] = None,
1944 | refinement_model: Optional[str] = None
1945 | ):
1946 | """Demonstrate refinement focused specifically on schema improvements."""
1947 | console.print(Rule("[bold cyan]Schema-Focused Refinement[/bold cyan]", style="cyan"))
1948 |
1949 | # Choose a complex tool to refine
1950 | selected_tool = None
1951 |
1952 | if target_tool:
1953 | # Check if specified tool exists
1954 | tool_list = await gateway.mcp.list_tools()
1955 | available_tools = [t.name for t in tool_list]
1956 |
1957 | if target_tool in available_tools:
1958 | selected_tool = target_tool
1959 | else:
1960 | logger.warning(f"Specified tool '{target_tool}' not found", emoji_key="warning")
1961 | console.print(f"[yellow]Warning:[/yellow] Specified tool '{target_tool}' not found. Selecting automatically.")
1962 |
1963 | # Auto-select if needed (prefer complex tools for schema refinement)
1964 | if not selected_tool:
1965 | complex_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="complex")
1966 |
1967 | if complex_tools:
1968 | selected_tool = complex_tools[0]
1969 | else:
1970 | # Fall back to medium complexity
1971 | medium_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="medium")
1972 |
1973 | if medium_tools:
1974 | selected_tool = medium_tools[0]
1975 | else:
1976 | # Last resort - any tool
1977 | simple_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="simple")
1978 |
1979 | if simple_tools:
1980 | selected_tool = simple_tools[0]
1981 |
1982 | if not selected_tool:
1983 | logger.error("No suitable tools found for schema-focused refinement demo", emoji_key="error")
1984 | console.print("[bold red]Error:[/bold red] No suitable tools found for schema-focused refinement demo.")
1985 | return
1986 |
1987 | console.print(f"Selected tool for schema-focused refinement: [cyan]{selected_tool}[/cyan]")
1988 |
1989 | # Get tool schema
1990 | tool_list = await gateway.mcp.list_tools()
1991 | tool_def = next((t for t in tool_list if t.name == selected_tool), None)
1992 |
1993 | if not tool_def or not hasattr(tool_def, "inputSchema"):
1994 | logger.error(f"Could not get schema for tool {selected_tool}", emoji_key="error")
1995 | console.print(f"[bold red]Error:[/bold red] Could not get schema for tool {selected_tool}.")
1996 | return
1997 |
1998 | input_schema = getattr(tool_def, "inputSchema", {})
1999 |
2000 | # Display the original schema
2001 | console.print("[bold cyan]Original Schema:[/bold cyan]")
2002 | console.print(Panel(
2003 | Syntax(json.dumps(input_schema, indent=2), "json", theme="default", line_numbers=False),
2004 | title="Original Input Schema",
2005 | border_style="dim cyan",
2006 | expand=False
2007 | ))
2008 |
2009 | # Determine provider and model
2010 | provider = refinement_provider or Provider.OPENAI.value
2011 |
2012 | # Find best available model if not specified
2013 | if not refinement_model:
2014 | try:
2015 | if provider == Provider.OPENAI.value:
2016 | model = "gpt-4.1" # Prefer this for best schema analysis
2017 | # Check if model is available
2018 | provider_instance = gateway.providers.get(provider)
2019 | if provider_instance:
2020 | models = await provider_instance.list_models()
2021 | model_ids = [m.get("id") for m in models]
2022 | if model not in model_ids:
2023 | model = "gpt-4.1-mini" # Fall back to mini
2024 | elif provider == Provider.ANTHROPIC.value:
2025 | model = "claude-3-5-sonnet"
2026 | else:
2027 | # Use default model for other providers
2028 | provider_instance = gateway.providers.get(provider)
2029 | if provider_instance:
2030 | model = provider_instance.get_default_model()
2031 | else:
2032 | model = None
2033 | except Exception as e:
2034 | logger.warning(f"Error determining model for {provider}: {e}", emoji_key="warning")
2035 | model = None
2036 |
2037 | # If we still don't have a model, try a different provider
2038 | if not model:
2039 | for fallback_provider in SETTINGS["fallback_providers"]:
2040 | try:
2041 | provider_instance = gateway.providers.get(fallback_provider)
2042 | if provider_instance:
2043 | model = provider_instance.get_default_model()
2044 | provider = fallback_provider
2045 | break
2046 | except Exception:
2047 | continue
2048 |
2049 | # If still no model, use a reasonable default
2050 | if not model:
2051 | model = "gpt-4.1-mini"
2052 | provider = Provider.OPENAI.value
2053 | else:
2054 | model = refinement_model
2055 |
2056 | # Define refinement parameters focused on schema improvements
2057 | params = {
2058 | "tool_names": [selected_tool],
2059 | "max_iterations": 1, # Single iteration focused on schema
2060 | "refinement_model_config": {
2061 | "provider": provider,
2062 | "model": model,
2063 | "temperature": 0.2,
2064 | },
2065 | # Custom test generation strategy focused on schema edge cases
2066 | "generation_config": {
2067 | "positive_required_only": 2,
2068 | "positive_optional_mix": 3,
2069 | "negative_type": 4, # More type validation checks
2070 | "negative_required": 3, # More tests with missing required params
2071 | "negative_enum": 3, # More enum testing
2072 | "negative_format": 3, # More format testing
2073 | "negative_range": 3, # More range testing
2074 | "negative_length": 3, # More length testing
2075 | "negative_pattern": 3, # More pattern testing
2076 | "edge_boundary_min": 3, # More tests with min boundary values
2077 | "edge_boundary_max": 3, # More tests with max boundary values
2078 | "llm_ambiguity_probe": 2, # Probe for ambiguities
2079 | },
2080 | "validation_level": "full", # Strict validation
2081 | "enable_winnowing": False, # No winnowing needed
2082 | "progress_callback": display_refinement_progress,
2083 | }
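    # This strategy deliberately weights negative and boundary categories so the generated tests
    # exercise the schema's constraints (types, enums, formats, ranges, lengths, patterns)
    # rather than happy-path usage.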
2084 |
2085 | console.print(Panel(
2086 | Syntax(json.dumps({k: v for k, v in params.items() if k not in ["progress_callback", "generation_config"]}, indent=2), "json"),
2087 | title="Schema-Focused Refinement Parameters",
2088 | border_style="dim cyan",
2089 | expand=False
2090 | ))
2091 |
2092 | # Estimate cost
2093 | estimated_cost = 0.035 # Schema focus costs a bit more due to edge case testing
2094 | console.print(f"[cyan]Estimated cost:[/cyan] ${estimated_cost:.2f} USD")
2095 |
2096 | # Create a progress display
2097 | console.print("\n[bold cyan]Schema-Focused Refinement Progress:[/bold cyan]")
2098 | detail_console.print(f"\n[bold]Starting schema-focused refinement for {selected_tool}...[/bold]")
2099 |
2100 | with Progress(
2101 | TextColumn("[bold blue]{task.description}"),
2102 | BarColumn(complete_style="green", finished_style="green"),
2103 | TaskProgressColumn(),
2104 | TimeElapsedColumn(),
2105 | console=console,
2106 | expand=True
2107 | ) as progress:
2108 | task_id = progress.add_task("[cyan]Refining schema...", total=100)
2109 |
2110 | # Execute the refinement
2111 | start_time = time.time()
2112 | try:
2113 | result = await gateway.mcp.call_tool("refine_tool_documentation", params)
2114 |
2115 | # Simulate progress updates
2116 | elapsed = 0
2117 | while progress.tasks[task_id].completed < 100 and elapsed < 60:
2118 | progress.update(task_id, completed=min(95, elapsed * 1.5))
2119 | await asyncio.sleep(0.5)
2120 | elapsed = time.time() - start_time
2121 |
2122 | progress.update(task_id, completed=100)
2123 |
2124 | # Track cost if available
2125 | if isinstance(result, dict) and "total_refinement_cost" in result:
2126 | tracker.add_generic_cost(
2127 | cost=result.get("total_refinement_cost", 0.0),
2128 | description=f"Schema-focused refinement of {selected_tool}",
2129 | provider=provider,
2130 | model=model
2131 | )
2132 |
2133 | # Extract schema patches from the result
2134 | refined_tools = result.get("refined_tools", [])
2135 | target_tool_result = next((t for t in refined_tools if t.get("tool_name") == selected_tool), None)
2136 |
2137 | if target_tool_result and target_tool_result.get("final_proposed_schema_patches"):
2138 | schema_patches = target_tool_result.get("final_proposed_schema_patches", [])
2139 | patched_schema = target_tool_result.get("final_schema_after_patches", {})
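                # Illustrative only (the exact patch format is not guaranteed here): each entry is
                # expected to resemble a JSON-Patch operation, e.g.
                #   {"op": "add", "path": "/properties/query/description", "value": "Search query string"}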
2140 |
2141 | if schema_patches:
2142 | console.print("[bold green]Schema Refinement Results:[/bold green]")
2143 |
2144 | console.print(Panel(
2145 | Syntax(json.dumps(schema_patches, indent=2), "json", theme="default", line_numbers=False),
2146 | title="Applied Schema Patches",
2147 | border_style="magenta",
2148 | expand=False
2149 | ))
2150 |
2151 | if patched_schema:
2152 | console.print(Panel(
2153 | Syntax(json.dumps(patched_schema, indent=2), "json", theme="default", line_numbers=False),
2154 | title="Refined Schema",
2155 | border_style="green",
2156 | expand=False
2157 | ))
2158 |
2159 | # Generate a side-by-side comparison
2160 | console.print(create_side_by_side_diff(
2161 | json.dumps(input_schema, indent=2),
2162 | json.dumps(patched_schema, indent=2),
2163 | title="Schema Before/After Comparison"
2164 | ))
2165 | else:
2166 | console.print("[yellow]No schema patches were applied.[/yellow]")
2167 |
2168 | # Display the full results
2169 | display_refinement_result(
2170 | result,
2171 | console=console,
2172 | visualization_level=SETTINGS["visualization_level"],
2173 | save_to_file=SETTINGS["save_results"],
2174 | output_dir=SETTINGS["output_dir"]
2175 | )
2176 |
2177 | return result
2178 |
2179 | except Exception as e:
2180 | progress.update(task_id, completed=100, description="[bold red]Refinement failed!")
2181 | logger.error(f"Error during schema-focused refinement: {e}", emoji_key="error", exc_info=True)
2182 | console.print(f"[bold red]Error during schema-focused refinement:[/bold red] {escape(str(e))}")
2183 | return None
2184 |
2185 |
2186 | async def demo_model_comparison(
2187 | gateway: Gateway,
2188 | tracker: CostTracker,
2189 | target_tool: Optional[str] = None
2190 | ):
2191 | """Demonstrate comparing different LLM models for refinement."""
2192 | console.print(Rule("[bold cyan]Model Comparison for Refinement[/bold cyan]", style="cyan"))
2193 |
2194 | # Choose a single tool to refine
2195 | selected_tool = None
2196 |
2197 | if target_tool:
2198 | # Check if specified tool exists
2199 | tool_list = await gateway.mcp.list_tools()
2200 | available_tools = [t.name for t in tool_list]
2201 |
2202 | if target_tool in available_tools:
2203 | selected_tool = target_tool
2204 | else:
2205 | logger.warning(f"Specified tool '{target_tool}' not found", emoji_key="warning")
2206 | console.print(f"[yellow]Warning:[/yellow] Specified tool '{target_tool}' not found. Selecting automatically.")
2207 |
2208 | # Auto-select if needed
2209 | if not selected_tool:
2210 | medium_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="medium")
2211 |
2212 | if medium_tools:
2213 | selected_tool = medium_tools[0]
2214 | else:
2215 | # Fall back to any available tool
2216 | simple_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="simple")
2217 |
2218 | if simple_tools:
2219 | selected_tool = simple_tools[0]
2220 |
2221 | if not selected_tool:
2222 | logger.error("No suitable tools found for model comparison demo", emoji_key="error")
2223 | console.print("[bold red]Error:[/bold red] No suitable tools found for model comparison demo.")
2224 | return
2225 |
2226 | console.print(f"Selected tool for model comparison: [cyan]{selected_tool}[/cyan]")
2227 |
2228 | # Define models to compare
2229 | models_to_compare = []
2230 |
2231 | # Check which models are available
2232 | for provider_name in SETTINGS["preferred_providers"] + SETTINGS["fallback_providers"]:
2233 | provider_instance = gateway.providers.get(provider_name)
2234 | if provider_instance:
2235 | try:
2236 | available_models = await provider_instance.list_models()
2237 | model_ids = [m.get("id") for m in available_models]
2238 |
2239 | if provider_name == Provider.OPENAI.value:
2240 | if "gpt-4.1" in model_ids:
2241 | models_to_compare.append((provider_name, "gpt-4.1"))
2242 | if "gpt-4.1-mini" in model_ids:
2243 | models_to_compare.append((provider_name, "gpt-4.1-mini"))
2244 |
2245 | elif provider_name == Provider.ANTHROPIC.value:
2246 | if "claude-3-5-sonnet" in model_ids:
2247 | models_to_compare.append((provider_name, "claude-3-5-sonnet"))
2248 | if "claude-3-5-haiku" in model_ids:
2249 | models_to_compare.append((provider_name, "claude-3-5-haiku"))
2250 |
2251 | elif provider_name == Provider.GEMINI.value:
2252 | if "gemini-2.0-pro" in model_ids:
2253 | models_to_compare.append((provider_name, "gemini-2.0-pro"))
2254 |
2255 | elif provider_name == Provider.DEEPSEEK.value:
2256 | if "deepseek-chat" in model_ids:
2257 | models_to_compare.append((provider_name, "deepseek-chat"))
2258 |
2259 | # If we already have 3+ models, stop looking
2260 | if len(models_to_compare) >= 3:
2261 | break
2262 |
2263 | except Exception as e:
2264 | logger.warning(f"Error listing models for {provider_name}: {e}", emoji_key="warning")
2265 |
2266 | # If we don't have enough models, add some defaults that might work
2267 | if len(models_to_compare) < 2:
2268 | fallback_models = [
2269 | (Provider.OPENAI.value, "gpt-4.1-mini"),
2270 | (Provider.ANTHROPIC.value, "claude-3-5-haiku"),
2271 | (Provider.GEMINI.value, "gemini-2.0-pro")
2272 | ]
2273 |
2274 | for provider, model in fallback_models:
2275 | if (provider, model) not in models_to_compare:
2276 | models_to_compare.append((provider, model))
2277 | if len(models_to_compare) >= 3:
2278 | break
2279 |
2280 | # Limit to max 3 models for a reasonable comparison
2281 | models_to_compare = models_to_compare[:3]
2282 |
2283 | if not models_to_compare:
2284 | logger.error("No models available for comparison", emoji_key="error")
2285 | console.print("[bold red]Error:[/bold red] No models available for comparison.")
2286 | return
2287 |
2288 | console.print(f"Models being compared: [cyan]{', '.join([f'{p}/{m}' for p, m in models_to_compare])}[/cyan]")
2289 |
2290 | # Estimate total cost
2291 | estimated_cost = 0.03 * len(models_to_compare)
2292 | console.print(f"[cyan]Estimated total cost:[/cyan] ${estimated_cost:.2f} USD")
2293 |
2294 | # Check if cost would exceed limit
2295 | if estimated_cost > SETTINGS["cost_limit"]:
2296 | console.print(Panel(
2297 | f"Estimated cost (${estimated_cost:.2f}) exceeds the set limit (${SETTINGS['cost_limit']:.2f}).\n"
2298 | "Reducing the number of models to compare.",
2299 | title="⚠️ Cost Limit Warning",
2300 | border_style="yellow",
2301 | expand=False
2302 | ))
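        # Keep at least two models even when over budget, otherwise there is nothing to compare.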
2303 | max_models = max(2, int(SETTINGS["cost_limit"] / 0.03))
2304 | models_to_compare = models_to_compare[:max_models]
2305 | console.print(f"[yellow]Comparing only: {', '.join([f'{p}/{m}' for p, m in models_to_compare])}[/yellow]")
2306 |
2307 | # Create a progress display
2308 | console.print("\n[bold cyan]Model Comparison Progress:[/bold cyan]")
2309 |
2310 | # Results storage
2311 | model_results = {}
2312 |
2313 | # Run refinement with each model
2314 | for provider, model in models_to_compare:
2315 | detail_console.print(f"\n[bold]Starting refinement with {provider}/{model}...[/bold]")
2316 |
2317 | params = {
2318 | "tool_names": [selected_tool],
2319 | "max_iterations": 1,
2320 | "refinement_model_config": {
2321 | "provider": provider,
2322 | "model": model,
2323 | "temperature": 0.2,
2324 | },
2325 | "validation_level": "basic",
2326 | "enable_winnowing": False,
2327 | "progress_callback": display_refinement_progress,
2328 | }
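        # Every model gets an identical config (same tool, single iteration, basic validation),
        # so differences in the comparison table below reflect the model rather than the setup.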
2329 |
2330 | with Progress(
2331 | TextColumn(f"[bold blue]Testing {provider}/{model}..."),
2332 | BarColumn(complete_style="green", finished_style="green"),
2333 | TaskProgressColumn(),
2334 | TimeElapsedColumn(),
2335 | console=console,
2336 | expand=True
2337 | ) as progress:
2338 | task_id = progress.add_task(f"[cyan]Refining with {model}...", total=100)
2339 |
2340 | # Execute the refinement
2341 | start_time = time.time()
2342 | try:
2343 | result = await gateway.mcp.call_tool("refine_tool_documentation", params)
2344 |
2345 | # Simulate progress updates
2346 | elapsed = 0
2347 | while progress.tasks[task_id].completed < 100 and elapsed < 60:
2348 | progress.update(task_id, completed=min(95, elapsed * 1.5))
2349 | await asyncio.sleep(0.5)
2350 | elapsed = time.time() - start_time
2351 |
2352 | progress.update(task_id, completed=100)
2353 |
2354 | # Track cost if available
2355 | if isinstance(result, dict) and "total_refinement_cost" in result:
2356 | tracker.add_generic_cost(
2357 | cost=result.get("total_refinement_cost", 0.0),
2358 | description=f"{provider}/{model} refinement of {selected_tool}",
2359 | provider=provider,
2360 | model=model
2361 | )
2362 |
2363 | # Store result for comparison
2364 | model_results[(provider, model)] = {
2365 | "result": result,
2366 | "processing_time": time.time() - start_time,
2367 | "cost": result.get("total_refinement_cost", 0.0) if isinstance(result, dict) else 0.0
2368 | }
2369 |
2370 | except Exception as e:
2371 | progress.update(task_id, completed=100, description=f"[bold red]{model} failed!")
2372 | logger.error(f"Error during refinement with {provider}/{model}: {e}", emoji_key="error", exc_info=True)
2373 | console.print(f"[bold red]Error during refinement with {provider}/{model}:[/bold red] {escape(str(e))}")
2374 |
2375 | # Compare and display results
2376 | if model_results:
2377 | console.print(Rule("[bold blue]Model Comparison Results[/bold blue]", style="blue"))
2378 |
2379 | # Create comparison table
2380 | comparison_table = Table(title="Model Performance Comparison", box=box.ROUNDED)
2381 | comparison_table.add_column("Model", style="cyan")
2382 | comparison_table.add_column("Initial Success", style="dim yellow")
2383 | comparison_table.add_column("Final Success", style="green")
2384 | comparison_table.add_column("Improvement", style="magenta")
2385 | comparison_table.add_column("Processing Time", style="blue")
2386 | comparison_table.add_column("Cost", style="red")
2387 |
2388 | for (provider, model), data in model_results.items():
2389 | result = data["result"]
2390 | refined_tools = result.get("refined_tools", [])
2391 |
2392 | # Find the specific tool result
2393 | tool_result = next((t for t in refined_tools if t.get("tool_name") == selected_tool), None)
2394 |
2395 | if tool_result:
2396 | initial_success = tool_result.get("initial_success_rate", 0.0)
2397 | final_success = tool_result.get("final_success_rate", 0.0)
2398 | improvement = tool_result.get("improvement_factor", 0.0)
2399 |
2400 | comparison_table.add_row(
2401 | f"{provider}/{model}",
2402 | f"{initial_success:.1%}",
2403 | f"{final_success:.1%}",
2404 | f"{improvement:.2f}x",
2405 | f"{data['processing_time']:.2f}s",
2406 | f"${data['cost']:.6f}"
2407 | )
2408 |
2409 | console.print(comparison_table)
2410 |
2411 | # Find the best model
2412 | best_model = None
2413 | best_improvement = -1
2414 |
2415 | for (provider, model), data in model_results.items():
2416 | result = data["result"]
2417 | refined_tools = result.get("refined_tools", [])
2418 | tool_result = next((t for t in refined_tools if t.get("tool_name") == selected_tool), None)
2419 |
2420 | if tool_result:
2421 | improvement = tool_result.get("improvement_factor", 0.0)
2422 | if improvement > best_improvement:
2423 | best_improvement = improvement
2424 | best_model = (provider, model)
2425 |
2426 | if best_model:
2427 | console.print(f"[bold green]Best model:[/bold green] [cyan]{best_model[0]}/{best_model[1]}[/cyan] with {best_improvement:.2f}x improvement")
2428 |
2429 | # Show detailed results for the best model
2430 | best_data = model_results[best_model]
2431 | console.print("\n[bold cyan]Detailed Results for Best Model:[/bold cyan]")
2432 |
2433 | display_refinement_result(
2434 | best_data["result"],
2435 | console=console,
2436 | visualization_level=SETTINGS["visualization_level"],
2437 | save_to_file=SETTINGS["save_results"],
2438 | output_dir=SETTINGS["output_dir"]
2439 | )
2440 |
2441 | return model_results
2442 | else:
2443 | console.print("[yellow]No results available for comparison.[/yellow]")
2444 | return None
2445 |
2446 |
2447 | async def demo_cost_optimization(
2448 | gateway: Gateway,
2449 | tracker: CostTracker,
2450 | target_tool: Optional[str] = None
2451 | ):
2452 | """Demonstrate cost optimization techniques for documentation refinement."""
2453 | console.print(Rule("[bold cyan]Cost Optimization Techniques[/bold cyan]", style="cyan"))
2454 |
2455 | # Choose a single tool to refine
2456 | selected_tool = None
2457 |
2458 | if target_tool:
2459 | # Check if specified tool exists
2460 | tool_list = await gateway.mcp.list_tools()
2461 | available_tools = [t.name for t in tool_list]
2462 |
2463 | if target_tool in available_tools:
2464 | selected_tool = target_tool
2465 | else:
2466 | logger.warning(f"Specified tool '{target_tool}' not found", emoji_key="warning")
2467 | console.print(f"[yellow]Warning:[/yellow] Specified tool '{target_tool}' not found. Selecting automatically.")
2468 |
2469 | # Auto-select if needed
2470 | if not selected_tool:
2471 | medium_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="medium")
2472 |
2473 | if medium_tools:
2474 | selected_tool = medium_tools[0]
2475 | else:
2476 | # Fall back to any available tool
2477 | simple_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="simple")
2478 |
2479 | if simple_tools:
2480 | selected_tool = simple_tools[0]
2481 |
2482 | if not selected_tool:
2483 | logger.error("No suitable tools found for cost optimization demo", emoji_key="error")
2484 | console.print("[bold red]Error:[/bold red] No suitable tools found for cost optimization demo.")
2485 | return
2486 |
2487 | console.print(f"Selected tool for cost optimization: [cyan]{selected_tool}[/cyan]")
2488 |
2489 | # Create a table of optimization techniques
2490 | optimization_table = Table(title="Cost Optimization Techniques", box=box.SIMPLE_HEAD)
2491 | optimization_table.add_column("Technique", style="cyan")
2492 | optimization_table.add_column("Description", style="white")
2493 | optimization_table.add_column("Est. Savings", style="green")
2494 |
2495 | optimization_table.add_row(
2496 | "Smaller Models",
2497 | "Use smaller, faster models for initial iterations or simple tools",
2498 | "50-80%"
2499 | )
2500 | optimization_table.add_row(
2501 | "Reduced Iterations",
2502 | "Single iteration can capture most improvements",
2503 | "30-60%"
2504 | )
2505 | optimization_table.add_row(
2506 | "Basic Validation",
2507 | "Use 'basic' validation level instead of 'full'",
2508 | "10-20%"
2509 | )
2510 | optimization_table.add_row(
2511 | "Focused Strategies",
2512 | "Custom test generation focused on important cases",
2513 | "20-40%"
2514 | )
2515 | optimization_table.add_row(
2516 | "Bulk Processing",
2517 | "Refine multiple related tools at once",
2518 | "30-50%"
2519 | )
2520 | optimization_table.add_row(
2521 | "Skip Winnowing",
2522 | "Disable winnowing for quick improvements",
2523 | "5-10%"
2524 | )
2525 |
2526 | console.print(optimization_table)
2527 |
2528 | # Define and display standard vs. optimized configurations
2529 | standard_config = {
2530 | "tool_names": [selected_tool],
2531 | "max_iterations": 3,
2532 | "refinement_model_config": {
2533 | "provider": Provider.OPENAI.value,
2534 | "model": "gpt-4.1",
2535 | "temperature": 0.2,
2536 | },
2537 | "validation_level": "full",
2538 | "enable_winnowing": True
2539 | }
2540 |
2541 | optimized_config = {
2542 | "tool_names": [selected_tool],
2543 | "max_iterations": 1,
2544 | "refinement_model_config": {
2545 | "provider": Provider.OPENAI.value,
2546 | "model": "gpt-4.1-mini",
2547 | "temperature": 0.3,
2548 | },
2549 | "validation_level": "basic",
2550 | "enable_winnowing": False,
2551 | # Focused test generation to save costs
2552 | "generation_config": {
2553 | "positive_required_only": 2,
2554 | "positive_optional_mix": 2,
2555 | "negative_type": 2,
2556 | "negative_required": 1,
2557 | "negative_enum": 0,
2558 | "negative_format": 0,
2559 | "negative_range": 0,
2560 | "negative_length": 0,
2561 | "negative_pattern": 0,
2562 | "edge_empty": 0,
2563 | "edge_null": 0,
2564 | "edge_boundary_min": 0,
2565 | "edge_boundary_max": 0,
2566 | "llm_realistic_combo": 2,
2567 | "llm_ambiguity_probe": 1,
2568 | "llm_simulation_based": 0
2569 | }
2570 | }
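    # The zeroed categories should simply produce no tests of that type; together with the smaller
    # model and the single iteration, this is where most of the estimated savings comes from.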
2571 |
2572 | # Compare costs
2573 | standard_est_cost = 0.09 # 3 iterations with gpt-4.1
2574 | optimized_est_cost = 0.015 # 1 iteration with gpt-4.1-mini and reduced tests
2575 | savings_pct = ((standard_est_cost - optimized_est_cost) / standard_est_cost) * 100
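    # With the estimates above: (0.09 - 0.015) / 0.09 ≈ 83% projected savings.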
2576 |
2577 | console.print(Panel(
2578 | Group(
2579 | "[bold]Standard Config:[/bold]",
2580 | Syntax(json.dumps(standard_config, indent=2), "json", theme="default", line_numbers=False),
2581 | f"[yellow]Estimated Cost: ${standard_est_cost:.3f}[/yellow]",
2582 | "\n[bold]Optimized Config:[/bold]",
2583 | Syntax(json.dumps(optimized_config, indent=2), "json", theme="default", line_numbers=False),
2584 | f"[green]Estimated Cost: ${optimized_est_cost:.3f}[/green]",
2585 | f"\n[bold cyan]Estimated Savings: {savings_pct:.1f}%[/bold cyan]"
2586 | ),
2587 | title="Cost Comparison",
2588 | border_style="dim cyan",
2589 | expand=False
2590 | ))
2591 |
2592 | # Run the optimized configuration
2593 | console.print("\n[bold cyan]Running Cost-Optimized Refinement:[/bold cyan]")
2594 | detail_console.print(f"\n[bold]Starting cost-optimized refinement for {selected_tool}...[/bold]")
2595 |
2596 | # Add progress callback
2597 | optimized_config["progress_callback"] = display_refinement_progress
2598 |
2599 | with Progress(
2600 | TextColumn("[bold blue]{task.description}"),
2601 | BarColumn(complete_style="green", finished_style="green"),
2602 | TaskProgressColumn(),
2603 | TimeElapsedColumn(),
2604 | console=console,
2605 | expand=True
2606 | ) as progress:
2607 | task_id = progress.add_task("[cyan]Running cost-optimized refinement...", total=100)
2608 |
2609 | # Execute the refinement
2610 | start_time = time.time()
2611 | try:
2612 | result = await gateway.mcp.call_tool("refine_tool_documentation", optimized_config)
2613 |
2614 | # Simulate progress updates
2615 | elapsed = 0
2616 | while progress.tasks[task_id].completed < 100 and elapsed < 30:
2617 | progress.update(task_id, completed=min(95, elapsed * 3)) # Faster progress for optimized mode
2618 | await asyncio.sleep(0.5)
2619 | elapsed = time.time() - start_time
2620 |
2621 | progress.update(task_id, completed=100)
2622 |
2623 | # Track cost if available
2624 | if isinstance(result, dict) and "total_refinement_cost" in result:
2625 | actual_cost = result.get("total_refinement_cost", 0.0)
2626 | tracker.add_generic_cost(
2627 | cost=actual_cost,
2628 | description=f"Cost-optimized refinement of {selected_tool}",
2629 | provider=optimized_config["refinement_model_config"]["provider"],
2630 | model=optimized_config["refinement_model_config"]["model"]
2631 | )
2632 |
2633 | # Compare estimated vs actual cost
2634 | console.print("[bold cyan]Cost Analysis:[/bold cyan]")
2635 | console.print(f"Estimated Cost: ${optimized_est_cost:.3f}")
2636 | console.print(f"Actual Cost: ${actual_cost:.3f}")
2637 | console.print(f"Actual Savings vs. Standard: {((standard_est_cost - actual_cost) / standard_est_cost) * 100:.1f}%")
2638 |
2639 | # Display the results
2640 | display_refinement_result(
2641 | result,
2642 | console=console,
2643 | visualization_level=SETTINGS["visualization_level"],
2644 | save_to_file=SETTINGS["save_results"],
2645 | output_dir=SETTINGS["output_dir"]
2646 | )
2647 |
2648 | return result
2649 |
2650 | except Exception as e:
2651 | progress.update(task_id, completed=100, description="[bold red]Refinement failed!")
2652 | logger.error(f"Error during cost-optimized refinement: {e}", emoji_key="error", exc_info=True)
2653 | console.print(f"[bold red]Error during cost-optimized refinement:[/bold red] {escape(str(e))}")
2654 | return None
2655 |
2656 | async def demo_practical_testing(
2657 | gateway: Gateway,
2658 | tracker: CostTracker
2659 | ):
2660 | """Demonstrate practical testing with flawed examples."""
2661 | console.print(Rule("[bold cyan]Practical Testing with Flawed Tools[/bold cyan]", style="cyan"))
2662 |
2663 |     # Create the flawed example tools used for this demonstration
2664 | created_tools = await create_flawed_example_tools(gateway.mcp)
2665 |
2666 | if not created_tools:
2667 | logger.error("Failed to create flawed example tools", emoji_key="error")
2668 | console.print("[bold red]Error:[/bold red] Failed to create flawed example tools for demonstration.")
2669 | return
2670 |
2671 | console.print(f"Created {len(created_tools)} flawed example tools for practical testing:\n" +
2672 | "\n".join([f"- [cyan]{name}[/cyan]" for name in created_tools]))
2673 |
2674 |     # Summarize the intentional documentation flaws in a table
2675 | flaws_table = Table(title="Intentional Documentation Flaws", box=box.ROUNDED)
2676 | flaws_table.add_column("Tool", style="cyan")
2677 | flaws_table.add_column("Flaw Type", style="yellow")
2678 | flaws_table.add_column("Description", style="white")
2679 |
2680 | flaws_table.add_row(
2681 | "flawed_process_text",
2682 | "Ambiguous Description",
2683 | "Description is vague and doesn't explain parameters."
2684 | )
2685 | flaws_table.add_row(
2686 | "flawed_scrape_website",
2687 | "Missing Parameter Descriptions",
2688 | "Parameters in schema have no descriptions."
2689 | )
2690 | flaws_table.add_row(
2691 | "flawed_data_processor",
2692 | "Confusing Schema & Description Mismatch",
2693 | "Description calls the tool 'analyzer' but name is 'processor'."
2694 | )
2695 | flaws_table.add_row(
2696 | "flawed_product_search",
2697 | "Misleading Examples",
2698 | "Example shows incorrect parameter name 'sort_by' vs schema 'sort'."
2699 | )
2700 | flaws_table.add_row(
2701 | "flawed_calculator",
2702 | "Schema/Implementation Conflict",
2703 | "Clear description but possible schema type confusion."
2704 | )
2705 |
2706 | console.print(flaws_table)
2707 |
2708 | # Select a flawed tool to demonstrate refinement
2709 | selected_tool = created_tools[0] # Start with the first one
2710 | console.print(f"\nSelected tool for demonstration: [cyan]{selected_tool}[/cyan]")
2711 |
2712 | # Show the original flawed tool definition
2713 | tool_list = await gateway.mcp.list_tools()
2714 | tool_def = next((t for t in tool_list if t.name == selected_tool), None)
2715 |
2716 | if tool_def and hasattr(tool_def, "inputSchema") and hasattr(tool_def, "description"):
2717 | input_schema = getattr(tool_def, "inputSchema", {})
2718 | description = getattr(tool_def, "description", "")
2719 |
2720 | console.print("[bold cyan]Original Flawed Tool Definition:[/bold cyan]")
2721 |
2722 | console.print(Panel(
2723 | escape(description),
2724 | title="Original Description",
2725 | border_style="dim red",
2726 | expand=False
2727 | ))
2728 |
2729 | console.print(Panel(
2730 | Syntax(json.dumps(input_schema, indent=2), "json", theme="default", line_numbers=False),
2731 | title="Original Schema",
2732 | border_style="dim red",
2733 | expand=False
2734 | ))
2735 |
2736 | # Run refinement on the flawed tool
2737 | console.print("\n[bold cyan]Running Refinement on Flawed Tool:[/bold cyan]")
2738 | detail_console.print(f"\n[bold]Starting refinement for flawed tool {selected_tool}...[/bold]")
2739 |
2740 | params = {
2741 | "tool_names": [selected_tool],
2742 | "max_iterations": 2,
2743 | "refinement_model_config": {
2744 | "provider": Provider.OPENAI.value,
2745 | "model": "gpt-4.1", # Use the best model for these challenging cases
2746 | "temperature": 0.2,
2747 | },
2748 | "validation_level": "full",
2749 | "enable_winnowing": True,
2750 | "progress_callback": display_refinement_progress,
2751 | }
2752 |
2753 | with Progress(
2754 | TextColumn("[bold blue]{task.description}"),
2755 | BarColumn(complete_style="green", finished_style="green"),
2756 | TaskProgressColumn(),
2757 | TimeElapsedColumn(),
2758 | console=console,
2759 | expand=True
2760 | ) as progress:
2761 | task_id = progress.add_task("[cyan]Refining flawed tool...", total=100)
2762 |
2763 | # Execute the refinement
2764 | start_time = time.time()
2765 | try:
2766 | result = await gateway.mcp.call_tool("refine_tool_documentation", params)
2767 |
2768 | # Simulate progress updates
2769 | elapsed = 0
2770 | while progress.tasks[task_id].completed < 100 and elapsed < 60:
2771 | progress.update(task_id, completed=min(95, elapsed * 1.5))
2772 | await asyncio.sleep(0.5)
2773 | elapsed = time.time() - start_time
2774 |
2775 | progress.update(task_id, completed=100)
2776 |
2777 | # Track cost if available
2778 | if isinstance(result, dict) and "total_refinement_cost" in result:
2779 | tracker.add_generic_cost(
2780 | cost=result.get("total_refinement_cost", 0.0),
2781 | description=f"Flawed tool refinement of {selected_tool}",
2782 | provider=Provider.OPENAI.value,
2783 | model="gpt-4.1"
2784 | )
2785 |
2786 | # Display the results
2787 | display_refinement_result(
2788 | result,
2789 | console=console,
2790 | visualization_level=SETTINGS["visualization_level"],
2791 | save_to_file=SETTINGS["save_results"],
2792 | output_dir=SETTINGS["output_dir"]
2793 | )
2794 |
2795 | # Highlight identified flaws
2796 | refined_tools = result.get("refined_tools", [])
2797 | target_tool_result = next((t for t in refined_tools if t.get("tool_name") == selected_tool), None)
2798 |
2799 | if target_tool_result:
2800 | identified_flaws = []
2801 | for iter_data in target_tool_result.get("iterations", []):
2802 | analysis = iter_data.get("analysis", {})
2803 | if analysis:
2804 | flaws = analysis.get("identified_flaw_categories", [])
2805 | for flaw in flaws:
2806 | if flaw not in identified_flaws:
2807 | identified_flaws.append(flaw)
2808 |
2809 | if identified_flaws:
2810 | console.print("\n[bold cyan]Identified Documentation Flaws:[/bold cyan]")
2811 | flaw_details = {
2812 | "MISSING_DESCRIPTION": "Documentation is missing key information",
2813 | "AMBIGUOUS_DESCRIPTION": "Description is unclear or can be interpreted in multiple ways",
2814 | "INCORRECT_DESCRIPTION": "Description contains incorrect information",
2815 | "MISSING_SCHEMA_CONSTRAINT": "Schema is missing important constraints",
2816 | "INCORRECT_SCHEMA_CONSTRAINT": "Schema contains incorrect constraints",
2817 | "OVERLY_RESTRICTIVE_SCHEMA": "Schema is unnecessarily restrictive",
2818 | "TYPE_CONFUSION": "Parameter types are inconsistent or unclear",
2819 | "MISSING_EXAMPLE": "Documentation lacks necessary examples",
2820 | "MISLEADING_EXAMPLE": "Examples provided are incorrect or misleading",
2821 | "INCOMPLETE_EXAMPLE": "Examples are present but insufficient",
2822 | "PARAMETER_DEPENDENCY_UNCLEAR": "Dependencies between parameters are not explained",
2823 | "CONFLICTING_CONSTRAINTS": "Schema contains contradictory constraints",
2824 | "AGENT_FORMULATION_ERROR": "Documentation hinders LLM agent's ability to use the tool",
2825 | "SCHEMA_PREVALIDATION_FAILURE": "Schema validation issues",
2826 | "TOOL_EXECUTION_ERROR": "Issues with tool execution",
2827 | "UNKNOWN": "Unspecified documentation issue"
2828 | }
2829 |
2830 | for flaw in identified_flaws:
2831 | console.print(f"- [bold yellow]{flaw}[/bold yellow]: {flaw_details.get(flaw, 'No description available')}")
2832 |
2833 | return result
2834 |
2835 | except Exception as e:
2836 | progress.update(task_id, completed=100, description="[bold red]Refinement failed!")
2837 | logger.error(f"Error during flawed tool refinement: {e}", emoji_key="error", exc_info=True)
2838 | console.print(f"[bold red]Error during flawed tool refinement:[/bold red] {escape(str(e))}")
2839 | return None
2840 |
2841 |
2842 | async def main():
2843 | """Main entry point for the demo."""
2844 | try:
2845 | print("Starting demo...")
2846 | logger.debug("Starting demo...")
2847 | args = parse_arguments()
2848 | print(f"Args parsed: {args}")
2849 | logger.debug(f"Args parsed: {args}")
2850 |
2851 | # Set up gateway
2852 | print("Setting up gateway...")
2853 | gateway = await setup_gateway_and_tools(create_flawed_tools=args.create_flawed)
2854 | print("Gateway setup complete")
2855 |
2856 | # Initialize cost tracker
2857 | tracker = CostTracker(limit=SETTINGS["cost_limit"])
2858 |
2859 | # Check if the tool was successfully registered
2860 | print("Checking if tool is registered...")
2861 | tool_list = await gateway.mcp.list_tools()
2862 | available_tools = [t.name for t in tool_list]
2863 | print(f"Available tools: {available_tools}")
2864 |
2865 | if "refine_tool_documentation" in available_tools:
2866 | print("Tool is available, proceeding with demo")
2867 | logger.info("Tool successfully registered, proceeding with demo", emoji_key="success")
2868 |
2869 | # Run the selected demo based on CLI arguments
2870 | print(f"Running demo: {args.demo}")
2871 |
2872 | # Select a demo based on specified arguments
2873 | if args.demo == "single" or args.demo == "all":
2874 | print("Running single tool refinement demo")
2875 | result = await demo_single_tool_refinement(
2876 | gateway,
2877 | tracker,
2878 | target_tool=args.tool,
2879 | refinement_provider=args.provider,
2880 | refinement_model=args.model,
2881 | max_iterations=args.iterations
2882 | )
2883 | if result:
2884 | logger.success("Single tool refinement demo completed", emoji_key="success")
2885 |
2886 | elif args.demo == "multi":
2887 | print("Running multi-tool refinement demo")
2888 | result = await demo_multi_tool_refinement(
2889 | gateway,
2890 | tracker,
2891 | target_tools=[args.tool] if args.tool else None,
2892 | refinement_provider=args.provider,
2893 | refinement_model=args.model,
2894 | max_iterations=args.iterations
2895 | )
2896 | if result:
2897 | logger.success("Multi-tool refinement demo completed", emoji_key="success")
2898 |
2899 | elif args.demo == "custom-testing":
2900 | print("Running custom test generation demo")
2901 | result = await demo_custom_test_generation(
2902 | gateway,
2903 | tracker,
2904 | target_tool=args.tool,
2905 | refinement_provider=args.provider,
2906 | refinement_model=args.model,
2907 | max_iterations=args.iterations
2908 | )
2909 | if result:
2910 | logger.success("Custom test generation demo completed", emoji_key="success")
2911 |
2912 | elif args.demo == "optimize":
2913 | print("Running cost optimization demo")
2914 | result = await demo_cost_optimization(
2915 | gateway,
2916 | tracker,
2917 | target_tool=args.tool
2918 | )
2919 | if result:
2920 | logger.success("Cost optimization demo completed", emoji_key="success")
2921 |
2922 | elif args.demo == "all-tools":
2923 | print("Running all-tools refinement demo")
2924 | result = await demo_all_tools_refinement(
2925 | gateway,
2926 | tracker,
2927 | refinement_provider=args.provider,
2928 | refinement_model=args.model,
2929 | max_iterations=args.iterations
2930 | )
2931 | if result:
2932 | logger.success("All-tools refinement demo completed", emoji_key="success")
2933 |
2934 | elif args.demo == "schema-focus":
2935 | print("Running schema-focused refinement demo")
2936 | result = await demo_schema_focused_refinement(
2937 | gateway,
2938 | tracker,
2939 | target_tool=args.tool,
2940 | refinement_provider=args.provider,
2941 | refinement_model=args.model
2942 | )
2943 | if result:
2944 | logger.success("Schema-focused refinement demo completed", emoji_key="success")
2945 |
2946 | elif args.demo == "practical":
2947 | print("Running practical testing demo")
2948 | result = await demo_practical_testing(gateway, tracker)
2949 | if result:
2950 | logger.success("Practical testing demo completed", emoji_key="success")
2951 |
2952 | elif args.demo == "model-comparison":
2953 | print("Running model comparison demo")
2954 | result = await demo_model_comparison(
2955 | gateway,
2956 | tracker,
2957 | target_tool=args.tool
2958 | )
2959 | if result:
2960 | logger.success("Model comparison demo completed", emoji_key="success")
2961 |
2962 | elif args.demo == "all":
2963 | print("Running all demos")
2964 | console.print(Panel(
2965 | "Running all demos in sequence. This may take some time.",
2966 | title="ℹ️ Running All Demos",
2967 | border_style="cyan",
2968 | expand=False
2969 | ))
2970 |
2971 | # Run each demo in sequence
2972 | demos = [
2973 | demo_single_tool_refinement(gateway, tracker, target_tool=args.tool,
2974 | refinement_provider=args.provider,
2975 | refinement_model=args.model,
2976 | max_iterations=args.iterations),
2977 | demo_multi_tool_refinement(gateway, tracker,
2978 | refinement_provider=args.provider,
2979 | refinement_model=args.model,
2980 | max_iterations=args.iterations),
2981 | demo_custom_test_generation(gateway, tracker, target_tool=args.tool,
2982 | refinement_provider=args.provider,
2983 | refinement_model=args.model),
2984 | demo_cost_optimization(gateway, tracker, target_tool=args.tool),
2985 | demo_schema_focused_refinement(gateway, tracker, target_tool=args.tool,
2986 | refinement_provider=args.provider,
2987 | refinement_model=args.model),
2988 | demo_model_comparison(gateway, tracker, target_tool=args.tool)
2989 | ]
2990 |
2991 | if args.create_flawed:
2992 | demos.append(demo_practical_testing(gateway, tracker))
2993 |
2994 | for demo_coro in demos:
2995 | try:
2996 | await demo_coro
2997 | except Exception as e:
2998 | logger.error(f"Error running demo: {e}", emoji_key="error", exc_info=True)
2999 | console.print(f"[bold red]Error running demo:[/bold red] {escape(str(e))}")
3000 |
3001 | logger.success("All demos completed", emoji_key="success")
3002 |
3003 | else:
3004 | print("No valid demo specified")
3005 | console.print(Panel(
3006 | f"The specified demo '{args.demo}' is not recognized.\n"
3007 | "Available demos: all, single, multi, custom-testing, optimize, all-tools, schema-focus, practical, model-comparison",
3008 | title="⚠️ Invalid Demo Selection",
3009 | border_style="yellow",
3010 | expand=False
3011 | ))
3012 | else:
3013 | print("Tool is not available")
3014 | # Tool not available, show error message
3015 | console.print(Panel(
3016 | "This demo requires the docstring_refiner tool to be properly registered.\n"
3017 | "Due to known issues with Pydantic definitions, the tool can't be registered in this demo.\n\n"
3018 | "Check that you have the correct version of the Ultimate MCP Server and dependencies installed.",
3019 | title="⚠️ Demo Requirements Not Met",
3020 | border_style="red",
3021 | expand=False
3022 | ))
3023 |
3024 | # Display cost summary
3025 | console.print(Rule("[bold green]Total Demo Cost Summary[/bold green]", style="green"))
3026 | tracker.display_costs(console=console)
3027 |
3028 | logger.info("Docstring Refiner Demo completed successfully", emoji_key="success")
3029 | console.print(Rule("[bold green]Demo Complete[/bold green]", style="green"))
3030 | print("Demo completed successfully")
3031 |
3032 | except Exception as e:
3033 | print(f"Error in main: {type(e).__name__}: {str(e)}")
3034 | import traceback
3035 | traceback.print_exc()
3036 | return 1
3037 |
3038 | return 0
3039 |
3040 |
3041 | if __name__ == "__main__":
3042 | exit_code = asyncio.run(main())
3043 | sys.exit(exit_code)
```
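The listing above appears to be the tail of `examples/docstring_refiner_demo.py`: `main()` parses CLI arguments, sets up the gateway (optionally registering the deliberately flawed demo tools), confirms that `refine_tool_documentation` is available, and then dispatches to the matching demo coroutine. As a rough usage sketch, the snippet below assumes the flag spellings (`--demo`, `--tool`, `--provider`, `--model`, `--iterations`, `--create-flawed`) mirror the `args.*` attributes read in `main()`; the actual `parse_arguments()` definition is not shown in this excerpt, so treat the exact flag names as assumptions.

```python
# Minimal sketch of launching the demo script; flag names are inferred from the
# args.* attributes used in main() and may differ from the real parse_arguments()
# definition (not shown in this excerpt).
import subprocess
import sys

subprocess.run(
    [
        sys.executable,
        "examples/docstring_refiner_demo.py",
        "--demo", "single",    # also: all, multi, custom-testing, optimize,
                               # all-tools, schema-focus, practical, model-comparison
        "--iterations", "2",   # forwarded as max_iterations to the demo coroutines
        "--create-flawed",     # assumed flag for args.create_flawed; registers the
                               # deliberately flawed tools listed in flaws_table
    ],
    check=True,
)
```

With `--demo all`, the dispatch runs every demo coroutine in sequence and appends the practical-testing demo only when the flawed tools were created.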