This is page 38 of 45. Use http://codebase.md/dicklesworthstone/llm_gateway_mcp_server?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .cursorignore
├── .env.example
├── .envrc
├── .gitignore
├── additional_features.md
├── check_api_keys.py
├── completion_support.py
├── comprehensive_test.py
├── docker-compose.yml
├── Dockerfile
├── empirically_measured_model_speeds.json
├── error_handling.py
├── example_structured_tool.py
├── examples
│ ├── __init__.py
│ ├── advanced_agent_flows_using_unified_memory_system_demo.py
│ ├── advanced_extraction_demo.py
│ ├── advanced_unified_memory_system_demo.py
│ ├── advanced_vector_search_demo.py
│ ├── analytics_reporting_demo.py
│ ├── audio_transcription_demo.py
│ ├── basic_completion_demo.py
│ ├── cache_demo.py
│ ├── claude_integration_demo.py
│ ├── compare_synthesize_demo.py
│ ├── cost_optimization.py
│ ├── data
│ │ ├── sample_event.txt
│ │ ├── Steve_Jobs_Introducing_The_iPhone_compressed.md
│ │ └── Steve_Jobs_Introducing_The_iPhone_compressed.mp3
│ ├── docstring_refiner_demo.py
│ ├── document_conversion_and_processing_demo.py
│ ├── entity_relation_graph_demo.py
│ ├── filesystem_operations_demo.py
│ ├── grok_integration_demo.py
│ ├── local_text_tools_demo.py
│ ├── marqo_fused_search_demo.py
│ ├── measure_model_speeds.py
│ ├── meta_api_demo.py
│ ├── multi_provider_demo.py
│ ├── ollama_integration_demo.py
│ ├── prompt_templates_demo.py
│ ├── python_sandbox_demo.py
│ ├── rag_example.py
│ ├── research_workflow_demo.py
│ ├── sample
│ │ ├── article.txt
│ │ ├── backprop_paper.pdf
│ │ ├── buffett.pdf
│ │ ├── contract_link.txt
│ │ ├── legal_contract.txt
│ │ ├── medical_case.txt
│ │ ├── northwind.db
│ │ ├── research_paper.txt
│ │ ├── sample_data.json
│ │ └── text_classification_samples
│ │ ├── email_classification.txt
│ │ ├── news_samples.txt
│ │ ├── product_reviews.txt
│ │ └── support_tickets.txt
│ ├── sample_docs
│ │ └── downloaded
│ │ └── attention_is_all_you_need.pdf
│ ├── sentiment_analysis_demo.py
│ ├── simple_completion_demo.py
│ ├── single_shot_synthesis_demo.py
│ ├── smart_browser_demo.py
│ ├── sql_database_demo.py
│ ├── sse_client_demo.py
│ ├── test_code_extraction.py
│ ├── test_content_detection.py
│ ├── test_ollama.py
│ ├── text_classification_demo.py
│ ├── text_redline_demo.py
│ ├── tool_composition_examples.py
│ ├── tournament_code_demo.py
│ ├── tournament_text_demo.py
│ ├── unified_memory_system_demo.py
│ ├── vector_search_demo.py
│ ├── web_automation_instruction_packs.py
│ └── workflow_delegation_demo.py
├── LICENSE
├── list_models.py
├── marqo_index_config.json.example
├── mcp_protocol_schema_2025-03-25_version.json
├── mcp_python_lib_docs.md
├── mcp_tool_context_estimator.py
├── model_preferences.py
├── pyproject.toml
├── quick_test.py
├── README.md
├── resource_annotations.py
├── run_all_demo_scripts_and_check_for_errors.py
├── storage
│ └── smart_browser_internal
│ ├── locator_cache.db
│ ├── readability.js
│ └── storage_state.enc
├── test_client.py
├── test_connection.py
├── TEST_README.md
├── test_sse_client.py
├── test_stdio_client.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── integration
│ │ ├── __init__.py
│ │ └── test_server.py
│ ├── manual
│ │ ├── test_extraction_advanced.py
│ │ └── test_extraction.py
│ └── unit
│ ├── __init__.py
│ ├── test_cache.py
│ ├── test_providers.py
│ └── test_tools.py
├── TODO.md
├── tool_annotations.py
├── tools_list.json
├── ultimate_mcp_banner.webp
├── ultimate_mcp_logo.webp
├── ultimate_mcp_server
│ ├── __init__.py
│ ├── __main__.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── commands.py
│ │ ├── helpers.py
│ │ └── typer_cli.py
│ ├── clients
│ │ ├── __init__.py
│ │ ├── completion_client.py
│ │ └── rag_client.py
│ ├── config
│ │ └── examples
│ │ └── filesystem_config.yaml
│ ├── config.py
│ ├── constants.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── evaluation
│ │ │ ├── base.py
│ │ │ └── evaluators.py
│ │ ├── providers
│ │ │ ├── __init__.py
│ │ │ ├── anthropic.py
│ │ │ ├── base.py
│ │ │ ├── deepseek.py
│ │ │ ├── gemini.py
│ │ │ ├── grok.py
│ │ │ ├── ollama.py
│ │ │ ├── openai.py
│ │ │ └── openrouter.py
│ │ ├── server.py
│ │ ├── state_store.py
│ │ ├── tournaments
│ │ │ ├── manager.py
│ │ │ ├── tasks.py
│ │ │ └── utils.py
│ │ └── ums_api
│ │ ├── __init__.py
│ │ ├── ums_database.py
│ │ ├── ums_endpoints.py
│ │ ├── ums_models.py
│ │ └── ums_services.py
│ ├── exceptions.py
│ ├── graceful_shutdown.py
│ ├── services
│ │ ├── __init__.py
│ │ ├── analytics
│ │ │ ├── __init__.py
│ │ │ ├── metrics.py
│ │ │ └── reporting.py
│ │ ├── cache
│ │ │ ├── __init__.py
│ │ │ ├── cache_service.py
│ │ │ ├── persistence.py
│ │ │ ├── strategies.py
│ │ │ └── utils.py
│ │ ├── cache.py
│ │ ├── document.py
│ │ ├── knowledge_base
│ │ │ ├── __init__.py
│ │ │ ├── feedback.py
│ │ │ ├── manager.py
│ │ │ ├── rag_engine.py
│ │ │ ├── retriever.py
│ │ │ └── utils.py
│ │ ├── prompts
│ │ │ ├── __init__.py
│ │ │ ├── repository.py
│ │ │ └── templates.py
│ │ ├── prompts.py
│ │ └── vector
│ │ ├── __init__.py
│ │ ├── embeddings.py
│ │ └── vector_service.py
│ ├── tool_token_counter.py
│ ├── tools
│ │ ├── __init__.py
│ │ ├── audio_transcription.py
│ │ ├── base.py
│ │ ├── completion.py
│ │ ├── docstring_refiner.py
│ │ ├── document_conversion_and_processing.py
│ │ ├── enhanced-ums-lookbook.html
│ │ ├── entity_relation_graph.py
│ │ ├── excel_spreadsheet_automation.py
│ │ ├── extraction.py
│ │ ├── filesystem.py
│ │ ├── html_to_markdown.py
│ │ ├── local_text_tools.py
│ │ ├── marqo_fused_search.py
│ │ ├── meta_api_tool.py
│ │ ├── ocr_tools.py
│ │ ├── optimization.py
│ │ ├── provider.py
│ │ ├── pyodide_boot_template.html
│ │ ├── python_sandbox.py
│ │ ├── rag.py
│ │ ├── redline-compiled.css
│ │ ├── sentiment_analysis.py
│ │ ├── single_shot_synthesis.py
│ │ ├── smart_browser.py
│ │ ├── sql_databases.py
│ │ ├── text_classification.py
│ │ ├── text_redline_tools.py
│ │ ├── tournament.py
│ │ ├── ums_explorer.html
│ │ └── unified_memory_system.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── async_utils.py
│ │ ├── display.py
│ │ ├── logging
│ │ │ ├── __init__.py
│ │ │ ├── console.py
│ │ │ ├── emojis.py
│ │ │ ├── formatter.py
│ │ │ ├── logger.py
│ │ │ ├── panels.py
│ │ │ ├── progress.py
│ │ │ └── themes.py
│ │ ├── parse_yaml.py
│ │ ├── parsing.py
│ │ ├── security.py
│ │ └── text.py
│ └── working_memory_api.py
├── unified_memory_system_technical_analysis.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/examples/docstring_refiner_demo.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | """
3 | Advanced Docstring Refiner Demo for Ultimate MCP Server.
4 |
5 | This script demonstrates the autonomous documentation refinement tool that analyzes, tests, and improves
6 | documentation (descriptions, schemas, examples) for MCP tools, enhancing their usability with LLM agents.
7 | The demo showcases multiple refinement approaches and visualization techniques while providing
8 | comprehensive performance metrics and cost analysis.
9 |
10 | Features:
11 | - Single and multi-tool refinement demonstrations
12 | - Custom test generation strategy configuration
13 | - Provider fallbacks and model selection optimization
14 | - Visual diffs of documentation improvements
15 | - Cost estimation and optimization techniques
16 | - Schema-focused refinement capabilities
17 | - Model comparison and performance analysis
18 | - Practical testing with intentionally flawed tools
19 | - Adaptive refinement based on tool complexity
20 |
21 | Command-line Arguments:
22 | --demo {all,single,multi,custom-testing,optimize,all-tools,schema-focus,practical,model-comparison}:
23 | Specific demo to run (default: all)
24 |
25 | --tool TOOL:
26 | Specify a specific tool to refine (bypasses automatic selection)
27 |
28 | --iterations N:
29 | Number of refinement iterations to run
30 |
31 | --model MODEL:
32 | Specify a model to use for refinement (e.g., gpt-4.1-mini, claude-3-5-haiku)
33 |
34 | --provider PROVIDER:
35 | Specify a provider to use for refinement (e.g., openai, anthropic)
36 |
37 | --visualize {minimal,standard,full}:
38 | Control visualization detail level (default: standard)
39 |
40 | --cost-limit FLOAT:
41 | Maximum cost limit in USD (default: 5.0)
42 |
43 | --output-dir DIR:
44 | Directory to save results
45 |
46 | --save-results:
47 | Save refinement results to files
48 |
49 | --verbose, -v:
50 | Increase output verbosity
51 |
52 | --create-flawed:
53 | Create flawed example tools for practical testing
54 |
55 | Demo Modes:
56 | single:
57 | Demonstrates refining a single tool with detailed progress tracking
58 | and visualization of description, schema, and example improvements.
59 |
60 | multi:
61 | Demonstrates refining multiple tools simultaneously, showcasing parallel
62 | processing and cross-tool analysis of documentation patterns.
63 |
64 | custom-testing:
65 | Demonstrates advanced test generation strategies with fine-grained control
66 | over the types and quantities of test cases.
67 |
68 | optimize:
69 | Showcases cost optimization techniques for large-scale refinement,
70 | comparing standard and cost-optimized approaches.
71 |
72 | all-tools:
73 | Demonstrates the capability to refine all available tools in a single run,
74 | with resource management and prioritization features.
75 |
76 | schema-focus:
77 | Focuses specifically on schema improvements, with detailed visualization
78 | of JSON schema patches and validation improvements.
79 |
80 | practical:
81 | Creates and refines intentionally flawed example tools to demonstrate
82 | the system's ability to identify and fix common documentation issues.
83 |
84 | model-comparison:
85 | Compares the performance of different LLM models for refinement tasks,
86 | with detailed metrics on success rates, cost, and processing time.
87 |
88 | Dependencies:
89 | - ultimate_mcp_server: Core framework for interfacing with LLMs and tools
90 | - rich: For beautiful console output and visualizations
91 | - asyncio: For asynchronous processing of refinement operations
92 | - Required API keys for providers (OPENAI_API_KEY, ANTHROPIC_API_KEY, etc.)
93 |
94 | Usage Examples:
95 | # Run all demos with standard visualization
96 | python docstring_refiner_demo.py
97 |
98 | # Run just the single tool refinement demo with a specific tool
99 | python docstring_refiner_demo.py --demo single --tool generate_completion
100 |
101 | # Run the model comparison demo with full visualization and save results
102 | python docstring_refiner_demo.py --demo model-comparison --visualize full --save-results
103 |
104 | # Run the multi-tool demo with a specific model and cost limit
105 | python docstring_refiner_demo.py --demo multi --model gpt-4.1-mini --cost-limit 2.5
106 |
107 | # Create and test flawed example tools
108 | python docstring_refiner_demo.py --demo practical --create-flawed
109 |
110 | Return Values:
111 | The script returns exit code 0 on successful completion, or exit code 1 if
112 | critical errors occur during execution.
113 |
114 | Methods:
115 | The script contains various helper functions and demo methods:
116 |
117 | setup_gateway_and_tools(): Initializes the Gateway and ensures required tools are available
118 |
119 | get_suitable_tools(): Finds appropriate tools for demonstrations based on complexity
120 |
121 | display_refinement_progress(): Callback for tracking refinement progress events
122 |
123 | create_text_diff(), create_side_by_side_diff(): Generate visual diffs of documentation changes
124 |
125 | display_refinement_result(): Formats and displays refinement results with appropriate detail level
126 |
127 | create_flawed_example_tools(): Creates example tools with intentional documentation flaws
128 |
129 | Demo functions (demo_*): Implement specific demonstration scenarios
130 |
131 | Implementation Notes:
132 | - The script uses the global MCP instance from the Gateway for all tool operations
133 | - Refinement operations are tracked through a CostTracker instance for budget management
134 | - All demonstrations include graceful fallbacks for providers and models
135 | - Progress updates are displayed using Rich's Progress components
136 | - Results can be saved to files for later analysis or integration
137 |
138 | Author:
139 | Ultimate MCP Server Team
140 |
141 | Version:
142 | 1.0.0
143 | """
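# Illustrative programmatic sketch (assumes only the demo's own helpers defined below;
# run inside an async context, not at module import time):
#   gateway = await setup_gateway_and_tools()
#   result = await gateway.mcp.call_tool(
#       "refine_tool_documentation",
#       {"tool_names": ["generate_completion"], "max_iterations": 1},
#   )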
144 |
145 | import argparse
146 | import asyncio
147 | import datetime
148 | import difflib
149 | import json
150 | import random
151 | import sys
152 | import tempfile
153 | import time
154 | from pathlib import Path
155 | from typing import Dict, List, Optional
156 |
157 | # Add project root to path for imports when running as script
158 | sys.path.insert(0, str(Path(__file__).parent.parent))
159 |
160 | # Rich for beautiful console output
161 | from rich import box
162 | from rich.console import Console, Group
163 | from rich.markup import escape
164 | from rich.panel import Panel
165 | from rich.progress import (
166 | BarColumn,
167 | Progress,
168 | SpinnerColumn,
169 | TaskProgressColumn,
170 | TextColumn,
171 | TimeElapsedColumn,
172 | TimeRemainingColumn,
173 | )
174 | from rich.rule import Rule
175 | from rich.syntax import Syntax
176 | from rich.table import Table
177 | from rich.tree import Tree
178 |
179 | # Project imports
180 | from ultimate_mcp_server.constants import Provider
181 | from ultimate_mcp_server.core.server import Gateway
182 | from ultimate_mcp_server.tools.base import with_error_handling
183 | from ultimate_mcp_server.tools.docstring_refiner import (
184 | RefinementProgressEvent,
185 | )
186 | from ultimate_mcp_server.utils import get_logger
187 | from ultimate_mcp_server.utils.display import CostTracker
188 | from ultimate_mcp_server.utils.logging.console import console
189 |
190 | # Initialize logger
191 | logger = get_logger("example.docstring_refiner")
192 |
193 | # Create a separate console for detailed output
194 | detail_console = Console(highlight=False)
195 |
196 | # Global MCP instance (will be populated from Gateway)
197 | mcp = None
198 |
199 | # Global settings that can be modified by command line args
200 | SETTINGS = {
201 | "output_dir": None,
202 | "visualization_level": "standard", # "minimal", "standard", "full"
203 | "cost_limit": 5.0, # USD
204 | "preferred_providers": [Provider.OPENAI.value, Provider.ANTHROPIC.value, Provider.GEMINI.value],
205 | "fallback_providers": [Provider.DEEPSEEK.value, Provider.GROK.value],
206 | "save_results": False,
207 | "verbose": False,
208 | }
209 |
210 |
211 | def parse_arguments():
212 | """Parse command line arguments for the demo."""
213 | parser = argparse.ArgumentParser(
214 | description="Advanced Docstring Refiner Demo for Ultimate MCP Server",
215 | formatter_class=argparse.RawDescriptionHelpFormatter,
216 | epilog="""Available demos:
217 | all - Run all demos (default)
218 | single - Single tool refinement
219 | multi - Multi-tool refinement
220 | custom-testing - Custom test generation strategies
221 | optimize - Cost optimization techniques
222 | all-tools - Refine all available tools
223 | schema-focus - Focus on schema improvements
224 | practical - Practical testing with flawed tools
225 | model-comparison - Compare different LLM models for refinement
226 | """
227 | )
228 |
229 | # Demo selection
230 | parser.add_argument(
231 | "--demo",
232 | default="all",
233 | choices=[
234 | "all", "single", "multi", "custom-testing", "optimize",
235 | "all-tools", "schema-focus", "practical",
236 | "model-comparison"
237 | ],
238 | help="Specific demo to run (default: all)"
239 | )
240 |
241 | # Tool selection
242 | parser.add_argument(
243 | "--tool",
244 | help="Specify a specific tool to refine (bypasses automatic selection)"
245 | )
246 |
247 | # Iteration control
248 | parser.add_argument(
249 | "--iterations",
250 | type=int,
251 | default=None,
252 | help="Number of refinement iterations to run"
253 | )
254 |
255 | # Model specification
256 | parser.add_argument(
257 | "--model",
258 | default=None,
259 | help="Specify a model to use for refinement (e.g., gpt-4.1-mini, claude-3-5-haiku)"
260 | )
261 |
262 | # Provider specification
263 | parser.add_argument(
264 | "--provider",
265 | default=None,
266 | help=f"Specify a provider to use for refinement (e.g., {Provider.OPENAI.value}, {Provider.ANTHROPIC.value})"
267 | )
268 |
269 | # Visualization options
270 | parser.add_argument(
271 | "--visualize",
272 | choices=["minimal", "standard", "full"],
273 | default="standard",
274 | help="Control visualization detail level"
275 | )
276 |
277 | # Cost limit
278 | parser.add_argument(
279 | "--cost-limit",
280 | type=float,
281 | default=5.0,
282 | help="Maximum cost limit in USD"
283 | )
284 |
285 | # Output directory
286 | parser.add_argument(
287 | "--output-dir",
288 | help="Directory to save results"
289 | )
290 |
291 | # Save results
292 | parser.add_argument(
293 | "--save-results",
294 | action="store_true",
295 | help="Save refinement results to files"
296 | )
297 |
298 | # Verbosity
299 | parser.add_argument(
300 | "-v", "--verbose",
301 | action="store_true",
302 | help="Increase output verbosity"
303 | )
304 |
305 | # Create flawed tools for testing
306 | parser.add_argument(
307 | "--create-flawed",
308 | action="store_true",
309 | help="Create flawed example tools for practical testing"
310 | )
311 |
312 | args = parser.parse_args()
313 |
314 | # Update settings
315 | SETTINGS["visualization_level"] = args.visualize
316 | SETTINGS["cost_limit"] = args.cost_limit
317 | SETTINGS["save_results"] = args.save_results
318 | SETTINGS["verbose"] = args.verbose
319 |
320 | if args.output_dir:
321 | output_dir = Path(args.output_dir)
322 | output_dir.mkdir(parents=True, exist_ok=True)
323 | SETTINGS["output_dir"] = output_dir
324 |
325 | return args
326 |
327 |
328 | async def setup_gateway_and_tools(create_flawed_tools=False):
329 | """Set up the gateway and ensure docstring refiner tool is available."""
330 | global mcp
331 | logger.debug("Initializing Gateway for docstring refiner demo...")
332 | logger.info("Initializing Gateway for docstring refiner demo...", emoji_key="start")
333 |
334 | # Create Gateway instance with all tools
335 | logger.debug("Creating Gateway instance with all tools")
336 | gateway = Gateway("docstring-refiner-demo", register_tools=True) # Register all tools, not just minimal tools
337 |
338 | # Initialize providers (needed for the tool to function)
339 | try:
340 | logger.debug("Initializing providers...")
341 | await gateway._initialize_providers()
342 | logger.success("Successfully initialized providers", emoji_key="success")
343 | logger.debug("Successfully initialized providers")
344 | except Exception as e:
345 | logger.error(f"Error initializing providers: {e}", emoji_key="error", exc_info=True)
346 | logger.exception("Error initializing providers")
347 | console.print(Panel(
348 | f"Error initializing providers: {escape(str(e))}\n\n"
349 | "Check that your API keys are set correctly in environment variables:\n"
350 | "- OPENAI_API_KEY\n"
351 | "- ANTHROPIC_API_KEY\n"
352 | "- GEMINI_API_KEY\n",
353 | title="❌ Provider Initialization Failed",
354 | border_style="red",
355 | expand=False
356 | ))
357 | # Continue anyway, as some providers might still work
358 |
359 | # Store the MCP server instance
360 | mcp = gateway.mcp
361 | logger.debug("Stored MCP server instance")
362 |
363 | # Display available providers with available models
364 | logger.debug("Getting provider information")
365 | provider_tree = Tree("[bold cyan]Available Providers & Models[/bold cyan]")
366 | provider_info = []
367 |
368 | for provider_name, provider in gateway.providers.items():
369 | if provider:
370 | try:
371 | models = await provider.list_models()
372 | provider_branch = provider_tree.add(f"[yellow]{provider_name}[/yellow]")
373 |
374 | # Group models by category/capability
375 | categorized_models = {}
376 | for model in models:
377 | model_id = model.get("id", "unknown")
378 | if "claude" in model_id.lower():
379 | category = "Claude Family"
380 | elif "gemini" in model_id.lower():
381 | category = "Gemini Family"
382 | elif "deepseek" in model_id.lower():
383 | category = "DeepSeek Family"
384 | elif "4" in model_id:
385 | category = "GPT-4 Family"
386 | elif "3" in model_id:
387 | category = "GPT-3 Family"
388 | else:
389 | category = "Other Models"
390 |
391 | if category not in categorized_models:
392 | categorized_models[category] = []
393 | categorized_models[category].append(model_id)
394 |
395 | # Add models to the tree by category
396 | for category, model_list in categorized_models.items():
397 | category_branch = provider_branch.add(f"[cyan]{category}[/cyan]")
398 | for model_id in sorted(model_list):
399 | category_branch.add(f"[green]{model_id}[/green]")
400 |
401 | # Get default model for provider info
402 | default_model = provider.get_default_model()
403 | provider_info.append(f"{provider_name} (default: {default_model})")
404 | except Exception as e:
405 | logger.warning(f"Could not get models for {provider_name}: {e}", emoji_key="warning")
406 | logger.warning(f"Could not get models for {provider_name}: {e}")
407 | provider_info.append(f"{provider_name} (models unavailable)")
408 | provider_branch = provider_tree.add(f"[yellow]{provider_name}[/yellow]")
409 | provider_branch.add(f"[red]Error listing models: {escape(str(e))}[/red]")
410 |
411 | # Display provider info based on visualization level
412 | if SETTINGS["visualization_level"] == "full":
413 | console.print(Panel(provider_tree, border_style="dim cyan", padding=(1, 2)))
414 | else:
415 | console.print(Panel(
416 | f"Available providers: {', '.join(provider_info)}",
417 | title="Provider Configuration",
418 | border_style="cyan",
419 | expand=False
420 | ))
421 |
422 | # Verify the docstring_refiner tool is available
423 | logger.debug("Checking for available tools")
424 | tool_list = await mcp.list_tools()
425 | available_tools = [t.name for t in tool_list]
426 | logger.debug(f"Available tools before registration: {available_tools}")
427 |
428 | # Display all available tools
429 | tool_tree = Tree("[bold cyan]Available MCP Tools[/bold cyan]")
430 |
431 | # Group tools by namespace for better visualization
432 | tool_namespaces = {}
433 | for tool_name in available_tools:
434 | if ":" in tool_name:
435 | namespace, name = tool_name.split(":", 1)
436 | if namespace not in tool_namespaces:
437 | tool_namespaces[namespace] = []
438 | tool_namespaces[namespace].append(name)
439 | else:
440 | if "root" not in tool_namespaces:
441 | tool_namespaces["root"] = []
442 | tool_namespaces["root"].append(tool_name)
443 |
444 | # Add tools to tree with proper grouping
445 | for namespace, tools in tool_namespaces.items():
446 | if namespace == "root":
447 | for tool in sorted(tools):
448 | tool_tree.add(f"[green]{tool}[/green]")
449 | else:
450 | ns_branch = tool_tree.add(f"[yellow]{namespace}[/yellow]")
451 | for tool in sorted(tools):
452 | ns_branch.add(f"[green]{tool}[/green]")
453 |
454 | # Display tool info based on visualization level
455 | if SETTINGS["visualization_level"] in ["standard", "full"]:
456 | console.print(Panel(tool_tree, border_style="dim cyan", padding=(1, 2)))
457 | else:
458 | console.print(f"[cyan]Tools available:[/cyan] {len(available_tools)}")
459 |
460 | # Check if refine_tool_documentation is available
461 | if "refine_tool_documentation" in available_tools:
462 | logger.success("refine_tool_documentation tool available.", emoji_key="success")
463 | else:
464 | logger.warning("refine_tool_documentation tool not found in available tools list.", emoji_key="warning")
465 | console.print(Panel(
466 | "The refine_tool_documentation tool is not registered automatically.\n"
467 | "This demo will attempt to register it manually as a fallback.",
468 | title="⚠️ Tool Availability Notice",
469 | border_style="yellow"
470 | ))
471 |
472 | # Manually register the refine_tool_documentation tool as a fallback
473 | # Note: This should no longer be necessary since the tool is now included in STANDALONE_TOOL_FUNCTIONS
474 | # in ultimate_mcp_server/tools/__init__.py, but we keep it as a fallback in case of issues
475 | try:
476 | print("Attempting to manually register refine_tool_documentation tool as fallback...")
477 | from ultimate_mcp_server.tools.docstring_refiner import refine_tool_documentation
478 | print("Imported refine_tool_documentation successfully")
479 |
480 | # Create a simplified wrapper to avoid Pydantic validation issues
481 | @with_error_handling
482 | async def docstring_refiner_wrapper(
483 | tool_names=None,
484 | refine_all_available=False,
485 | max_iterations=1,
486 | ctx=None
487 | ):
488 | """
489 | Refine the documentation of MCP tools.
490 |
491 | Args:
492 | tool_names: List of tools to refine, or None to use refine_all_available
493 | refine_all_available: Whether to refine all available tools
494 | max_iterations: Maximum number of refinement iterations
495 | ctx: MCP context
496 |
497 | Returns:
498 | Refinement results
499 | """
500 | print(f"Wrapper called with tool_names={tool_names}, refine_all_available={refine_all_available}")
501 | # Simply pass through to the actual implementation
502 | return await refine_tool_documentation(
503 | tool_names=tool_names,
504 | refine_all_available=refine_all_available,
505 | max_iterations=max_iterations,
506 | ctx=ctx
507 | )
508 |
509 | # Register our simplified wrapper instead
510 | mcp.tool(name="refine_tool_documentation")(docstring_refiner_wrapper)
511 | print("Registered fallback wrapper tool successfully")
512 | logger.success("Successfully registered fallback wrapper for refine_tool_documentation tool", emoji_key="success")
513 | except Exception as e:
514 | logger.error(f"Failed to register fallback refine_tool_documentation tool: {e}", emoji_key="error", exc_info=True)
515 | print(f"Error registering fallback tool: {type(e).__name__}: {str(e)}")
516 | import traceback
517 | print("Stack trace:")
518 | traceback.print_exc()
519 | console.print(Panel(
520 | f"Error registering the fallback refine_tool_documentation tool: {escape(str(e))}\n\n"
521 | "This demo requires the docstring_refiner tool to be properly registered.",
522 | title="❌ Registration Failed",
523 | border_style="red",
524 | expand=False
525 | ))
526 | console.print(Panel(
527 | "This demo requires the docstring_refiner tool to be properly registered.\n"
528 | "Check that you have the correct version of the Ultimate MCP Server and dependencies installed.",
529 | title="⚠️ Demo Requirements Not Met",
530 | border_style="red",
531 | expand=False
532 | ))
533 | return gateway
534 |
535 | # Create flawed example tools if requested
536 | if create_flawed_tools:
537 | created_tools = await create_flawed_example_tools(mcp)
538 | if created_tools:
539 | console.print(Panel(
540 | f"Created {len(created_tools)} flawed example tools for testing:\n" +
541 | "\n".join([f"- [cyan]{name}[/cyan]" for name in created_tools]),
542 | title="🛠️ Flawed Tools Created",
543 | border_style="yellow",
544 | expand=False
545 | ))
546 |
547 | return gateway
548 |
549 |
550 | async def create_flawed_example_tools(mcp_instance):
551 | """Create flawed example tools for demonstration purposes."""
552 | created_tools = []
553 |
554 | try:
555 | # Create a temporary directory to store any needed files
556 | temp_dir = tempfile.mkdtemp(prefix="docstring_refiner_flawed_tools_")
557 | logger.info(f"Created temporary directory for flawed tools: {temp_dir}", emoji_key="setup")
558 |
559 | # Define several flawed tools with various issues
560 |
561 | # Tool 1: Ambiguous Description
562 | @mcp_instance.tool()
563 | async def flawed_process_text(text: str, mode: str = "simple", include_metadata: bool = False):
564 | """Process the given text.
565 |
566 | This tool does processing on text.
567 |
568 | Args:
569 | text: Text to process
570 | mode: Processing mode (simple, advanced, expert)
571 | include_metadata: Whether to include metadata in result
572 | """
573 | # Actual implementation doesn't matter for the demo
574 | result = {"processed": text[::-1]} # Just reverse the text
575 | if include_metadata:
576 | result["metadata"] = {"length": len(text), "mode": mode}
577 | return result
578 |
579 | created_tools.append("flawed_process_text")
580 |
581 | # Tool 2: Missing Parameter Descriptions
582 | @mcp_instance.tool()
583 | async def flawed_scrape_website(url, depth=1, extract_links=True, timeout=30.0):
584 | """Website scraper tool.
585 |
586 | Extracts content from websites.
587 | """
588 | # Simulate scraping
589 | return {
590 | "title": f"Page at {url}",
591 | "content": f"Scraped content with depth {depth}",
592 | "links": ["https://example.com/1", "https://example.com/2"] if extract_links else []
593 | }
594 |
595 | created_tools.append("flawed_scrape_website")
596 |
597 | # Tool 3: Confusing Schema & Inconsistent Description
598 | @mcp_instance.tool()
599 | async def flawed_data_processor(config, inputs, format="json"):
600 | """Processes data.
601 |
602 | The analyzer takes configuration and processes input data.
603 | The system allows different engine versions and parameters.
604 | """
605 | # Just return dummy data
606 | return {
607 | "outputs": [f"Processed: {i}" for i in inputs],
608 | "engine_used": config.get("engine", "v1"),
609 | "format": format
610 | }
611 |
612 | created_tools.append("flawed_data_processor")
613 |
614 | # Tool 4: Misleading Examples in Description but no schema examples
615 | @mcp_instance.tool()
616 | async def flawed_product_search(query, filters=None, sort="rating", page=1, per_page=20):
617 | """Search for products in the database.
618 |
619 | Example usage:
620 | ```
621 | search_products("laptop", {"category": "electronics", "min_price": 500}, sort_by="newest")
622 | ```
623 |
624 | The search function allows querying for items along with filtering and sorting options.
625 | """
626 | # Return dummy results
627 | return {
628 | "results": [{"id": i, "name": f"{query} product {i}", "price": random.randint(10, 1000)} for i in range(1, 6)],
629 | "total": 243,
630 | "page": page,
631 | "per_page": per_page
632 | }
633 |
634 | created_tools.append("flawed_product_search")
635 |
636 | # Tool 5: Schema with type issues (number vs integer conflicts)
637 | @mcp_instance.tool()
638 | async def flawed_calculator(values, operation, precision=2, scale_factor=1.0):
639 | """Statistical calculator.
640 |
641 | Calculate statistics on a set of values. The operation determines which
642 | statistic to calculate. Valid operations are:
643 |
644 | - sum: Calculate the sum of all values
645 | - average: Calculate the mean of the values
646 | - max: Find the maximum value
647 | - min: Find the minimum value
648 |
649 | The precision parameter must be an integer between 0 and 10.
650 |
651 | After calculation, the result is multiplied by the scale_factor.
652 | """
653 | # Perform the calculation
654 | if operation == "sum":
655 | result = sum(values)
656 | elif operation == "average":
657 | result = sum(values) / len(values) if values else 0
658 | elif operation == "max":
659 | result = max(values) if values else None
660 | elif operation == "min":
661 | result = min(values) if values else None
662 | else:
663 | result = None
664 |
665 | # Apply scale and precision
666 | if result is not None:
667 | result = round(result * scale_factor, precision)
668 |
669 | return {"result": result}
670 |
671 | created_tools.append("flawed_calculator")
672 |
673 | logger.success(f"Successfully created {len(created_tools)} flawed example tools", emoji_key="success")
674 | return created_tools
675 |
676 | except Exception as e:
677 | logger.error(f"Error creating flawed example tools: {e}", emoji_key="error", exc_info=True)
678 | console.print(f"[bold red]Error creating flawed example tools:[/bold red] {escape(str(e))}")
679 | return []
680 |
681 |
682 | async def display_refinement_progress(event: RefinementProgressEvent):
683 | """Handle progress events from the refinement process."""
684 | # Create a formatted message based on the event type
685 | if event.stage == "starting_iteration":
686 | message = f"[bold cyan]Starting iteration {event.iteration}/{event.total_iterations} for {event.tool_name}[/bold cyan]"
687 | elif event.stage == "agent_simulation":
688 | message = f"[blue]Simulating agent usage for {event.tool_name}...[/blue]"
689 | elif event.stage == "test_generation":
690 | message = f"[blue]Generating test cases for {event.tool_name}...[/blue]"
691 | elif event.stage == "test_execution_start":
692 | message = f"[blue]Executing tests for {event.tool_name}...[/blue]"
693 | elif event.stage == "test_execution_progress":
694 | message = f"[blue]Test execution progress: {event.progress_pct:.1f}%[/blue]"
695 | elif event.stage == "test_execution_end":
696 | success_rate = event.details.get("success_rate") if event.details else None
697 | if success_rate is not None:
698 | message = f"[green]Tests completed for {event.tool_name} - Success rate: {success_rate:.1%}[/green]"
699 | else:
700 | message = f"[green]Tests completed for {event.tool_name}[/green]"
701 | elif event.stage == "analysis_start":
702 | message = f"[blue]Analyzing results for {event.tool_name}...[/blue]"
703 | elif event.stage == "analysis_end":
704 | message = f"[green]Analysis completed for {event.tool_name}[/green]"
705 | elif event.stage == "schema_patching":
706 | message = f"[blue]Applying schema patches for {event.tool_name}...[/blue]"
707 | elif event.stage == "winnowing":
708 | message = f"[blue]Optimizing documentation for {event.tool_name}...[/blue]"
709 | elif event.stage == "iteration_complete":
710 | message = f"[bold green]Iteration {event.iteration} complete for {event.tool_name}[/bold green]"
711 | elif event.stage == "tool_complete":
712 | message = f"[bold magenta]Refinement complete for {event.tool_name}[/bold magenta]"
713 | elif event.stage == "error":
714 | message = f"[bold red]Error during refinement for {event.tool_name}: {event.message}[/bold red]"
715 | else:
716 | message = f"[dim]{event.message}[/dim]"
717 |
718 | # Print the message
719 | detail_console.print(message)
720 |
721 | # Print additional details if in verbose mode
722 | if SETTINGS["verbose"] and event.details:
723 | try:
724 | detail_console.print(f"[dim cyan]Details: {json.dumps(event.details, default=str)}[/dim cyan]")
725 | except Exception:
726 | detail_console.print(f"[dim cyan]Details: {event.details}[/dim cyan]")
727 |
728 | # Return True to confirm the callback was processed
729 | return True
730 |
731 |
732 | def create_text_diff(original: str, improved: str) -> Panel:
733 | """Create a colorized diff between original and improved text."""
734 | diff = difflib.unified_diff(
735 | original.splitlines(),
736 | improved.splitlines(),
737 | lineterm='',
738 | n=3 # Context lines
739 | )
740 |
741 | # Convert diff to rich text with colors
742 | rich_diff = []
743 | for line in diff:
744 | if line.startswith('+'):
745 | rich_diff.append(f"[green]{escape(line)}[/green]")
746 | elif line.startswith('-'):
747 | rich_diff.append(f"[red]{escape(line)}[/red]")
748 | elif line.startswith('@@'):
749 | rich_diff.append(f"[cyan]{escape(line)}[/cyan]")
750 | else:
751 | rich_diff.append(escape(line))
752 |
753 | # Return as panel
754 | if rich_diff:
755 | diff_panel = Panel(
756 | "\n".join(rich_diff),
757 | title="Documentation Changes (Diff)",
758 | border_style="yellow",
759 | expand=False
760 | )
761 | return diff_panel
762 | else:
763 | return Panel(
764 | "[dim italic]No differences found[/dim italic]",
765 | title="Documentation Changes (Diff)",
766 | border_style="dim",
767 | expand=False
768 | )
769 |
770 |
771 | def create_side_by_side_diff(original: str, improved: str, title: str = "Documentation Comparison") -> Panel:
772 | """Create a side-by-side comparison of original and improved text."""
773 | # Wrap in panels with highlighting
774 | original_panel = Panel(
775 | escape(original),
776 | title="Original",
777 | border_style="dim red",
778 | expand=True
779 | )
780 |
781 | improved_panel = Panel(
782 | escape(improved),
783 | title="Improved",
784 | border_style="green",
785 | expand=True
786 | )
787 |
788 | # Group the before/after panels (Rich Group renders them stacked vertically under the rule)
789 | comparison = Group(
790 | Rule("Before / After"),
791 | Group(
792 | original_panel,
793 | improved_panel
794 | )
795 | )
796 |
797 | return Panel(
798 | comparison,
799 | title=title,
800 | border_style="cyan",
801 | expand=False
802 | )
803 |
804 |
805 | def display_refinement_result(
806 | result: Dict,
807 | console: Console = console,
808 | visualization_level: str = "standard",
809 | save_to_file: bool = False,
810 | output_dir: Optional[Path] = None
811 | ):
812 | """Display the results of the docstring refinement process."""
813 | console.print(Rule("[bold green]Refinement Results[/bold green]", style="green"))
814 |
815 | # Summary statistics
816 | stats_table = Table(title="[bold]Summary Statistics[/bold]", box=box.ROUNDED, show_header=False, expand=False)
817 | stats_table.add_column("Metric", style="cyan", no_wrap=True)
818 | stats_table.add_column("Value", style="white")
819 | stats_table.add_row("Total Tools Refined", str(len(result.get("refined_tools", []))))
820 | stats_table.add_row("Total Iterations", str(result.get("total_iterations_run", 0)))
821 | stats_table.add_row("Total Tests Executed", str(result.get("total_test_calls_attempted", 0)))
822 | stats_table.add_row("Total Test Failures", str(result.get("total_test_calls_failed", 0)))
823 | stats_table.add_row("Total Validation Failures", str(result.get("total_schema_validation_failures", 0)))
824 | stats_table.add_row("Total Processing Time", f"{result.get('total_processing_time', 0.0):.2f}s")
825 | stats_table.add_row("Total Cost", f"${result.get('total_refinement_cost', 0.0):.6f}")
826 | console.print(stats_table)
827 |
828 | # Save results to file if requested
829 | if save_to_file and output_dir:
830 | timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
831 | result_file = output_dir / f"refinement_results_{timestamp}.json"
832 | try:
833 | with open(result_file, 'w') as f:
834 | json.dump(result, f, indent=2, default=str)
835 | console.print(f"[green]Results saved to:[/green] {result_file}")
836 | except Exception as e:
837 | console.print(f"[red]Error saving results to file:[/red] {e}")
838 |
839 | # Tools refined
840 | refined_tools = result.get("refined_tools", [])
841 | if refined_tools:
842 | console.print("\n[bold]Tools Refined:[/bold]")
843 |
844 | # Results tallying
845 | total_description_improvements = 0
846 | total_schema_improvements = 0
847 | total_example_improvements = 0
848 | flaw_categories_observed = {}
849 |
850 | for i, tool in enumerate(refined_tools):
851 | tool_name = tool.get("tool_name", "Unknown tool")
852 | initial_success_rate = tool.get("initial_success_rate", 0.0)
853 | final_success_rate = tool.get("final_success_rate", 0.0)
854 | improvement_factor = tool.get("improvement_factor", 0.0)
855 |
856 | # Decide on panel color based on improvement
857 | if improvement_factor > 0.5:
858 | border_style = "green"
859 | elif improvement_factor > 0:
860 | border_style = "blue"
861 | else:
862 | border_style = "yellow"
863 |
864 | # Create a panel for each tool
865 | success_change = (final_success_rate - initial_success_rate) * 100
866 | success_change_str = (
867 | f"[green]+{success_change:.1f}%[/green]" if success_change > 0 else
868 | f"[red]{success_change:.1f}%[/red]" if success_change < 0 else
869 | "[yellow]No change[/yellow]"
870 | )
871 |
872 | tool_panel_content = [
873 | f"Initial Success Rate: [yellow]{initial_success_rate:.1%}[/yellow]",
874 | f"Final Success Rate: [green]{final_success_rate:.1%}[/green]",
875 | f"Change: {success_change_str}",
876 | f"Improvement Factor: [cyan]{improvement_factor:.2f}x[/cyan]"
877 | ]
878 |
879 | console.print(Panel(
880 | Group(*tool_panel_content),
881 | title=f"[bold]{i+1}. {tool_name}[/bold]",
882 | border_style=border_style,
883 | expand=False
884 | ))
885 |
886 | # Display the final proposed changes
887 | final_changes = tool.get("final_proposed_changes", {})
888 | iterations = tool.get("iterations", [])
889 |
890 | if final_changes:
891 | # Check if description was improved
892 | original_desc = None
893 | for iter_data in iterations:
894 | if iter_data.get("iteration") == 1:
895 | # Get the original description from the first iteration
896 | original_desc = iter_data.get("documentation_used", {}).get("description", "")
897 | break
898 |
899 | final_desc = final_changes.get("description", "")
900 |
901 | # Count this as an improvement if descriptions differ
902 | if original_desc and final_desc and original_desc != final_desc:
903 | total_description_improvements += 1
904 |
905 | # Display description changes based on visualization level
906 | if visualization_level in ["standard", "full"]:
907 | console.print("[bold cyan]Description Changes:[/bold cyan]")
908 |
909 | if visualization_level == "full":
910 | # Show diff view for detailed visualization
911 | console.print(create_text_diff(original_desc, final_desc))
912 |
913 | # Show side-by-side comparison
914 | console.print(create_side_by_side_diff(
915 | original_desc,
916 | final_desc,
917 | title="Description Comparison"
918 | ))
919 |
920 | # Display schema patches if any
921 | schema_patches = tool.get("final_proposed_schema_patches", [])
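# Each patch is expected to follow JSON Patch (RFC 6902) conventions; the shape below is
# illustrative only, not taken from an actual refinement run:
#   [{"op": "add", "path": "/properties/mode/description", "value": "Processing mode"}]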
922 | if schema_patches:
923 | total_schema_improvements += 1
924 |
925 | if visualization_level in ["standard", "full"]:
926 | console.print("[bold cyan]Schema Patches Applied:[/bold cyan]")
927 | console.print(Panel(
928 | Syntax(json.dumps(schema_patches, indent=2), "json", theme="default", line_numbers=False),
929 | title="JSON Patch Operations",
930 | border_style="magenta",
931 | expand=False
932 | ))
933 |
934 | # Display examples
935 | examples = final_changes.get("examples", [])
936 | if examples:
937 | total_example_improvements += len(examples)
938 |
939 | if visualization_level in ["standard", "full"]:
940 | console.print("[bold cyan]Generated Examples:[/bold cyan]")
941 | examples_to_show = examples if visualization_level == "full" else examples[:3]
942 |
943 | for j, example in enumerate(examples_to_show):
944 | args = example.get("args", {})
945 | comment = example.get("comment", "No description")
946 | addresses_failure = example.get("addresses_failure_pattern", "")
947 |
948 | # Add failure pattern as subtitle if present
949 | subtitle = f"Addresses: {addresses_failure}" if addresses_failure else None
950 |
951 | console.print(Panel(
952 | Syntax(json.dumps(args, indent=2), "json", theme="default", line_numbers=False),
953 | title=f"Example {j+1}: {comment}",
954 | subtitle=subtitle,
955 | border_style="dim green",
956 | expand=False
957 | ))
958 |
959 | if len(examples) > 3 and visualization_level == "standard":
960 | console.print(f"[dim]...and {len(examples) - 3} more examples[/dim]")
961 |
962 | # Collect flaw categories if available
963 | for iter_data in iterations:
964 | analysis = iter_data.get("analysis", {})
965 | if analysis:
966 | flaws = analysis.get("identified_flaw_categories", [])
967 | for flaw in flaws:
968 | if flaw not in flaw_categories_observed:
969 | flaw_categories_observed[flaw] = 0
970 | flaw_categories_observed[flaw] += 1
971 |
972 | console.print() # Add spacing between tools
973 |
974 | # Display improvement summary
975 | console.print(Rule("[bold blue]Improvement Summary[/bold blue]", style="blue"))
976 |
977 | improvement_table = Table(box=box.SIMPLE, show_header=True, header_style="bold cyan")
978 | improvement_table.add_column("Improvement Type", style="blue")
979 | improvement_table.add_column("Count", style="cyan")
980 | improvement_table.add_column("Details", style="white")
981 |
982 | improvement_table.add_row(
983 | "Description Improvements",
984 | str(total_description_improvements),
985 | f"{total_description_improvements} of {len(refined_tools)} tools ({total_description_improvements/len(refined_tools)*100:.0f}%)"
986 | )
987 | improvement_table.add_row(
988 | "Schema Improvements",
989 | str(total_schema_improvements),
990 | f"{total_schema_improvements} of {len(refined_tools)} tools ({total_schema_improvements/len(refined_tools)*100:.0f}%)"
991 | )
992 | improvement_table.add_row(
993 | "Example Additions",
994 | str(total_example_improvements),
995 | f"Average {total_example_improvements/len(refined_tools):.1f} examples per tool"
996 | )
997 |
998 | console.print(improvement_table)
999 |
1000 | # Display flaw categories if any were observed
1001 | if flaw_categories_observed and visualization_level in ["standard", "full"]:
1002 | console.print("\n[bold cyan]Documentation Flaws Identified:[/bold cyan]")
1003 |
1004 | flaws_table = Table(box=box.SIMPLE, show_header=True, header_style="bold magenta")
1005 | flaws_table.add_column("Flaw Category", style="magenta")
1006 | flaws_table.add_column("Occurrences", style="cyan")
1007 | flaws_table.add_column("Description", style="white")
1008 |
1009 | # Map flaw categories to descriptions
1010 | flaw_descriptions = {
1011 | "MISSING_DESCRIPTION": "Documentation is missing key information",
1012 | "AMBIGUOUS_DESCRIPTION": "Description is unclear or can be interpreted in multiple ways",
1013 | "INCORRECT_DESCRIPTION": "Description contains incorrect information",
1014 | "MISSING_SCHEMA_CONSTRAINT": "Schema is missing important constraints",
1015 | "INCORRECT_SCHEMA_CONSTRAINT": "Schema contains incorrect constraints",
1016 | "OVERLY_RESTRICTIVE_SCHEMA": "Schema is unnecessarily restrictive",
1017 | "TYPE_CONFUSION": "Parameter types are inconsistent or unclear",
1018 | "MISSING_EXAMPLE": "Documentation lacks necessary examples",
1019 | "MISLEADING_EXAMPLE": "Examples provided are incorrect or misleading",
1020 | "INCOMPLETE_EXAMPLE": "Examples are present but insufficient",
1021 | "PARAMETER_DEPENDENCY_UNCLEAR": "Dependencies between parameters are not explained",
1022 | "CONFLICTING_CONSTRAINTS": "Schema contains contradictory constraints",
1023 | "AGENT_FORMULATION_ERROR": "Documentation hinders LLM agent's ability to use the tool",
1024 | "SCHEMA_PREVALIDATION_FAILURE": "Schema validation issues",
1025 | "TOOL_EXECUTION_ERROR": "Issues with tool execution",
1026 | "UNKNOWN": "Unspecified documentation issue"
1027 | }
1028 |
1029 | # Sort flaws by occurrence count
1030 | sorted_flaws = sorted(flaw_categories_observed.items(), key=lambda x: x[1], reverse=True)
1031 |
1032 | for flaw, count in sorted_flaws:
1033 | flaws_table.add_row(
1034 | flaw,
1035 | str(count),
1036 | flaw_descriptions.get(flaw, "No description available")
1037 | )
1038 |
1039 | console.print(flaws_table)
1040 |
1041 | # Error reporting
1042 | errors = result.get("errors_during_refinement_process", [])
1043 | if errors:
1044 | console.print("[bold red]Errors During Refinement:[/bold red]")
1045 | for error in errors:
1046 | console.print(f"- [red]{escape(error)}[/red]")
1047 |
1048 |
1049 | async def get_suitable_tools(
1050 | mcp_instance,
1051 | count: int = 1,
1052 | complexity: str = "medium",
1053 | exclude_tools: Optional[List[str]] = None
1054 | ) -> List[str]:
1055 | """
1056 | Find suitable tools for refinement based on complexity.
1057 |
1058 | Args:
1059 | mcp_instance: The MCP server instance
1060 | count: Number of tools to return
1061 | complexity: Desired complexity level ("simple", "medium", "complex")
1062 | exclude_tools: List of tool names to exclude
1063 |
1064 | Returns:
1065 | List of suitable tool names
1066 | """
1067 | exclude_tools = exclude_tools or []
1068 |
1069 | # Get all available tools
1070 | tool_list = await mcp_instance.list_tools()
1071 |
1072 | # Filter out excluded tools and refine_tool_documentation itself
1073 | available_tools = [
1074 | t.name for t in tool_list
1075 | if t.name not in exclude_tools and t.name != "refine_tool_documentation"
1076 | ]
1077 |
1078 | if not available_tools:
1079 | return []
1080 |
1081 | # Define complexity criteria based on schema properties
1082 | if complexity == "simple":
1083 | # Simple tools have few required parameters and a flat schema
1084 | preferred_tools = []
1085 | for tool_name in available_tools:
1086 | try:
1087 | tool_def = next((t for t in tool_list if t.name == tool_name), None)
1088 | if not tool_def:
1089 | continue
1090 |
1091 | input_schema = getattr(tool_def, "inputSchema", {})
1092 | if not input_schema:
1093 | continue
1094 |
1095 | properties = input_schema.get("properties", {})
1096 | required = input_schema.get("required", [])
1097 |
1098 | # Simple tools have few properties and required fields
1099 | if len(properties) <= 3 and len(required) <= 1:
1100 | # Check for nested objects which would increase complexity
1101 | has_nested = any(
1102 | isinstance(prop, dict) and prop.get("type") == "object"
1103 | for prop in properties.values()
1104 | )
1105 |
1106 | if not has_nested:
1107 | preferred_tools.append(tool_name)
1108 | except Exception:
1109 | continue
1110 |
1111 | elif complexity == "complex":
1112 | # Complex tools have deep nested structures and many required parameters
1113 | preferred_tools = []
1114 | for tool_name in available_tools:
1115 | try:
1116 | tool_def = next((t for t in tool_list if t.name == tool_name), None)
1117 | if not tool_def:
1118 | continue
1119 |
1120 | input_schema = getattr(tool_def, "inputSchema", {})
1121 | if not input_schema:
1122 | continue
1123 |
1124 | properties = input_schema.get("properties", {})
1125 | required = input_schema.get("required", [])
1126 |
1127 | # Complex tools have many properties or required fields
1128 | if len(properties) >= 5 or len(required) >= 3:
1129 | # Check for nested objects which would increase complexity
1130 | has_nested = any(
1131 | isinstance(prop, dict) and prop.get("type") == "object"
1132 | for prop in properties.values()
1133 | )
1134 |
1135 | if has_nested:
1136 | preferred_tools.append(tool_name)
1137 | except Exception:
1138 | continue
1139 |
1140 | else: # medium complexity (default)
1141 | # Medium tools are somewhere in between
1142 | preferred_tools = []
1143 | for tool_name in available_tools:
1144 | try:
1145 | tool_def = next((t for t in tool_list if t.name == tool_name), None)
1146 | if not tool_def:
1147 | continue
1148 |
1149 | input_schema = getattr(tool_def, "inputSchema", {})
1150 | if not input_schema:
1151 | continue
1152 |
1153 | properties = input_schema.get("properties", {})
1154 |
1155 | # Medium tools have a moderate number of properties
1156 | if 3 <= len(properties) <= 6:
1157 | preferred_tools.append(tool_name)
1158 | except Exception:
1159 | continue
1160 |
1161 | # If we couldn't find tools matching the complexity criteria, fall back to any available tool
1162 | if not preferred_tools:
1163 | preferred_tools = available_tools
1164 |
1165 | # Prioritize tools without namespaces (i.e., not "namespace:tool_name")
1166 | prioritized_tools = [t for t in preferred_tools if ":" not in t]
1167 |
1168 | # If we still need more tools and have prioritized all we could, add namespace tools
1169 | if len(prioritized_tools) < count:
1170 | namespace_tools = [t for t in preferred_tools if ":" in t]
1171 | prioritized_tools.extend(namespace_tools)
1172 |
1173 | # Return the requested number of tools (or fewer if not enough are available)
1174 | return prioritized_tools[:min(count, len(prioritized_tools))]
1175 |
1176 |
1177 | async def demo_single_tool_refinement(
1178 | gateway: Gateway,
1179 | tracker: CostTracker,
1180 | target_tool: Optional[str] = None,
1181 | refinement_provider: Optional[str] = None,
1182 | refinement_model: Optional[str] = None,
1183 | max_iterations: Optional[int] = None
1184 | ):
1185 | """Demonstrate refining documentation for a single tool."""
1186 | console.print(Rule("[bold cyan]Single Tool Refinement[/bold cyan]", style="cyan"))
1187 |
1188 | # Use specified tool or find a suitable one
1189 | selected_tool = None
1190 | if target_tool:
1191 | # Check if specified tool exists
1192 | tool_list = await gateway.mcp.list_tools()
1193 | available_tools = [t.name for t in tool_list]
1194 |
1195 | if target_tool in available_tools:
1196 | selected_tool = target_tool
1197 | else:
1198 | logger.warning(f"Specified tool '{target_tool}' not found", emoji_key="warning")
1199 | console.print(f"[yellow]Warning:[/yellow] Specified tool '{target_tool}' not found. Selecting automatically.")
1200 |
1201 | # Auto-select if needed
1202 | if not selected_tool:
1203 | suitable_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="medium")
1204 |
1205 | if suitable_tools:
1206 | selected_tool = suitable_tools[0]
1207 | else:
1208 | logger.error("No suitable tools found for refinement demo", emoji_key="error")
1209 | console.print("[bold red]Error:[/bold red] No suitable tools found for refinement demo.")
1210 | return
1211 |
1212 | console.print(f"Selected tool for refinement: [cyan]{selected_tool}[/cyan]")
1213 |
1214 | # Determine provider and model
1215 | provider = refinement_provider or Provider.OPENAI.value
1216 |
1217 | # Find best available model if not specified
1218 | if not refinement_model:
1219 | try:
1220 | if provider == Provider.OPENAI.value:
1221 | model = "gpt-4.1" # Prefer this for best results
1222 | # Check if model is available
1223 | provider_instance = gateway.providers.get(provider)
1224 | if provider_instance:
1225 | models = await provider_instance.list_models()
1226 | model_ids = [m.get("id") for m in models]
1227 | if model not in model_ids:
1228 | model = "gpt-4.1-mini" # Fall back to mini
1229 | elif provider == Provider.ANTHROPIC.value:
1230 | model = "claude-3-5-sonnet"
1231 | else:
1232 | # Use default model for other providers
1233 | provider_instance = gateway.providers.get(provider)
1234 | if provider_instance:
1235 | model = provider_instance.get_default_model()
1236 | else:
1237 | model = None
1238 | except Exception as e:
1239 | logger.warning(f"Error determining model for {provider}: {e}", emoji_key="warning")
1240 | model = None
1241 |
1242 | # If we still don't have a model, try a different provider
1243 | if not model:
1244 | for fallback_provider in SETTINGS["fallback_providers"]:
1245 | try:
1246 | provider_instance = gateway.providers.get(fallback_provider)
1247 | if provider_instance:
1248 | model = provider_instance.get_default_model()
1249 | provider = fallback_provider
1250 | break
1251 | except Exception:
1252 | continue
1253 |
1254 | # If still no model, use a reasonable default
1255 | if not model:
1256 | model = "gpt-4.1-mini"
1257 | provider = Provider.OPENAI.value
1258 | else:
1259 | model = refinement_model
1260 |
1261 | # Define refinement parameters
1262 | iterations = max_iterations or 2 # Default to 2 for demo
1263 |
1264 | params = {
1265 | "tool_names": [selected_tool],
1266 | "max_iterations": iterations,
1267 | "refinement_model_config": {
1268 | "provider": provider,
1269 | "model": model,
1270 | "temperature": 0.2,
1271 | },
1272 | "validation_level": "full",
1273 | "enable_winnowing": True,
1274 | "progress_callback": display_refinement_progress,
1275 | }
1276 |
1277 | console.print(Panel(
1278 | Syntax(json.dumps({k: v for k, v in params.items() if k != "progress_callback"}, indent=2), "json"),
1279 | title="Refinement Parameters",
1280 | border_style="dim cyan",
1281 | expand=False
1282 | ))
1283 |
1284 | # Create a progress display
1285 | console.print("\n[bold cyan]Refinement Progress:[/bold cyan]")
1286 | detail_console.print(f"\n[bold]Starting refinement for {selected_tool}...[/bold]")
1287 |
1288 | # Estimate cost
1289 | estimated_cost = 0.03 * iterations # Very rough estimate per iteration
1290 | console.print(f"[cyan]Estimated cost:[/cyan] ${estimated_cost:.2f} USD")
1291 |
1292 | # Check if cost would exceed limit
1293 | if estimated_cost > SETTINGS["cost_limit"]:
1294 | console.print(Panel(
1295 | f"Estimated cost (${estimated_cost:.2f}) exceeds the set limit (${SETTINGS['cost_limit']:.2f}).\n"
1296 | "Adjusting iterations to stay within budget.",
1297 | title="⚠️ Cost Limit Warning",
1298 | border_style="yellow",
1299 | expand=False
1300 | ))
1301 | # Adjust iterations to stay under limit
1302 | adjusted_iterations = max(1, int(SETTINGS["cost_limit"] / 0.03))
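# e.g. the default $5.00 limit with the ~$0.03/iteration estimate above allows at most 166 iterations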
1303 | params["max_iterations"] = adjusted_iterations
1304 | console.print(f"[yellow]Reducing iterations from {iterations} to {adjusted_iterations}[/yellow]")
1305 |
1306 | with Progress(
1307 | TextColumn("[bold blue]{task.description}"),
1308 | BarColumn(complete_style="green", finished_style="green"),
1309 | TaskProgressColumn(),
1310 | TimeElapsedColumn(),
1311 | console=console,
1312 | expand=True
1313 | ) as progress:
1314 | task_id = progress.add_task("[cyan]Refining tool documentation...", total=100)
1315 |
1316 | # Execute the refinement
1317 | start_time = time.time()
1318 | try:
1319 | result = await gateway.mcp.call_tool("refine_tool_documentation", params)
1320 |
1321 | # Cosmetic progress fill: call_tool has already returned at this point, so this loop
1322 | # only animates the bar; live stage updates were streamed via display_refinement_progress
1323 | elapsed = 0
1324 | while progress.tasks[task_id].completed < 100 and elapsed < 60:
1325 | progress.update(task_id, completed=min(95, elapsed * 1.5))
1326 | await asyncio.sleep(0.5)
1327 | elapsed = time.time() - start_time
1328 |
1329 | progress.update(task_id, completed=100)
1330 |
1331 | # Track cost if available
1332 | if isinstance(result, dict) and "total_refinement_cost" in result:
1333 | tracker.add_generic_cost(
1334 | cost=result.get("total_refinement_cost", 0.0),
1335 | description=f"Refinement of {selected_tool}",
1336 | provider=provider,
1337 | model=model
1338 | )
1339 |
1340 | # Display the results
1341 | display_refinement_result(
1342 | result,
1343 | console=console,
1344 | visualization_level=SETTINGS["visualization_level"],
1345 | save_to_file=SETTINGS["save_results"],
1346 | output_dir=SETTINGS["output_dir"]
1347 | )
1348 |
1349 | return result
1350 |
1351 | except Exception as e:
1352 | progress.update(task_id, completed=100, description="[bold red]Refinement failed!")
1353 | logger.error(f"Error during single tool refinement: {e}", emoji_key="error", exc_info=True)
1354 | console.print(f"[bold red]Error during refinement:[/bold red] {escape(str(e))}")
1355 | return None
1356 |
1357 |
1358 | async def demo_multi_tool_refinement(
1359 | gateway: Gateway,
1360 | tracker: CostTracker,
1361 | target_tools: Optional[List[str]] = None,
1362 | refinement_provider: Optional[str] = None,
1363 | refinement_model: Optional[str] = None,
1364 | max_iterations: Optional[int] = None
1365 | ):
1366 | """Demonstrate refining documentation for multiple tools simultaneously."""
1367 | console.print(Rule("[bold cyan]Multi-Tool Refinement[/bold cyan]", style="cyan"))
1368 |
1369 | # Use specified tools or find suitable ones
1370 | selected_tools = []
1371 |
1372 | if target_tools:
1373 | # Check which specified tools exist
1374 | tool_list = await gateway.mcp.list_tools()
1375 | available_tools = [t.name for t in tool_list]
1376 |
1377 | for tool_name in target_tools:
1378 | if tool_name in available_tools:
1379 | selected_tools.append(tool_name)
1380 | else:
1381 | logger.warning(f"Specified tool '{tool_name}' not found", emoji_key="warning")
1382 | console.print(f"[yellow]Warning:[/yellow] Specified tool '{tool_name}' not found. Skipping.")
1383 |
1384 | # Auto-select if needed
1385 | if not selected_tools:
1386 | # Get various complexity levels for a diverse mix
1387 | simple_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="simple")
1388 | medium_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="medium", exclude_tools=simple_tools)
1389 | complex_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="complex", exclude_tools=simple_tools + medium_tools)
1390 |
1391 | selected_tools = simple_tools + medium_tools + complex_tools
1392 |
1393 | if not selected_tools:
1394 | # Fall back to any available tools
1395 | selected_tools = await get_suitable_tools(gateway.mcp, count=3, complexity="medium")
1396 |
1397 | if not selected_tools:
1398 | logger.error("No suitable tools found for multi-tool refinement demo", emoji_key="error")
1399 | console.print("[bold red]Error:[/bold red] No suitable tools found for multi-tool refinement demo.")
1400 | return
1401 |
1402 | console.print(f"Selected tools for refinement: [cyan]{', '.join(selected_tools)}[/cyan]")
1403 |
1404 | # Determine provider and model
1405 | provider = refinement_provider or Provider.OPENAI.value
1406 |
1407 | # Find best available model if not specified
1408 | if not refinement_model:
1409 | try:
1410 | if provider == Provider.OPENAI.value:
1411 | model = "gpt-4.1-mini" # Use mini for multi-tool to save cost
1412 | # Check if model is available
1413 | provider_instance = gateway.providers.get(provider)
1414 | if provider_instance:
1415 | models = await provider_instance.list_models()
1416 | model_ids = [m.get("id") for m in models]
1417 | if model not in model_ids:
1418 | model = provider_instance.get_default_model()
1419 | elif provider == Provider.ANTHROPIC.value:
1420 | model = "claude-3-5-haiku"
1421 | else:
1422 | # Use default model for other providers
1423 | provider_instance = gateway.providers.get(provider)
1424 | if provider_instance:
1425 | model = provider_instance.get_default_model()
1426 | else:
1427 | model = None
1428 | except Exception as e:
1429 | logger.warning(f"Error determining model for {provider}: {e}", emoji_key="warning")
1430 | model = None
1431 |
1432 | # If we still don't have a model, try a different provider
1433 | if not model:
1434 | for fallback_provider in SETTINGS["fallback_providers"]:
1435 | try:
1436 | provider_instance = gateway.providers.get(fallback_provider)
1437 | if provider_instance:
1438 | model = provider_instance.get_default_model()
1439 | provider = fallback_provider
1440 | break
1441 | except Exception:
1442 | continue
1443 |
1444 | # If still no model, use a reasonable default
1445 | if not model:
1446 | model = "gpt-4.1-mini"
1447 | provider = Provider.OPENAI.value
1448 | else:
1449 | model = refinement_model
1450 |
1451 | # Define refinement parameters with variations from the first demo
1452 | iterations = max_iterations or 1 # Default to 1 for multi-tool
1453 |
1454 | params = {
1455 | "tool_names": selected_tools,
1456 | "max_iterations": iterations,
1457 | "refinement_model_config": {
1458 | "provider": provider,
1459 | "model": model,
1460 | "temperature": 0.3,
1461 | },
1462 | # Add an ensemble for better analysis if using full visualization
1463 | "analysis_ensemble_configs": [
1464 | {
1465 | "provider": Provider.ANTHROPIC.value if provider != Provider.ANTHROPIC.value else Provider.OPENAI.value,
1466 | "model": "claude-3-5-haiku" if provider != Provider.ANTHROPIC.value else "gpt-4.1-mini",
1467 | "temperature": 0.2,
1468 | }
1469 | ] if SETTINGS["visualization_level"] == "full" else None,
1470 | "validation_level": "basic", # Use basic validation for speed
1471 | "enable_winnowing": False, # Skip winnowing for demo speed
1472 | "progress_callback": display_refinement_progress,
1473 | }
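    # The ensemble config above pairs the primary refinement model with a model from the other
    # provider (Anthropic vs OpenAI), and only when visualization_level is "full" -- presumably
    # to get a second opinion during analysis.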
1474 |
1475 | console.print(Panel(
1476 | Syntax(json.dumps({k: v for k, v in params.items() if k not in ["progress_callback", "analysis_ensemble_configs"]}, indent=2), "json"),
1477 | title="Multi-Tool Refinement Parameters",
1478 | border_style="dim cyan",
1479 | expand=False
1480 | ))
1481 |
1482 | # Estimate cost - higher with multiple tools
1483 | estimated_cost = 0.02 * iterations * len(selected_tools)
1484 | console.print(f"[cyan]Estimated cost:[/cyan] ${estimated_cost:.2f} USD")
1485 |
1486 | # Check if cost would exceed limit
1487 | if estimated_cost > SETTINGS["cost_limit"]:
1488 | console.print(Panel(
1489 | f"Estimated cost (${estimated_cost:.2f}) exceeds the set limit (${SETTINGS['cost_limit']:.2f}).\n"
1490 | "Reducing tool count to stay within budget.",
1491 | title="⚠️ Cost Limit Warning",
1492 | border_style="yellow",
1493 | expand=False
1494 | ))
1495 | # Reduce the number of tools
1496 | max_tools = max(1, int(SETTINGS["cost_limit"] / (0.02 * iterations)))
1497 | selected_tools = selected_tools[:max_tools]
1498 | params["tool_names"] = selected_tools
1499 | console.print(f"[yellow]Reducing tools to: {', '.join(selected_tools)}[/yellow]")
1500 |
1501 | # Create a progress display
1502 | console.print("\n[bold cyan]Multi-Tool Refinement Progress:[/bold cyan]")
1503 | detail_console.print(f"\n[bold]Starting refinement for {len(selected_tools)} tools...[/bold]")
1504 |
1505 | # We'll create a task for each tool
1506 | with Progress(
1507 | TextColumn("[bold blue]{task.description}"),
1508 | BarColumn(complete_style="green", finished_style="green"),
1509 | TaskProgressColumn(),
1510 | TimeElapsedColumn(),
1511 | console=console,
1512 | expand=True
1513 | ) as progress:
1514 | # Create a task for overall progress
1515 | overall_task = progress.add_task("[cyan]Overall progress...", total=100)
1516 |
1517 | # Execute the refinement
1518 | start_time = time.time()
1519 | try:
1520 | result = await gateway.mcp.call_tool("refine_tool_documentation", params)
1521 |
1522 | # Simulate progress updates
1523 | # The actual progress is displayed through display_refinement_progress
1524 | elapsed = 0
1525 | while progress.tasks[overall_task].completed < 100 and elapsed < 120:
1526 | progress.update(overall_task, completed=min(95, elapsed * 0.8))
1527 | await asyncio.sleep(0.5)
1528 | elapsed = time.time() - start_time
1529 |
1530 | progress.update(overall_task, completed=100)
1531 |
1532 | # Track cost if available
1533 | if isinstance(result, dict) and "total_refinement_cost" in result:
1534 | tracker.add_generic_cost(
1535 | cost=result.get("total_refinement_cost", 0.0),
1536 | description=f"Multi-tool refinement ({len(selected_tools)} tools)",
1537 | provider=provider,
1538 | model=model
1539 | )
1540 |
1541 | # Display the results
1542 | display_refinement_result(
1543 | result,
1544 | console=console,
1545 | visualization_level=SETTINGS["visualization_level"],
1546 | save_to_file=SETTINGS["save_results"],
1547 | output_dir=SETTINGS["output_dir"]
1548 | )
1549 |
1550 | return result
1551 |
1552 | except Exception as e:
1553 | progress.update(overall_task, completed=100, description="[bold red]Refinement failed!")
1554 | logger.error(f"Error during multi-tool refinement: {e}", emoji_key="error", exc_info=True)
1555 | console.print(f"[bold red]Error during multi-tool refinement:[/bold red] {escape(str(e))}")
1556 | return None
1557 |
1558 |
1559 | async def demo_custom_test_generation(
1560 | gateway: Gateway,
1561 | tracker: CostTracker,
1562 | target_tool: Optional[str] = None,
1563 | refinement_provider: Optional[str] = None,
1564 | refinement_model: Optional[str] = None,
1565 | max_iterations: Optional[int] = None
1566 | ):
1567 | """Demonstrate refinement with custom test generation strategies."""
1568 | console.print(Rule("[bold cyan]Custom Test Generation Strategy[/bold cyan]", style="cyan"))
1569 |
1570 | # Choose a single tool to refine
1571 | selected_tool = None
1572 |
1573 | if target_tool:
1574 | # Check if specified tool exists
1575 | tool_list = await gateway.mcp.list_tools()
1576 | available_tools = [t.name for t in tool_list]
1577 |
1578 | if target_tool in available_tools:
1579 | selected_tool = target_tool
1580 | else:
1581 | logger.warning(f"Specified tool '{target_tool}' not found", emoji_key="warning")
1582 | console.print(f"[yellow]Warning:[/yellow] Specified tool '{target_tool}' not found. Selecting automatically.")
1583 |
1584 | # Auto-select if needed (prefer complex tools for custom test demo)
1585 | if not selected_tool:
1586 | complex_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="complex")
1587 |
1588 | if complex_tools:
1589 | selected_tool = complex_tools[0]
1590 | else:
1591 | # Fall back to medium complexity
1592 | medium_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="medium")
1593 |
1594 | if medium_tools:
1595 | selected_tool = medium_tools[0]
1596 | else:
1597 | # Last resort - any tool
1598 | simple_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="simple")
1599 |
1600 | if simple_tools:
1601 | selected_tool = simple_tools[0]
1602 |
1603 | if not selected_tool:
1604 | logger.error("No suitable tools found for custom test generation demo", emoji_key="error")
1605 | console.print("[bold red]Error:[/bold red] No suitable tools found for custom test generation demo.")
1606 | return
1607 |
1608 | console.print(f"Selected tool for custom test generation: [cyan]{selected_tool}[/cyan]")
1609 |
1610 | # Determine provider and model
1611 | provider = refinement_provider or Provider.OPENAI.value
1612 |
1613 | # Find best available model if not specified
1614 | if not refinement_model:
1615 | try:
1616 | if provider == Provider.OPENAI.value:
1617 | model = "gpt-4.1" # Prefer this for best results
1618 | # Check if model is available
1619 | provider_instance = gateway.providers.get(provider)
1620 | if provider_instance:
1621 | models = await provider_instance.list_models()
1622 | model_ids = [m.get("id") for m in models]
1623 | if model not in model_ids:
1624 | model = "gpt-4.1-mini" # Fall back to mini
1625 | elif provider == Provider.ANTHROPIC.value:
1626 | model = "claude-3-5-sonnet"
1627 | else:
1628 | # Use default model for other providers
1629 | provider_instance = gateway.providers.get(provider)
1630 | if provider_instance:
1631 | model = provider_instance.get_default_model()
1632 | else:
1633 | model = None
1634 | except Exception as e:
1635 | logger.warning(f"Error determining model for {provider}: {e}", emoji_key="warning")
1636 | model = None
1637 |
1638 | # If we still don't have a model, try a different provider
1639 | if not model:
1640 | for fallback_provider in SETTINGS["fallback_providers"]:
1641 | try:
1642 | provider_instance = gateway.providers.get(fallback_provider)
1643 | if provider_instance:
1644 | model = provider_instance.get_default_model()
1645 | provider = fallback_provider
1646 | break
1647 | except Exception:
1648 | continue
1649 |
1650 | # If still no model, use a reasonable default
1651 | if not model:
1652 | model = "gpt-4.1-mini"
1653 | provider = Provider.OPENAI.value
1654 | else:
1655 | model = refinement_model
1656 |
1657 | # Define refinement parameters with custom test generation strategy
1658 | iterations = max_iterations or 1
1659 |
1660 | params = {
1661 | "tool_names": [selected_tool],
1662 | "max_iterations": iterations,
1663 | "refinement_model_config": {
1664 | "provider": provider,
1665 | "model": model,
1666 | "temperature": 0.2,
1667 | },
1668 | # Custom test generation strategy
1669 | "generation_config": {
1670 | "positive_required_only": 3, # More tests with just required params
1671 | "positive_optional_mix": 5, # More tests with mixed optional params
1672 | "negative_type": 4, # More type validation checks
1673 | "negative_required": 3, # More tests with missing required params
1674 | "edge_boundary_min": 2, # More tests with boundary values
1675 | "edge_boundary_max": 2,
1676 | "llm_realistic_combo": 5, # More LLM-generated realistic tests
1677 | "llm_ambiguity_probe": 3, # More tests probing ambiguities
1678 | },
1679 | "validation_level": "full",
1680 | "enable_winnowing": True,
1681 | "progress_callback": display_refinement_progress,
1682 | }
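    # Each generation_config key above sets how many test cases of that category the refiner
    # generates; categories not listed presumably fall back to the tool's defaults (assumption).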
1683 |
1684 | console.print(Panel(
1685 | Group(
1686 | Syntax(json.dumps({k: v for k, v in params.items() if k not in ["progress_callback", "generation_config"]}, indent=2), "json"),
1687 | "\n[bold cyan]Custom Test Generation Strategy:[/bold cyan]",
1688 | Syntax(json.dumps(params["generation_config"], indent=2), "json"),
1689 | ),
1690 | title="Custom Test Generation Parameters",
1691 | border_style="dim cyan",
1692 | expand=False
1693 | ))
1694 |
1695 | # Estimate cost (higher due to more test cases)
1696 | estimated_cost = 0.04 * iterations
1697 | console.print(f"[cyan]Estimated cost:[/cyan] ${estimated_cost:.2f} USD")
1698 |
1699 | # Check if cost would exceed limit
1700 | if estimated_cost > SETTINGS["cost_limit"]:
1701 | console.print(Panel(
1702 | f"Estimated cost (${estimated_cost:.2f}) exceeds the set limit (${SETTINGS['cost_limit']:.2f}).\n"
1703 | "Reducing iterations to stay within budget.",
1704 | title="⚠️ Cost Limit Warning",
1705 | border_style="yellow",
1706 | expand=False
1707 | ))
1708 |         # Fall back to the minimum of a single iteration to stay under the limit
1709 | params["max_iterations"] = 1
1710 |
1711 | # Create a progress display
1712 | console.print("\n[bold cyan]Custom Test Generation Progress:[/bold cyan]")
1713 | detail_console.print(f"\n[bold]Starting refinement with custom test strategy for {selected_tool}...[/bold]")
1714 |
1715 | with Progress(
1716 | TextColumn("[bold blue]{task.description}"),
1717 | BarColumn(complete_style="green", finished_style="green"),
1718 | TaskProgressColumn(),
1719 | TimeElapsedColumn(),
1720 | console=console,
1721 | expand=True
1722 | ) as progress:
1723 | task_id = progress.add_task("[cyan]Refining with custom test strategy...", total=100)
1724 |
1725 | # Execute the refinement
1726 | start_time = time.time()
1727 | try:
1728 | result = await gateway.mcp.call_tool("refine_tool_documentation", params)
1729 |
1730 | # Simulate progress updates
1731 | elapsed = 0
1732 | while progress.tasks[task_id].completed < 100 and elapsed < 60:
1733 | progress.update(task_id, completed=min(95, elapsed * 1.5))
1734 | await asyncio.sleep(0.5)
1735 | elapsed = time.time() - start_time
1736 |
1737 | progress.update(task_id, completed=100)
1738 |
1739 | # Track cost if available
1740 | if isinstance(result, dict) and "total_refinement_cost" in result:
1741 | tracker.add_generic_cost(
1742 | cost=result.get("total_refinement_cost", 0.0),
1743 | description=f"Custom test strategy for {selected_tool}",
1744 | provider=provider,
1745 | model=model
1746 | )
1747 |
1748 | # Display the results
1749 | display_refinement_result(
1750 | result,
1751 | console=console,
1752 | visualization_level=SETTINGS["visualization_level"],
1753 | save_to_file=SETTINGS["save_results"],
1754 | output_dir=SETTINGS["output_dir"]
1755 | )
1756 |
1757 | return result
1758 |
1759 | except Exception as e:
1760 | progress.update(task_id, completed=100, description="[bold red]Refinement failed!")
1761 | logger.error(f"Error during custom test generation: {e}", emoji_key="error", exc_info=True)
1762 | console.print(f"[bold red]Error during custom test generation:[/bold red] {escape(str(e))}")
1763 | return None
1764 |
1765 |
1766 | async def demo_all_tools_refinement(
1767 | gateway: Gateway,
1768 | tracker: CostTracker,
1769 | refinement_provider: Optional[str] = None,
1770 | refinement_model: Optional[str] = None,
1771 | max_iterations: Optional[int] = None
1772 | ):
1773 | """Demonstrate refining documentation for all available tools."""
1774 | console.print(Rule("[bold cyan]All Tools Refinement[/bold cyan]", style="cyan"))
1775 |
1776 | # Get all available tools (excluding refine_tool_documentation itself)
1777 | tool_list = await gateway.mcp.list_tools()
1778 | available_tools = [
1779 | t.name for t in tool_list
1780 | if t.name != "refine_tool_documentation"
1781 | ]
1782 |
1783 | if not available_tools:
1784 | logger.error("No tools available for refinement", emoji_key="error")
1785 | console.print("[bold red]Error:[/bold red] No tools available for refinement.")
1786 | return
1787 |
1788 | console.print(f"[cyan]Found {len(available_tools)} tools available for refinement[/cyan]")
1789 |
1790 | # Determine provider and model
1791 | provider = refinement_provider or Provider.OPENAI.value
1792 |
1793 | # Find best available model if not specified
1794 | if not refinement_model:
1795 | try:
1796 | if provider == Provider.OPENAI.value:
1797 | model = "gpt-4.1-mini" # Use mini for multi-tool to save cost
1798 | # Check if model is available
1799 | provider_instance = gateway.providers.get(provider)
1800 | if provider_instance:
1801 | models = await provider_instance.list_models()
1802 | model_ids = [m.get("id") for m in models]
1803 | if model not in model_ids:
1804 | model = provider_instance.get_default_model()
1805 | elif provider == Provider.ANTHROPIC.value:
1806 | model = "claude-3-5-haiku"
1807 | else:
1808 | # Use default model for other providers
1809 | provider_instance = gateway.providers.get(provider)
1810 | if provider_instance:
1811 | model = provider_instance.get_default_model()
1812 | else:
1813 | model = None
1814 | except Exception as e:
1815 | logger.warning(f"Error determining model for {provider}: {e}", emoji_key="warning")
1816 | model = None
1817 |
1818 | # If we still don't have a model, try a different provider
1819 | if not model:
1820 | for fallback_provider in SETTINGS["fallback_providers"]:
1821 | try:
1822 | provider_instance = gateway.providers.get(fallback_provider)
1823 | if provider_instance:
1824 | model = provider_instance.get_default_model()
1825 | provider = fallback_provider
1826 | break
1827 | except Exception:
1828 | continue
1829 |
1830 | # If still no model, use a reasonable default
1831 | if not model:
1832 | model = "gpt-4.1-mini"
1833 | provider = Provider.OPENAI.value
1834 | else:
1835 | model = refinement_model
1836 |
1837 | # Define refinement parameters
1838 | iterations = max_iterations or 1 # Default to 1 for all-tools
1839 |
1840 | params = {
1841 | "refine_all_available": True, # This is the key difference for this demo
1842 | "max_iterations": iterations,
1843 | "refinement_model_config": {
1844 | "provider": provider,
1845 | "model": model,
1846 | "temperature": 0.3,
1847 | },
1848 | "validation_level": "basic", # Use basic validation for speed
1849 | "enable_winnowing": False, # Skip winnowing for demo speed
1850 | "progress_callback": display_refinement_progress,
1851 | }
1852 |
1853 | console.print(Panel(
1854 | Syntax(json.dumps({k: v for k, v in params.items() if k != "progress_callback"}, indent=2), "json"),
1855 | title="All Tools Refinement Parameters",
1856 | border_style="dim cyan",
1857 | expand=False
1858 | ))
1859 |
1860 | # Estimate cost - higher with multiple tools
1861 | estimated_cost = 0.01 * iterations * len(available_tools) # Lower per-tool cost with bulk processing
1862 | console.print(f"[cyan]Estimated cost:[/cyan] ${estimated_cost:.2f} USD")
1863 |
1864 | # Check if cost would exceed limit
1865 | if estimated_cost > SETTINGS["cost_limit"]:
1866 | console.print(Panel(
1867 | f"Estimated cost (${estimated_cost:.2f}) exceeds the set limit (${SETTINGS['cost_limit']:.2f}).\n"
1868 | "Switching to targeted refinement to stay within budget.",
1869 | title="⚠️ Cost Limit Warning",
1870 | border_style="yellow",
1871 | expand=False
1872 | ))
1873 |
1874 | # Switch to using targeted tool_names instead of refine_all_available
1875 | max_tools = max(1, int(SETTINGS["cost_limit"] / (0.02 * iterations)))
1876 | selected_tools = random.sample(available_tools, min(max_tools, len(available_tools)))
1877 |
1878 | params["refine_all_available"] = False
1879 | params["tool_names"] = selected_tools
1880 |
1881 | console.print(f"[yellow]Reducing to {len(selected_tools)} randomly selected tools[/yellow]")
1882 |
1883 | # Create a progress display
1884 | console.print("\n[bold cyan]All Tools Refinement Progress:[/bold cyan]")
1885 | detail_console.print(f"\n[bold]Starting refinement for all {len(available_tools)} tools...[/bold]")
1886 |
1887 | with Progress(
1888 | SpinnerColumn(),
1889 | TextColumn("[bold blue]{task.description}"),
1890 | BarColumn(complete_style="green", finished_style="green"),
1891 | TaskProgressColumn(),
1892 | TimeRemainingColumn(),
1893 | console=console,
1894 | expand=True
1895 | ) as progress:
1896 | task_id = progress.add_task("[cyan]Refining all tools...", total=100)
1897 |
1898 | # Execute the refinement
1899 | start_time = time.time()
1900 | try:
1901 | result = await gateway.mcp.call_tool("refine_tool_documentation", params)
1902 |
1903 | # Simulate progress updates
1904 | elapsed = 0
1905 | while progress.tasks[task_id].completed < 100 and elapsed < 300: # Longer timeout for all tools
1906 | progress.update(task_id, completed=min(95, elapsed * 0.3)) # Slower progress for more tools
1907 | await asyncio.sleep(1.0)
1908 | elapsed = time.time() - start_time
1909 |
1910 | progress.update(task_id, completed=100)
1911 |
1912 | # Track cost if available
1913 | if isinstance(result, dict) and "total_refinement_cost" in result:
1914 | tracker.add_generic_cost(
1915 | cost=result.get("total_refinement_cost", 0.0),
1916 | description=f"All tools refinement ({len(available_tools)} tools)",
1917 | provider=provider,
1918 | model=model
1919 | )
1920 |
1921 | # Display the results
1922 | display_refinement_result(
1923 | result,
1924 | console=console,
1925 | visualization_level=SETTINGS["visualization_level"],
1926 | save_to_file=SETTINGS["save_results"],
1927 | output_dir=SETTINGS["output_dir"]
1928 | )
1929 |
1930 | return result
1931 |
1932 | except Exception as e:
1933 | progress.update(task_id, completed=100, description="[bold red]Refinement failed!")
1934 | logger.error(f"Error during all tools refinement: {e}", emoji_key="error", exc_info=True)
1935 | console.print(f"[bold red]Error during all tools refinement:[/bold red] {escape(str(e))}")
1936 | return None
1937 |
1938 |
1939 | async def demo_schema_focused_refinement(
1940 | gateway: Gateway,
1941 | tracker: CostTracker,
1942 | target_tool: Optional[str] = None,
1943 | refinement_provider: Optional[str] = None,
1944 | refinement_model: Optional[str] = None
1945 | ):
1946 | """Demonstrate refinement focused specifically on schema improvements."""
1947 | console.print(Rule("[bold cyan]Schema-Focused Refinement[/bold cyan]", style="cyan"))
1948 |
1949 | # Choose a complex tool to refine
1950 | selected_tool = None
1951 |
1952 | if target_tool:
1953 | # Check if specified tool exists
1954 | tool_list = await gateway.mcp.list_tools()
1955 | available_tools = [t.name for t in tool_list]
1956 |
1957 | if target_tool in available_tools:
1958 | selected_tool = target_tool
1959 | else:
1960 | logger.warning(f"Specified tool '{target_tool}' not found", emoji_key="warning")
1961 | console.print(f"[yellow]Warning:[/yellow] Specified tool '{target_tool}' not found. Selecting automatically.")
1962 |
1963 | # Auto-select if needed (prefer complex tools for schema refinement)
1964 | if not selected_tool:
1965 | complex_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="complex")
1966 |
1967 | if complex_tools:
1968 | selected_tool = complex_tools[0]
1969 | else:
1970 | # Fall back to medium complexity
1971 | medium_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="medium")
1972 |
1973 | if medium_tools:
1974 | selected_tool = medium_tools[0]
1975 | else:
1976 | # Last resort - any tool
1977 | simple_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="simple")
1978 |
1979 | if simple_tools:
1980 | selected_tool = simple_tools[0]
1981 |
1982 | if not selected_tool:
1983 | logger.error("No suitable tools found for schema-focused refinement demo", emoji_key="error")
1984 | console.print("[bold red]Error:[/bold red] No suitable tools found for schema-focused refinement demo.")
1985 | return
1986 |
1987 | console.print(f"Selected tool for schema-focused refinement: [cyan]{selected_tool}[/cyan]")
1988 |
1989 | # Get tool schema
1990 | tool_list = await gateway.mcp.list_tools()
1991 | tool_def = next((t for t in tool_list if t.name == selected_tool), None)
1992 |
1993 | if not tool_def or not hasattr(tool_def, "inputSchema"):
1994 | logger.error(f"Could not get schema for tool {selected_tool}", emoji_key="error")
1995 | console.print(f"[bold red]Error:[/bold red] Could not get schema for tool {selected_tool}.")
1996 | return
1997 |
1998 | input_schema = getattr(tool_def, "inputSchema", {})
1999 |
2000 | # Display the original schema
2001 | console.print("[bold cyan]Original Schema:[/bold cyan]")
2002 | console.print(Panel(
2003 | Syntax(json.dumps(input_schema, indent=2), "json", theme="default", line_numbers=False),
2004 | title="Original Input Schema",
2005 | border_style="dim cyan",
2006 | expand=False
2007 | ))
2008 |
2009 | # Determine provider and model
2010 | provider = refinement_provider or Provider.OPENAI.value
2011 |
2012 | # Find best available model if not specified
2013 | if not refinement_model:
2014 | try:
2015 | if provider == Provider.OPENAI.value:
2016 | model = "gpt-4.1" # Prefer this for best schema analysis
2017 | # Check if model is available
2018 | provider_instance = gateway.providers.get(provider)
2019 | if provider_instance:
2020 | models = await provider_instance.list_models()
2021 | model_ids = [m.get("id") for m in models]
2022 | if model not in model_ids:
2023 | model = "gpt-4.1-mini" # Fall back to mini
2024 | elif provider == Provider.ANTHROPIC.value:
2025 | model = "claude-3-5-sonnet"
2026 | else:
2027 | # Use default model for other providers
2028 | provider_instance = gateway.providers.get(provider)
2029 | if provider_instance:
2030 | model = provider_instance.get_default_model()
2031 | else:
2032 | model = None
2033 | except Exception as e:
2034 | logger.warning(f"Error determining model for {provider}: {e}", emoji_key="warning")
2035 | model = None
2036 |
2037 | # If we still don't have a model, try a different provider
2038 | if not model:
2039 | for fallback_provider in SETTINGS["fallback_providers"]:
2040 | try:
2041 | provider_instance = gateway.providers.get(fallback_provider)
2042 | if provider_instance:
2043 | model = provider_instance.get_default_model()
2044 | provider = fallback_provider
2045 | break
2046 | except Exception:
2047 | continue
2048 |
2049 | # If still no model, use a reasonable default
2050 | if not model:
2051 | model = "gpt-4.1-mini"
2052 | provider = Provider.OPENAI.value
2053 | else:
2054 | model = refinement_model
2055 |
2056 | # Define refinement parameters focused on schema improvements
2057 | params = {
2058 | "tool_names": [selected_tool],
2059 | "max_iterations": 1, # Single iteration focused on schema
2060 | "refinement_model_config": {
2061 | "provider": provider,
2062 | "model": model,
2063 | "temperature": 0.2,
2064 | },
2065 | # Custom test generation strategy focused on schema edge cases
2066 | "generation_config": {
2067 | "positive_required_only": 2,
2068 | "positive_optional_mix": 3,
2069 | "negative_type": 4, # More type validation checks
2070 | "negative_required": 3, # More tests with missing required params
2071 | "negative_enum": 3, # More enum testing
2072 | "negative_format": 3, # More format testing
2073 | "negative_range": 3, # More range testing
2074 | "negative_length": 3, # More length testing
2075 | "negative_pattern": 3, # More pattern testing
2076 | "edge_boundary_min": 3, # More tests with min boundary values
2077 | "edge_boundary_max": 3, # More tests with max boundary values
2078 | "llm_ambiguity_probe": 2, # Probe for ambiguities
2079 | },
2080 | "validation_level": "full", # Strict validation
2081 | "enable_winnowing": False, # No winnowing needed
2082 | "progress_callback": display_refinement_progress,
2083 | }
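    # This strategy deliberately weights negative and boundary categories so the generated tests
    # exercise the schema's constraints (types, enums, formats, ranges, lengths, patterns)
    # rather than happy-path usage.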
2084 |
2085 | console.print(Panel(
2086 | Syntax(json.dumps({k: v for k, v in params.items() if k not in ["progress_callback", "generation_config"]}, indent=2), "json"),
2087 | title="Schema-Focused Refinement Parameters",
2088 | border_style="dim cyan",
2089 | expand=False
2090 | ))
2091 |
2092 | # Estimate cost
2093 | estimated_cost = 0.035 # Schema focus costs a bit more due to edge case testing
2094 | console.print(f"[cyan]Estimated cost:[/cyan] ${estimated_cost:.2f} USD")
2095 |
2096 | # Create a progress display
2097 | console.print("\n[bold cyan]Schema-Focused Refinement Progress:[/bold cyan]")
2098 | detail_console.print(f"\n[bold]Starting schema-focused refinement for {selected_tool}...[/bold]")
2099 |
2100 | with Progress(
2101 | TextColumn("[bold blue]{task.description}"),
2102 | BarColumn(complete_style="green", finished_style="green"),
2103 | TaskProgressColumn(),
2104 | TimeElapsedColumn(),
2105 | console=console,
2106 | expand=True
2107 | ) as progress:
2108 | task_id = progress.add_task("[cyan]Refining schema...", total=100)
2109 |
2110 | # Execute the refinement
2111 | start_time = time.time()
2112 | try:
2113 | result = await gateway.mcp.call_tool("refine_tool_documentation", params)
2114 |
2115 | # Simulate progress updates
2116 | elapsed = 0
2117 | while progress.tasks[task_id].completed < 100 and elapsed < 60:
2118 | progress.update(task_id, completed=min(95, elapsed * 1.5))
2119 | await asyncio.sleep(0.5)
2120 | elapsed = time.time() - start_time
2121 |
2122 | progress.update(task_id, completed=100)
2123 |
2124 | # Track cost if available
2125 | if isinstance(result, dict) and "total_refinement_cost" in result:
2126 | tracker.add_generic_cost(
2127 | cost=result.get("total_refinement_cost", 0.0),
2128 | description=f"Schema-focused refinement of {selected_tool}",
2129 | provider=provider,
2130 | model=model
2131 | )
2132 |
2133 | # Extract schema patches from the result
2134 | refined_tools = result.get("refined_tools", [])
2135 | target_tool_result = next((t for t in refined_tools if t.get("tool_name") == selected_tool), None)
2136 |
2137 | if target_tool_result and target_tool_result.get("final_proposed_schema_patches"):
2138 | schema_patches = target_tool_result.get("final_proposed_schema_patches", [])
2139 | patched_schema = target_tool_result.get("final_schema_after_patches", {})
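                # Illustrative only (the exact patch format is not guaranteed here): each entry is
                # expected to resemble a JSON-Patch operation, e.g.
                #   {"op": "add", "path": "/properties/query/description", "value": "Search query string"}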
2140 |
2141 | if schema_patches:
2142 | console.print("[bold green]Schema Refinement Results:[/bold green]")
2143 |
2144 | console.print(Panel(
2145 | Syntax(json.dumps(schema_patches, indent=2), "json", theme="default", line_numbers=False),
2146 | title="Applied Schema Patches",
2147 | border_style="magenta",
2148 | expand=False
2149 | ))
2150 |
2151 | if patched_schema:
2152 | console.print(Panel(
2153 | Syntax(json.dumps(patched_schema, indent=2), "json", theme="default", line_numbers=False),
2154 | title="Refined Schema",
2155 | border_style="green",
2156 | expand=False
2157 | ))
2158 |
2159 | # Generate a side-by-side comparison
2160 | console.print(create_side_by_side_diff(
2161 | json.dumps(input_schema, indent=2),
2162 | json.dumps(patched_schema, indent=2),
2163 | title="Schema Before/After Comparison"
2164 | ))
2165 | else:
2166 | console.print("[yellow]No schema patches were applied.[/yellow]")
2167 |
2168 | # Display the full results
2169 | display_refinement_result(
2170 | result,
2171 | console=console,
2172 | visualization_level=SETTINGS["visualization_level"],
2173 | save_to_file=SETTINGS["save_results"],
2174 | output_dir=SETTINGS["output_dir"]
2175 | )
2176 |
2177 | return result
2178 |
2179 | except Exception as e:
2180 | progress.update(task_id, completed=100, description="[bold red]Refinement failed!")
2181 | logger.error(f"Error during schema-focused refinement: {e}", emoji_key="error", exc_info=True)
2182 | console.print(f"[bold red]Error during schema-focused refinement:[/bold red] {escape(str(e))}")
2183 | return None
2184 |
2185 |
2186 | async def demo_model_comparison(
2187 | gateway: Gateway,
2188 | tracker: CostTracker,
2189 | target_tool: Optional[str] = None
2190 | ):
2191 | """Demonstrate comparing different LLM models for refinement."""
2192 | console.print(Rule("[bold cyan]Model Comparison for Refinement[/bold cyan]", style="cyan"))
2193 |
2194 | # Choose a single tool to refine
2195 | selected_tool = None
2196 |
2197 | if target_tool:
2198 | # Check if specified tool exists
2199 | tool_list = await gateway.mcp.list_tools()
2200 | available_tools = [t.name for t in tool_list]
2201 |
2202 | if target_tool in available_tools:
2203 | selected_tool = target_tool
2204 | else:
2205 | logger.warning(f"Specified tool '{target_tool}' not found", emoji_key="warning")
2206 | console.print(f"[yellow]Warning:[/yellow] Specified tool '{target_tool}' not found. Selecting automatically.")
2207 |
2208 | # Auto-select if needed
2209 | if not selected_tool:
2210 | medium_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="medium")
2211 |
2212 | if medium_tools:
2213 | selected_tool = medium_tools[0]
2214 | else:
2215 | # Fall back to any available tool
2216 | simple_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="simple")
2217 |
2218 | if simple_tools:
2219 | selected_tool = simple_tools[0]
2220 |
2221 | if not selected_tool:
2222 | logger.error("No suitable tools found for model comparison demo", emoji_key="error")
2223 | console.print("[bold red]Error:[/bold red] No suitable tools found for model comparison demo.")
2224 | return
2225 |
2226 | console.print(f"Selected tool for model comparison: [cyan]{selected_tool}[/cyan]")
2227 |
2228 | # Define models to compare
2229 | models_to_compare = []
2230 |
2231 | # Check which models are available
2232 | for provider_name in SETTINGS["preferred_providers"] + SETTINGS["fallback_providers"]:
2233 | provider_instance = gateway.providers.get(provider_name)
2234 | if provider_instance:
2235 | try:
2236 | available_models = await provider_instance.list_models()
2237 | model_ids = [m.get("id") for m in available_models]
2238 |
2239 | if provider_name == Provider.OPENAI.value:
2240 | if "gpt-4.1" in model_ids:
2241 | models_to_compare.append((provider_name, "gpt-4.1"))
2242 | if "gpt-4.1-mini" in model_ids:
2243 | models_to_compare.append((provider_name, "gpt-4.1-mini"))
2244 |
2245 | elif provider_name == Provider.ANTHROPIC.value:
2246 | if "claude-3-5-sonnet" in model_ids:
2247 | models_to_compare.append((provider_name, "claude-3-5-sonnet"))
2248 | if "claude-3-5-haiku" in model_ids:
2249 | models_to_compare.append((provider_name, "claude-3-5-haiku"))
2250 |
2251 | elif provider_name == Provider.GEMINI.value:
2252 | if "gemini-2.0-pro" in model_ids:
2253 | models_to_compare.append((provider_name, "gemini-2.0-pro"))
2254 |
2255 | elif provider_name == Provider.DEEPSEEK.value:
2256 | if "deepseek-chat" in model_ids:
2257 | models_to_compare.append((provider_name, "deepseek-chat"))
2258 |
2259 | # If we already have 3+ models, stop looking
2260 | if len(models_to_compare) >= 3:
2261 | break
2262 |
2263 | except Exception as e:
2264 | logger.warning(f"Error listing models for {provider_name}: {e}", emoji_key="warning")
2265 |
2266 | # If we don't have enough models, add some defaults that might work
2267 | if len(models_to_compare) < 2:
2268 | fallback_models = [
2269 | (Provider.OPENAI.value, "gpt-4.1-mini"),
2270 | (Provider.ANTHROPIC.value, "claude-3-5-haiku"),
2271 | (Provider.GEMINI.value, "gemini-2.0-pro")
2272 | ]
2273 |
2274 | for provider, model in fallback_models:
2275 | if (provider, model) not in models_to_compare:
2276 | models_to_compare.append((provider, model))
2277 | if len(models_to_compare) >= 3:
2278 | break
2279 |
2280 | # Limit to max 3 models for a reasonable comparison
2281 | models_to_compare = models_to_compare[:3]
2282 |
2283 | if not models_to_compare:
2284 | logger.error("No models available for comparison", emoji_key="error")
2285 | console.print("[bold red]Error:[/bold red] No models available for comparison.")
2286 | return
2287 |
2288 | console.print(f"Models being compared: [cyan]{', '.join([f'{p}/{m}' for p, m in models_to_compare])}[/cyan]")
2289 |
2290 | # Estimate total cost
2291 | estimated_cost = 0.03 * len(models_to_compare)
2292 | console.print(f"[cyan]Estimated total cost:[/cyan] ${estimated_cost:.2f} USD")
2293 |
2294 | # Check if cost would exceed limit
2295 | if estimated_cost > SETTINGS["cost_limit"]:
2296 | console.print(Panel(
2297 | f"Estimated cost (${estimated_cost:.2f}) exceeds the set limit (${SETTINGS['cost_limit']:.2f}).\n"
2298 | "Reducing the number of models to compare.",
2299 | title="⚠️ Cost Limit Warning",
2300 | border_style="yellow",
2301 | expand=False
2302 | ))
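        # Keep at least two models even when over budget, otherwise there is nothing to compare.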
2303 | max_models = max(2, int(SETTINGS["cost_limit"] / 0.03))
2304 | models_to_compare = models_to_compare[:max_models]
2305 | console.print(f"[yellow]Comparing only: {', '.join([f'{p}/{m}' for p, m in models_to_compare])}[/yellow]")
2306 |
2307 | # Create a progress display
2308 | console.print("\n[bold cyan]Model Comparison Progress:[/bold cyan]")
2309 |
2310 | # Results storage
2311 | model_results = {}
2312 |
2313 | # Run refinement with each model
2314 | for provider, model in models_to_compare:
2315 | detail_console.print(f"\n[bold]Starting refinement with {provider}/{model}...[/bold]")
2316 |
2317 | params = {
2318 | "tool_names": [selected_tool],
2319 | "max_iterations": 1,
2320 | "refinement_model_config": {
2321 | "provider": provider,
2322 | "model": model,
2323 | "temperature": 0.2,
2324 | },
2325 | "validation_level": "basic",
2326 | "enable_winnowing": False,
2327 | "progress_callback": display_refinement_progress,
2328 | }
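        # Every model gets an identical config (same tool, single iteration, basic validation),
        # so differences in the comparison table below reflect the model rather than the setup.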
2329 |
2330 | with Progress(
2331 | TextColumn(f"[bold blue]Testing {provider}/{model}..."),
2332 | BarColumn(complete_style="green", finished_style="green"),
2333 | TaskProgressColumn(),
2334 | TimeElapsedColumn(),
2335 | console=console,
2336 | expand=True
2337 | ) as progress:
2338 | task_id = progress.add_task(f"[cyan]Refining with {model}...", total=100)
2339 |
2340 | # Execute the refinement
2341 | start_time = time.time()
2342 | try:
2343 | result = await gateway.mcp.call_tool("refine_tool_documentation", params)
2344 |
2345 | # Simulate progress updates
2346 | elapsed = 0
2347 | while progress.tasks[task_id].completed < 100 and elapsed < 60:
2348 | progress.update(task_id, completed=min(95, elapsed * 1.5))
2349 | await asyncio.sleep(0.5)
2350 | elapsed = time.time() - start_time
2351 |
2352 | progress.update(task_id, completed=100)
2353 |
2354 | # Track cost if available
2355 | if isinstance(result, dict) and "total_refinement_cost" in result:
2356 | tracker.add_generic_cost(
2357 | cost=result.get("total_refinement_cost", 0.0),
2358 | description=f"{provider}/{model} refinement of {selected_tool}",
2359 | provider=provider,
2360 | model=model
2361 | )
2362 |
2363 | # Store result for comparison
2364 | model_results[(provider, model)] = {
2365 | "result": result,
2366 | "processing_time": time.time() - start_time,
2367 | "cost": result.get("total_refinement_cost", 0.0) if isinstance(result, dict) else 0.0
2368 | }
2369 |
2370 | except Exception as e:
2371 | progress.update(task_id, completed=100, description=f"[bold red]{model} failed!")
2372 | logger.error(f"Error during refinement with {provider}/{model}: {e}", emoji_key="error", exc_info=True)
2373 | console.print(f"[bold red]Error during refinement with {provider}/{model}:[/bold red] {escape(str(e))}")
2374 |
2375 | # Compare and display results
2376 | if model_results:
2377 | console.print(Rule("[bold blue]Model Comparison Results[/bold blue]", style="blue"))
2378 |
2379 | # Create comparison table
2380 | comparison_table = Table(title="Model Performance Comparison", box=box.ROUNDED)
2381 | comparison_table.add_column("Model", style="cyan")
2382 | comparison_table.add_column("Initial Success", style="dim yellow")
2383 | comparison_table.add_column("Final Success", style="green")
2384 | comparison_table.add_column("Improvement", style="magenta")
2385 | comparison_table.add_column("Processing Time", style="blue")
2386 | comparison_table.add_column("Cost", style="red")
2387 |
2388 | for (provider, model), data in model_results.items():
2389 | result = data["result"]
2390 | refined_tools = result.get("refined_tools", [])
2391 |
2392 | # Find the specific tool result
2393 | tool_result = next((t for t in refined_tools if t.get("tool_name") == selected_tool), None)
2394 |
2395 | if tool_result:
2396 | initial_success = tool_result.get("initial_success_rate", 0.0)
2397 | final_success = tool_result.get("final_success_rate", 0.0)
2398 | improvement = tool_result.get("improvement_factor", 0.0)
2399 |
2400 | comparison_table.add_row(
2401 | f"{provider}/{model}",
2402 | f"{initial_success:.1%}",
2403 | f"{final_success:.1%}",
2404 | f"{improvement:.2f}x",
2405 | f"{data['processing_time']:.2f}s",
2406 | f"${data['cost']:.6f}"
2407 | )
2408 |
2409 | console.print(comparison_table)
2410 |
2411 | # Find the best model
2412 | best_model = None
2413 | best_improvement = -1
2414 |
2415 | for (provider, model), data in model_results.items():
2416 | result = data["result"]
2417 | refined_tools = result.get("refined_tools", [])
2418 | tool_result = next((t for t in refined_tools if t.get("tool_name") == selected_tool), None)
2419 |
2420 | if tool_result:
2421 | improvement = tool_result.get("improvement_factor", 0.0)
2422 | if improvement > best_improvement:
2423 | best_improvement = improvement
2424 | best_model = (provider, model)
2425 |
2426 | if best_model:
2427 | console.print(f"[bold green]Best model:[/bold green] [cyan]{best_model[0]}/{best_model[1]}[/cyan] with {best_improvement:.2f}x improvement")
2428 |
2429 | # Show detailed results for the best model
2430 | best_data = model_results[best_model]
2431 | console.print("\n[bold cyan]Detailed Results for Best Model:[/bold cyan]")
2432 |
2433 | display_refinement_result(
2434 | best_data["result"],
2435 | console=console,
2436 | visualization_level=SETTINGS["visualization_level"],
2437 | save_to_file=SETTINGS["save_results"],
2438 | output_dir=SETTINGS["output_dir"]
2439 | )
2440 |
2441 | return model_results
2442 | else:
2443 | console.print("[yellow]No results available for comparison.[/yellow]")
2444 | return None
2445 |
2446 |
2447 | async def demo_cost_optimization(
2448 | gateway: Gateway,
2449 | tracker: CostTracker,
2450 | target_tool: Optional[str] = None
2451 | ):
2452 | """Demonstrate cost optimization techniques for documentation refinement."""
2453 | console.print(Rule("[bold cyan]Cost Optimization Techniques[/bold cyan]", style="cyan"))
2454 |
2455 | # Choose a single tool to refine
2456 | selected_tool = None
2457 |
2458 | if target_tool:
2459 | # Check if specified tool exists
2460 | tool_list = await gateway.mcp.list_tools()
2461 | available_tools = [t.name for t in tool_list]
2462 |
2463 | if target_tool in available_tools:
2464 | selected_tool = target_tool
2465 | else:
2466 | logger.warning(f"Specified tool '{target_tool}' not found", emoji_key="warning")
2467 | console.print(f"[yellow]Warning:[/yellow] Specified tool '{target_tool}' not found. Selecting automatically.")
2468 |
2469 | # Auto-select if needed
2470 | if not selected_tool:
2471 | medium_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="medium")
2472 |
2473 | if medium_tools:
2474 | selected_tool = medium_tools[0]
2475 | else:
2476 | # Fall back to any available tool
2477 | simple_tools = await get_suitable_tools(gateway.mcp, count=1, complexity="simple")
2478 |
2479 | if simple_tools:
2480 | selected_tool = simple_tools[0]
2481 |
2482 | if not selected_tool:
2483 | logger.error("No suitable tools found for cost optimization demo", emoji_key="error")
2484 | console.print("[bold red]Error:[/bold red] No suitable tools found for cost optimization demo.")
2485 | return
2486 |
2487 | console.print(f"Selected tool for cost optimization: [cyan]{selected_tool}[/cyan]")
2488 |
2489 | # Create a table of optimization techniques
2490 | optimization_table = Table(title="Cost Optimization Techniques", box=box.SIMPLE_HEAD)
2491 | optimization_table.add_column("Technique", style="cyan")
2492 | optimization_table.add_column("Description", style="white")
2493 | optimization_table.add_column("Est. Savings", style="green")
2494 |
2495 | optimization_table.add_row(
2496 | "Smaller Models",
2497 | "Use smaller, faster models for initial iterations or simple tools",
2498 | "50-80%"
2499 | )
2500 | optimization_table.add_row(
2501 | "Reduced Iterations",
2502 | "Single iteration can capture most improvements",
2503 | "30-60%"
2504 | )
2505 | optimization_table.add_row(
2506 | "Basic Validation",
2507 | "Use 'basic' validation level instead of 'full'",
2508 | "10-20%"
2509 | )
2510 | optimization_table.add_row(
2511 | "Focused Strategies",
2512 | "Custom test generation focused on important cases",
2513 | "20-40%"
2514 | )
2515 | optimization_table.add_row(
2516 | "Bulk Processing",
2517 | "Refine multiple related tools at once",
2518 | "30-50%"
2519 | )
2520 | optimization_table.add_row(
2521 | "Skip Winnowing",
2522 | "Disable winnowing for quick improvements",
2523 | "5-10%"
2524 | )
2525 |
2526 | console.print(optimization_table)
2527 |
2528 | # Define and display standard vs. optimized configurations
2529 | standard_config = {
2530 | "tool_names": [selected_tool],
2531 | "max_iterations": 3,
2532 | "refinement_model_config": {
2533 | "provider": Provider.OPENAI.value,
2534 | "model": "gpt-4.1",
2535 | "temperature": 0.2,
2536 | },
2537 | "validation_level": "full",
2538 | "enable_winnowing": True
2539 | }
2540 |
2541 | optimized_config = {
2542 | "tool_names": [selected_tool],
2543 | "max_iterations": 1,
2544 | "refinement_model_config": {
2545 | "provider": Provider.OPENAI.value,
2546 | "model": "gpt-4.1-mini",
2547 | "temperature": 0.3,
2548 | },
2549 | "validation_level": "basic",
2550 | "enable_winnowing": False,
2551 | # Focused test generation to save costs
2552 | "generation_config": {
2553 | "positive_required_only": 2,
2554 | "positive_optional_mix": 2,
2555 | "negative_type": 2,
2556 | "negative_required": 1,
2557 | "negative_enum": 0,
2558 | "negative_format": 0,
2559 | "negative_range": 0,
2560 | "negative_length": 0,
2561 | "negative_pattern": 0,
2562 | "edge_empty": 0,
2563 | "edge_null": 0,
2564 | "edge_boundary_min": 0,
2565 | "edge_boundary_max": 0,
2566 | "llm_realistic_combo": 2,
2567 | "llm_ambiguity_probe": 1,
2568 | "llm_simulation_based": 0
2569 | }
2570 | }
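    # The zeroed categories should simply produce no tests of that type; together with the smaller
    # model and the single iteration, this is where most of the estimated savings comes from.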
2571 |
2572 | # Compare costs
2573 | standard_est_cost = 0.09 # 3 iterations with gpt-4.1
2574 | optimized_est_cost = 0.015 # 1 iteration with gpt-4.1-mini and reduced tests
2575 | savings_pct = ((standard_est_cost - optimized_est_cost) / standard_est_cost) * 100
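    # With the estimates above: (0.09 - 0.015) / 0.09 ≈ 83% projected savings.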
2576 |
2577 | console.print(Panel(
2578 | Group(
2579 | "[bold]Standard Config:[/bold]",
2580 | Syntax(json.dumps(standard_config, indent=2), "json", theme="default", line_numbers=False),
2581 | f"[yellow]Estimated Cost: ${standard_est_cost:.3f}[/yellow]",
2582 | "\n[bold]Optimized Config:[/bold]",
2583 | Syntax(json.dumps(optimized_config, indent=2), "json", theme="default", line_numbers=False),
2584 | f"[green]Estimated Cost: ${optimized_est_cost:.3f}[/green]",
2585 | f"\n[bold cyan]Estimated Savings: {savings_pct:.1f}%[/bold cyan]"
2586 | ),
2587 | title="Cost Comparison",
2588 | border_style="dim cyan",
2589 | expand=False
2590 | ))
2591 |
2592 | # Run the optimized configuration
2593 | console.print("\n[bold cyan]Running Cost-Optimized Refinement:[/bold cyan]")
2594 | detail_console.print(f"\n[bold]Starting cost-optimized refinement for {selected_tool}...[/bold]")
2595 |
2596 | # Add progress callback
2597 | optimized_config["progress_callback"] = display_refinement_progress
2598 |
2599 | with Progress(
2600 | TextColumn("[bold blue]{task.description}"),
2601 | BarColumn(complete_style="green", finished_style="green"),
2602 | TaskProgressColumn(),
2603 | TimeElapsedColumn(),
2604 | console=console,
2605 | expand=True
2606 | ) as progress:
2607 | task_id = progress.add_task("[cyan]Running cost-optimized refinement...", total=100)
2608 |
2609 | # Execute the refinement
2610 | start_time = time.time()
2611 | try:
2612 | result = await gateway.mcp.call_tool("refine_tool_documentation", optimized_config)
2613 |
2614 | # Simulate progress updates
2615 | elapsed = 0
2616 | while progress.tasks[task_id].completed < 100 and elapsed < 30:
2617 | progress.update(task_id, completed=min(95, elapsed * 3)) # Faster progress for optimized mode
2618 | await asyncio.sleep(0.5)
2619 | elapsed = time.time() - start_time
2620 |
2621 | progress.update(task_id, completed=100)
2622 |
2623 | # Track cost if available
2624 | if isinstance(result, dict) and "total_refinement_cost" in result:
2625 | actual_cost = result.get("total_refinement_cost", 0.0)
2626 | tracker.add_generic_cost(
2627 | cost=actual_cost,
2628 | description=f"Cost-optimized refinement of {selected_tool}",
2629 | provider=optimized_config["refinement_model_config"]["provider"],
2630 | model=optimized_config["refinement_model_config"]["model"]
2631 | )
2632 |
2633 | # Compare estimated vs actual cost
2634 | console.print("[bold cyan]Cost Analysis:[/bold cyan]")
2635 | console.print(f"Estimated Cost: ${optimized_est_cost:.3f}")
2636 | console.print(f"Actual Cost: ${actual_cost:.3f}")
2637 | console.print(f"Actual Savings vs. Standard: {((standard_est_cost - actual_cost) / standard_est_cost) * 100:.1f}%")
2638 |
2639 | # Display the results
2640 | display_refinement_result(
2641 | result,
2642 | console=console,
2643 | visualization_level=SETTINGS["visualization_level"],
2644 | save_to_file=SETTINGS["save_results"],
2645 | output_dir=SETTINGS["output_dir"]
2646 | )
2647 |
2648 | return result
2649 |
2650 | except Exception as e:
2651 | progress.update(task_id, completed=100, description="[bold red]Refinement failed!")
2652 | logger.error(f"Error during cost-optimized refinement: {e}", emoji_key="error", exc_info=True)
2653 | console.print(f"[bold red]Error during cost-optimized refinement:[/bold red] {escape(str(e))}")
2654 | return None
2655 |
2656 | async def demo_practical_testing(
2657 | gateway: Gateway,
2658 | tracker: CostTracker
2659 | ):
2660 | """Demonstrate practical testing with flawed examples."""
2661 | console.print(Rule("[bold cyan]Practical Testing with Flawed Tools[/bold cyan]", style="cyan"))
2662 |
2663 |     # Create the flawed example tools used for this demonstration
2664 | created_tools = await create_flawed_example_tools(gateway.mcp)
2665 |
2666 | if not created_tools:
2667 | logger.error("Failed to create flawed example tools", emoji_key="error")
2668 | console.print("[bold red]Error:[/bold red] Failed to create flawed example tools for demonstration.")
2669 | return
2670 |
2671 | console.print(f"Created {len(created_tools)} flawed example tools for practical testing:\n" +
2672 | "\n".join([f"- [cyan]{name}[/cyan]" for name in created_tools]))
2673 |
2674 |     # Summarize the intentional documentation flaws in a table
2675 | flaws_table = Table(title="Intentional Documentation Flaws", box=box.ROUNDED)
2676 | flaws_table.add_column("Tool", style="cyan")
2677 | flaws_table.add_column("Flaw Type", style="yellow")
2678 | flaws_table.add_column("Description", style="white")
2679 |
2680 | flaws_table.add_row(
2681 | "flawed_process_text",
2682 | "Ambiguous Description",
2683 | "Description is vague and doesn't explain parameters."
2684 | )
2685 | flaws_table.add_row(
2686 | "flawed_scrape_website",
2687 | "Missing Parameter Descriptions",
2688 | "Parameters in schema have no descriptions."
2689 | )
2690 | flaws_table.add_row(
2691 | "flawed_data_processor",
2692 | "Confusing Schema & Description Mismatch",
2693 | "Description calls the tool 'analyzer' but name is 'processor'."
2694 | )
2695 | flaws_table.add_row(
2696 | "flawed_product_search",
2697 | "Misleading Examples",
2698 | "Example shows incorrect parameter name 'sort_by' vs schema 'sort'."
2699 | )
2700 | flaws_table.add_row(
2701 | "flawed_calculator",
2702 | "Schema/Implementation Conflict",
2703 | "Clear description but possible schema type confusion."
2704 | )
2705 |
2706 | console.print(flaws_table)
2707 |
2708 | # Select a flawed tool to demonstrate refinement
2709 | selected_tool = created_tools[0] # Start with the first one
2710 | console.print(f"\nSelected tool for demonstration: [cyan]{selected_tool}[/cyan]")
2711 |
2712 | # Show the original flawed tool definition
2713 | tool_list = await gateway.mcp.list_tools()
2714 | tool_def = next((t for t in tool_list if t.name == selected_tool), None)
2715 |
2716 | if tool_def and hasattr(tool_def, "inputSchema") and hasattr(tool_def, "description"):
2717 | input_schema = getattr(tool_def, "inputSchema", {})
2718 | description = getattr(tool_def, "description", "")
2719 |
2720 | console.print("[bold cyan]Original Flawed Tool Definition:[/bold cyan]")
2721 |
2722 | console.print(Panel(
2723 | escape(description),
2724 | title="Original Description",
2725 | border_style="dim red",
2726 | expand=False
2727 | ))
2728 |
2729 | console.print(Panel(
2730 | Syntax(json.dumps(input_schema, indent=2), "json", theme="default", line_numbers=False),
2731 | title="Original Schema",
2732 | border_style="dim red",
2733 | expand=False
2734 | ))
2735 |
2736 | # Run refinement on the flawed tool
2737 | console.print("\n[bold cyan]Running Refinement on Flawed Tool:[/bold cyan]")
2738 | detail_console.print(f"\n[bold]Starting refinement for flawed tool {selected_tool}...[/bold]")
2739 |
2740 | params = {
2741 | "tool_names": [selected_tool],
2742 | "max_iterations": 2,
2743 | "refinement_model_config": {
2744 | "provider": Provider.OPENAI.value,
2745 | "model": "gpt-4.1", # Use the best model for these challenging cases
2746 | "temperature": 0.2,
2747 | },
2748 | "validation_level": "full",
2749 | "enable_winnowing": True,
2750 | "progress_callback": display_refinement_progress,
2751 | }
2752 |
2753 | with Progress(
2754 | TextColumn("[bold blue]{task.description}"),
2755 | BarColumn(complete_style="green", finished_style="green"),
2756 | TaskProgressColumn(),
2757 | TimeElapsedColumn(),
2758 | console=console,
2759 | expand=True
2760 | ) as progress:
2761 | task_id = progress.add_task("[cyan]Refining flawed tool...", total=100)
2762 |
2763 | # Execute the refinement
2764 | start_time = time.time()
2765 | try:
2766 | result = await gateway.mcp.call_tool("refine_tool_documentation", params)
2767 |
2768 | # Simulate progress updates
2769 | elapsed = 0
2770 | while progress.tasks[task_id].completed < 100 and elapsed < 60:
2771 | progress.update(task_id, completed=min(95, elapsed * 1.5))
2772 | await asyncio.sleep(0.5)
2773 | elapsed = time.time() - start_time
2774 |
2775 | progress.update(task_id, completed=100)
2776 |
2777 | # Track cost if available
2778 | if isinstance(result, dict) and "total_refinement_cost" in result:
2779 | tracker.add_generic_cost(
2780 | cost=result.get("total_refinement_cost", 0.0),
2781 | description=f"Flawed tool refinement of {selected_tool}",
2782 | provider=Provider.OPENAI.value,
2783 | model="gpt-4.1"
2784 | )
2785 |
2786 | # Display the results
2787 | display_refinement_result(
2788 | result,
2789 | console=console,
2790 | visualization_level=SETTINGS["visualization_level"],
2791 | save_to_file=SETTINGS["save_results"],
2792 | output_dir=SETTINGS["output_dir"]
2793 | )
2794 |
2795 | # Highlight identified flaws
2796 | refined_tools = result.get("refined_tools", [])
2797 | target_tool_result = next((t for t in refined_tools if t.get("tool_name") == selected_tool), None)
2798 |
2799 | if target_tool_result:
2800 | identified_flaws = []
2801 | for iter_data in target_tool_result.get("iterations", []):
2802 | analysis = iter_data.get("analysis", {})
2803 | if analysis:
2804 | flaws = analysis.get("identified_flaw_categories", [])
2805 | for flaw in flaws:
2806 | if flaw not in identified_flaws:
2807 | identified_flaws.append(flaw)
2808 |
2809 | if identified_flaws:
2810 | console.print("\n[bold cyan]Identified Documentation Flaws:[/bold cyan]")
2811 | flaw_details = {
2812 | "MISSING_DESCRIPTION": "Documentation is missing key information",
2813 | "AMBIGUOUS_DESCRIPTION": "Description is unclear or can be interpreted in multiple ways",
2814 | "INCORRECT_DESCRIPTION": "Description contains incorrect information",
2815 | "MISSING_SCHEMA_CONSTRAINT": "Schema is missing important constraints",
2816 | "INCORRECT_SCHEMA_CONSTRAINT": "Schema contains incorrect constraints",
2817 | "OVERLY_RESTRICTIVE_SCHEMA": "Schema is unnecessarily restrictive",
2818 | "TYPE_CONFUSION": "Parameter types are inconsistent or unclear",
2819 | "MISSING_EXAMPLE": "Documentation lacks necessary examples",
2820 | "MISLEADING_EXAMPLE": "Examples provided are incorrect or misleading",
2821 | "INCOMPLETE_EXAMPLE": "Examples are present but insufficient",
2822 | "PARAMETER_DEPENDENCY_UNCLEAR": "Dependencies between parameters are not explained",
2823 | "CONFLICTING_CONSTRAINTS": "Schema contains contradictory constraints",
2824 | "AGENT_FORMULATION_ERROR": "Documentation hinders LLM agent's ability to use the tool",
2825 | "SCHEMA_PREVALIDATION_FAILURE": "Schema validation issues",
2826 | "TOOL_EXECUTION_ERROR": "Issues with tool execution",
2827 | "UNKNOWN": "Unspecified documentation issue"
2828 | }
2829 |
2830 | for flaw in identified_flaws:
2831 | console.print(f"- [bold yellow]{flaw}[/bold yellow]: {flaw_details.get(flaw, 'No description available')}")
2832 |
2833 | return result
2834 |
2835 | except Exception as e:
2836 | progress.update(task_id, completed=100, description="[bold red]Refinement failed!")
2837 | logger.error(f"Error during flawed tool refinement: {e}", emoji_key="error", exc_info=True)
2838 | console.print(f"[bold red]Error during flawed tool refinement:[/bold red] {escape(str(e))}")
2839 | return None
2840 |
2841 |
2842 | async def main():
2843 | """Main entry point for the demo."""
2844 | try:
2845 | print("Starting demo...")
2846 | logger.debug("Starting demo...")
2847 | args = parse_arguments()
2848 | print(f"Args parsed: {args}")
2849 | logger.debug(f"Args parsed: {args}")
2850 |
2851 | # Set up gateway
2852 | print("Setting up gateway...")
2853 | gateway = await setup_gateway_and_tools(create_flawed_tools=args.create_flawed)
2854 | print("Gateway setup complete")
2855 |
2856 | # Initialize cost tracker
2857 | tracker = CostTracker(limit=SETTINGS["cost_limit"])
2858 |
2859 | # Check if the tool was successfully registered
2860 | print("Checking if tool is registered...")
2861 | tool_list = await gateway.mcp.list_tools()
2862 | available_tools = [t.name for t in tool_list]
2863 | print(f"Available tools: {available_tools}")
2864 |
2865 | if "refine_tool_documentation" in available_tools:
2866 | print("Tool is available, proceeding with demo")
2867 | logger.info("Tool successfully registered, proceeding with demo", emoji_key="success")
2868 |
2869 | # Run the selected demo based on CLI arguments
2870 | print(f"Running demo: {args.demo}")
2871 |
2872 | # Select a demo based on specified arguments
2873 | if args.demo == "single" or args.demo == "all":
2874 | print("Running single tool refinement demo")
2875 | result = await demo_single_tool_refinement(
2876 | gateway,
2877 | tracker,
2878 | target_tool=args.tool,
2879 | refinement_provider=args.provider,
2880 | refinement_model=args.model,
2881 | max_iterations=args.iterations
2882 | )
2883 | if result:
2884 | logger.success("Single tool refinement demo completed", emoji_key="success")
2885 |
2886 | elif args.demo == "multi":
2887 | print("Running multi-tool refinement demo")
2888 | result = await demo_multi_tool_refinement(
2889 | gateway,
2890 | tracker,
2891 | target_tools=[args.tool] if args.tool else None,
2892 | refinement_provider=args.provider,
2893 | refinement_model=args.model,
2894 | max_iterations=args.iterations
2895 | )
2896 | if result:
2897 | logger.success("Multi-tool refinement demo completed", emoji_key="success")
2898 |
2899 | elif args.demo == "custom-testing":
2900 | print("Running custom test generation demo")
2901 | result = await demo_custom_test_generation(
2902 | gateway,
2903 | tracker,
2904 | target_tool=args.tool,
2905 | refinement_provider=args.provider,
2906 | refinement_model=args.model,
2907 | max_iterations=args.iterations
2908 | )
2909 | if result:
2910 | logger.success("Custom test generation demo completed", emoji_key="success")
2911 |
2912 | elif args.demo == "optimize":
2913 | print("Running cost optimization demo")
2914 | result = await demo_cost_optimization(
2915 | gateway,
2916 | tracker,
2917 | target_tool=args.tool
2918 | )
2919 | if result:
2920 | logger.success("Cost optimization demo completed", emoji_key="success")
2921 |
2922 | elif args.demo == "all-tools":
2923 | print("Running all-tools refinement demo")
2924 | result = await demo_all_tools_refinement(
2925 | gateway,
2926 | tracker,
2927 | refinement_provider=args.provider,
2928 | refinement_model=args.model,
2929 | max_iterations=args.iterations
2930 | )
2931 | if result:
2932 | logger.success("All-tools refinement demo completed", emoji_key="success")
2933 |
2934 | elif args.demo == "schema-focus":
2935 | print("Running schema-focused refinement demo")
2936 | result = await demo_schema_focused_refinement(
2937 | gateway,
2938 | tracker,
2939 | target_tool=args.tool,
2940 | refinement_provider=args.provider,
2941 | refinement_model=args.model
2942 | )
2943 | if result:
2944 | logger.success("Schema-focused refinement demo completed", emoji_key="success")
2945 |
2946 | elif args.demo == "practical":
2947 | print("Running practical testing demo")
2948 | result = await demo_practical_testing(gateway, tracker)
2949 | if result:
2950 | logger.success("Practical testing demo completed", emoji_key="success")
2951 |
2952 | elif args.demo == "model-comparison":
2953 | print("Running model comparison demo")
2954 | result = await demo_model_comparison(
2955 | gateway,
2956 | tracker,
2957 | target_tool=args.tool
2958 | )
2959 | if result:
2960 | logger.success("Model comparison demo completed", emoji_key="success")
2961 |
2962 | elif args.demo == "all":
2963 | print("Running all demos")
2964 | console.print(Panel(
2965 | "Running all demos in sequence. This may take some time.",
2966 | title="ℹ️ Running All Demos",
2967 | border_style="cyan",
2968 | expand=False
2969 | ))
2970 |
2971 | # Run each demo in sequence
2972 | demos = [
2973 | demo_single_tool_refinement(gateway, tracker, target_tool=args.tool,
2974 | refinement_provider=args.provider,
2975 | refinement_model=args.model,
2976 | max_iterations=args.iterations),
2977 | demo_multi_tool_refinement(gateway, tracker,
2978 | refinement_provider=args.provider,
2979 | refinement_model=args.model,
2980 | max_iterations=args.iterations),
2981 | demo_custom_test_generation(gateway, tracker, target_tool=args.tool,
2982 | refinement_provider=args.provider,
2983 | refinement_model=args.model),
2984 | demo_cost_optimization(gateway, tracker, target_tool=args.tool),
2985 | demo_schema_focused_refinement(gateway, tracker, target_tool=args.tool,
2986 | refinement_provider=args.provider,
2987 | refinement_model=args.model),
2988 | demo_model_comparison(gateway, tracker, target_tool=args.tool)
2989 | ]
2990 |
2991 | if args.create_flawed:
2992 | demos.append(demo_practical_testing(gateway, tracker))
2993 |
2994 | for demo_coro in demos:
2995 | try:
2996 | await demo_coro
2997 | except Exception as e:
2998 | logger.error(f"Error running demo: {e}", emoji_key="error", exc_info=True)
2999 | console.print(f"[bold red]Error running demo:[/bold red] {escape(str(e))}")
3000 |
3001 | logger.success("All demos completed", emoji_key="success")
3002 |
3003 | else:
3004 | print("No valid demo specified")
3005 | console.print(Panel(
3006 | f"The specified demo '{args.demo}' is not recognized.\n"
3007 | "Available demos: all, single, multi, custom-testing, optimize, all-tools, schema-focus, practical, model-comparison",
3008 | title="⚠️ Invalid Demo Selection",
3009 | border_style="yellow",
3010 | expand=False
3011 | ))
3012 | else:
3013 | print("Tool is not available")
3014 | # Tool not available, show error message
3015 | console.print(Panel(
3016 | "This demo requires the docstring_refiner tool to be properly registered.\n"
3017 | "Due to known issues with Pydantic definitions, the tool can't be registered in this demo.\n\n"
3018 | "Check that you have the correct version of the Ultimate MCP Server and dependencies installed.",
3019 | title="⚠️ Demo Requirements Not Met",
3020 | border_style="red",
3021 | expand=False
3022 | ))
3023 |
3024 | # Display cost summary
3025 | console.print(Rule("[bold green]Total Demo Cost Summary[/bold green]", style="green"))
3026 | tracker.display_costs(console=console)
3027 |
3028 | logger.info("Docstring Refiner Demo completed successfully", emoji_key="success")
3029 | console.print(Rule("[bold green]Demo Complete[/bold green]", style="green"))
3030 | print("Demo completed successfully")
3031 |
3032 | except Exception as e:
3033 | print(f"Error in main: {type(e).__name__}: {str(e)}")
3034 | import traceback
3035 | traceback.print_exc()
3036 | return 1
3037 |
3038 | return 0
3039 |
3040 |
3041 | if __name__ == "__main__":
3042 | exit_code = asyncio.run(main())
3043 | sys.exit(exit_code)
```
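The listing above appears to be the tail of `examples/docstring_refiner_demo.py`: `main()` parses CLI arguments, sets up the gateway (optionally registering the deliberately flawed demo tools), confirms that `refine_tool_documentation` is available, and then dispatches to the matching demo coroutine. As a rough usage sketch, the snippet below assumes the flag spellings (`--demo`, `--tool`, `--provider`, `--model`, `--iterations`, `--create-flawed`) mirror the `args.*` attributes read in `main()`; the actual `parse_arguments()` definition is not shown in this excerpt, so treat the exact flag names as assumptions.

```python
# Minimal sketch of launching the demo script; flag names are inferred from the
# args.* attributes used in main() and may differ from the real parse_arguments()
# definition (not shown in this excerpt).
import subprocess
import sys

subprocess.run(
    [
        sys.executable,
        "examples/docstring_refiner_demo.py",
        "--demo", "single",    # also: all, multi, custom-testing, optimize,
                               # all-tools, schema-focus, practical, model-comparison
        "--iterations", "2",   # forwarded as max_iterations to the demo coroutines
        "--create-flawed",     # assumed flag for args.create_flawed; registers the
                               # deliberately flawed tools listed in flaws_table
    ],
    check=True,
)
```

With `--demo all`, the dispatch runs every demo coroutine in sequence and appends the practical-testing demo only when the flawed tools were created.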