This is page 16 of 45. Use http://codebase.md/dicklesworthstone/llm_gateway_mcp_server?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .cursorignore
├── .env.example
├── .envrc
├── .gitignore
├── additional_features.md
├── check_api_keys.py
├── completion_support.py
├── comprehensive_test.py
├── docker-compose.yml
├── Dockerfile
├── empirically_measured_model_speeds.json
├── error_handling.py
├── example_structured_tool.py
├── examples
│ ├── __init__.py
│ ├── advanced_agent_flows_using_unified_memory_system_demo.py
│ ├── advanced_extraction_demo.py
│ ├── advanced_unified_memory_system_demo.py
│ ├── advanced_vector_search_demo.py
│ ├── analytics_reporting_demo.py
│ ├── audio_transcription_demo.py
│ ├── basic_completion_demo.py
│ ├── cache_demo.py
│ ├── claude_integration_demo.py
│ ├── compare_synthesize_demo.py
│ ├── cost_optimization.py
│ ├── data
│ │ ├── sample_event.txt
│ │ ├── Steve_Jobs_Introducing_The_iPhone_compressed.md
│ │ └── Steve_Jobs_Introducing_The_iPhone_compressed.mp3
│ ├── docstring_refiner_demo.py
│ ├── document_conversion_and_processing_demo.py
│ ├── entity_relation_graph_demo.py
│ ├── filesystem_operations_demo.py
│ ├── grok_integration_demo.py
│ ├── local_text_tools_demo.py
│ ├── marqo_fused_search_demo.py
│ ├── measure_model_speeds.py
│ ├── meta_api_demo.py
│ ├── multi_provider_demo.py
│ ├── ollama_integration_demo.py
│ ├── prompt_templates_demo.py
│ ├── python_sandbox_demo.py
│ ├── rag_example.py
│ ├── research_workflow_demo.py
│ ├── sample
│ │ ├── article.txt
│ │ ├── backprop_paper.pdf
│ │ ├── buffett.pdf
│ │ ├── contract_link.txt
│ │ ├── legal_contract.txt
│ │ ├── medical_case.txt
│ │ ├── northwind.db
│ │ ├── research_paper.txt
│ │ ├── sample_data.json
│ │ └── text_classification_samples
│ │ ├── email_classification.txt
│ │ ├── news_samples.txt
│ │ ├── product_reviews.txt
│ │ └── support_tickets.txt
│ ├── sample_docs
│ │ └── downloaded
│ │ └── attention_is_all_you_need.pdf
│ ├── sentiment_analysis_demo.py
│ ├── simple_completion_demo.py
│ ├── single_shot_synthesis_demo.py
│ ├── smart_browser_demo.py
│ ├── sql_database_demo.py
│ ├── sse_client_demo.py
│ ├── test_code_extraction.py
│ ├── test_content_detection.py
│ ├── test_ollama.py
│ ├── text_classification_demo.py
│ ├── text_redline_demo.py
│ ├── tool_composition_examples.py
│ ├── tournament_code_demo.py
│ ├── tournament_text_demo.py
│ ├── unified_memory_system_demo.py
│ ├── vector_search_demo.py
│ ├── web_automation_instruction_packs.py
│ └── workflow_delegation_demo.py
├── LICENSE
├── list_models.py
├── marqo_index_config.json.example
├── mcp_protocol_schema_2025-03-25_version.json
├── mcp_python_lib_docs.md
├── mcp_tool_context_estimator.py
├── model_preferences.py
├── pyproject.toml
├── quick_test.py
├── README.md
├── resource_annotations.py
├── run_all_demo_scripts_and_check_for_errors.py
├── storage
│ └── smart_browser_internal
│ ├── locator_cache.db
│ ├── readability.js
│ └── storage_state.enc
├── test_client.py
├── test_connection.py
├── TEST_README.md
├── test_sse_client.py
├── test_stdio_client.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── integration
│ │ ├── __init__.py
│ │ └── test_server.py
│ ├── manual
│ │ ├── test_extraction_advanced.py
│ │ └── test_extraction.py
│ └── unit
│ ├── __init__.py
│ ├── test_cache.py
│ ├── test_providers.py
│ └── test_tools.py
├── TODO.md
├── tool_annotations.py
├── tools_list.json
├── ultimate_mcp_banner.webp
├── ultimate_mcp_logo.webp
├── ultimate_mcp_server
│ ├── __init__.py
│ ├── __main__.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── commands.py
│ │ ├── helpers.py
│ │ └── typer_cli.py
│ ├── clients
│ │ ├── __init__.py
│ │ ├── completion_client.py
│ │ └── rag_client.py
│ ├── config
│ │ └── examples
│ │ └── filesystem_config.yaml
│ ├── config.py
│ ├── constants.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── evaluation
│ │ │ ├── base.py
│ │ │ └── evaluators.py
│ │ ├── providers
│ │ │ ├── __init__.py
│ │ │ ├── anthropic.py
│ │ │ ├── base.py
│ │ │ ├── deepseek.py
│ │ │ ├── gemini.py
│ │ │ ├── grok.py
│ │ │ ├── ollama.py
│ │ │ ├── openai.py
│ │ │ └── openrouter.py
│ │ ├── server.py
│ │ ├── state_store.py
│ │ ├── tournaments
│ │ │ ├── manager.py
│ │ │ ├── tasks.py
│ │ │ └── utils.py
│ │ └── ums_api
│ │ ├── __init__.py
│ │ ├── ums_database.py
│ │ ├── ums_endpoints.py
│ │ ├── ums_models.py
│ │ └── ums_services.py
│ ├── exceptions.py
│ ├── graceful_shutdown.py
│ ├── services
│ │ ├── __init__.py
│ │ ├── analytics
│ │ │ ├── __init__.py
│ │ │ ├── metrics.py
│ │ │ └── reporting.py
│ │ ├── cache
│ │ │ ├── __init__.py
│ │ │ ├── cache_service.py
│ │ │ ├── persistence.py
│ │ │ ├── strategies.py
│ │ │ └── utils.py
│ │ ├── cache.py
│ │ ├── document.py
│ │ ├── knowledge_base
│ │ │ ├── __init__.py
│ │ │ ├── feedback.py
│ │ │ ├── manager.py
│ │ │ ├── rag_engine.py
│ │ │ ├── retriever.py
│ │ │ └── utils.py
│ │ ├── prompts
│ │ │ ├── __init__.py
│ │ │ ├── repository.py
│ │ │ └── templates.py
│ │ ├── prompts.py
│ │ └── vector
│ │ ├── __init__.py
│ │ ├── embeddings.py
│ │ └── vector_service.py
│ ├── tool_token_counter.py
│ ├── tools
│ │ ├── __init__.py
│ │ ├── audio_transcription.py
│ │ ├── base.py
│ │ ├── completion.py
│ │ ├── docstring_refiner.py
│ │ ├── document_conversion_and_processing.py
│ │ ├── enhanced-ums-lookbook.html
│ │ ├── entity_relation_graph.py
│ │ ├── excel_spreadsheet_automation.py
│ │ ├── extraction.py
│ │ ├── filesystem.py
│ │ ├── html_to_markdown.py
│ │ ├── local_text_tools.py
│ │ ├── marqo_fused_search.py
│ │ ├── meta_api_tool.py
│ │ ├── ocr_tools.py
│ │ ├── optimization.py
│ │ ├── provider.py
│ │ ├── pyodide_boot_template.html
│ │ ├── python_sandbox.py
│ │ ├── rag.py
│ │ ├── redline-compiled.css
│ │ ├── sentiment_analysis.py
│ │ ├── single_shot_synthesis.py
│ │ ├── smart_browser.py
│ │ ├── sql_databases.py
│ │ ├── text_classification.py
│ │ ├── text_redline_tools.py
│ │ ├── tournament.py
│ │ ├── ums_explorer.html
│ │ └── unified_memory_system.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── async_utils.py
│ │ ├── display.py
│ │ ├── logging
│ │ │ ├── __init__.py
│ │ │ ├── console.py
│ │ │ ├── emojis.py
│ │ │ ├── formatter.py
│ │ │ ├── logger.py
│ │ │ ├── panels.py
│ │ │ ├── progress.py
│ │ │ └── themes.py
│ │ ├── parse_yaml.py
│ │ ├── parsing.py
│ │ ├── security.py
│ │ └── text.py
│ └── working_memory_api.py
├── unified_memory_system_technical_analysis.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/ultimate_mcp_server/tools/marqo_fused_search.py:
--------------------------------------------------------------------------------
```python
1 | """MCP tool for flexible searching of Marqo indices."""
2 |
3 | import json
4 | import os
5 | import time
6 | from datetime import datetime
7 | from typing import Any, Dict, List, Optional
8 |
9 | import httpx  # Used for the Marqo availability health check
10 | import marqo
11 | from pydantic import BaseModel, Field, field_validator
12 |
13 | from ultimate_mcp_server.clients import CompletionClient
14 | from ultimate_mcp_server.constants import Provider
15 | from ultimate_mcp_server.exceptions import ToolExecutionError, ToolInputError
16 | from ultimate_mcp_server.tools.base import with_error_handling, with_tool_metrics
17 | from ultimate_mcp_server.utils import get_logger
18 |
19 | logger = get_logger("ultimate_mcp_server.tools.marqo_fused_search")
20 |
21 | # --- Configuration Loading ---
22 |
23 | CONFIG_FILE_PATH = os.path.join(os.path.dirname(__file__), "..", "..", "marqo_index_config.json")
24 |
25 | def load_marqo_config() -> Dict[str, Any]:
26 | """Loads Marqo configuration from the JSON file.
27 |
28 | Returns:
29 | A dictionary containing the loaded configuration.
30 |
31 | Raises:
32 | ToolExecutionError: If the config file cannot be found, parsed, or is invalid.
33 | """
34 | try:
35 | with open(CONFIG_FILE_PATH, 'r') as f:
36 | config = json.load(f)
37 | logger.info(f"Loaded Marqo config from {CONFIG_FILE_PATH}")
38 | return config
39 | except FileNotFoundError:
40 | logger.warning(f"Marqo config file not found at {CONFIG_FILE_PATH}. Using minimal hardcoded defaults.")
41 | # Fallback to minimal, generic defaults if file not found
42 | return {
43 | "default_marqo_url": "http://localhost:8882",
44 | "default_index_name": "my_marqo_index", # Generic name
45 | "default_schema": { # Minimal fallback schema
46 | "fields": {
47 | # Define only essential fields or placeholders if file is missing
48 | "content": {"type": "text", "role": "content"},
49 | "embedding": {"type": "tensor", "role": "tensor_vector"},
50 | "_id": {"type": "keyword", "role": "internal"}
51 | },
52 | "tensor_field": "embedding",
53 | "default_content_field": "content",
54 | "default_date_field": None # No default date field assumed
55 | }
56 | }
57 | except json.JSONDecodeError as e:
58 | logger.error(f"Error decoding Marqo config file {CONFIG_FILE_PATH}: {e}", exc_info=True)
59 | raise ToolExecutionError(f"Failed to load or parse Marqo config file: {CONFIG_FILE_PATH}") from e
60 | except Exception as e:
61 | logger.error(f"Unexpected error loading Marqo config: {e}", exc_info=True)
62 | raise ToolExecutionError("Unexpected error loading Marqo config") from e
63 |
64 | MARQO_CONFIG = load_marqo_config()
65 | DEFAULT_MARQO_URL = MARQO_CONFIG.get("default_marqo_url", "http://localhost:8882")
66 | DEFAULT_INDEX_NAME = MARQO_CONFIG.get("default_index_name", "my_marqo_index") # Use generic default
67 | DEFAULT_INDEX_SCHEMA = MARQO_CONFIG.get("default_schema", {})
68 |
69 | # --- Define cache file path relative to config file ---
70 | CACHE_FILE_DIR = os.path.dirname(CONFIG_FILE_PATH)
71 | CACHE_FILE_PATH = os.path.join(CACHE_FILE_DIR, "marqo_docstring_cache.json")
72 |
73 | # --- LLM Client for Doc Generation ---
74 |
75 | async def _call_llm_for_doc_generation(prompt: str) -> Optional[str]:
76 | """
77 | Calls an LLM to generate dynamic documentation for the Marqo search tool.
78 |
79 | This function uses the Ultimate MCP Server's CompletionClient to send a prompt to an
80 | LLM (preferring Gemini) and retrieve generated content for enhancing the tool's documentation.
81 | It handles the entire process including client initialization, LLM API call configuration,
82 | and error handling.
83 |
84 | The function is designed with low temperature settings for predictable, factual outputs
85 | and enables caching to avoid redundant LLM calls for the same configuration.
86 |
87 | Args:
88 | prompt: A detailed prompt string containing information about the Marqo index
89 | configuration and instructions for generating appropriate documentation.
90 |
91 | Returns:
92 | str: The generated documentation text if successful.
93 | None: If the LLM call fails or returns empty content.
94 |
95 | Notes:
96 | - Uses the Gemini provider by default, but will fall back to other providers if needed.
97 | - Sets temperature to 0.3 for consistent, deterministic outputs.
98 | - Limits output to 400 tokens, which is sufficient for documentation purposes.
99 | - Enables caching to improve performance for repeated calls.
100 | """
101 | try:
102 | # Instantiate the client - assumes necessary env vars/config are set for the gateway
103 | client = CompletionClient()
104 |
105 | logger.info("Calling LLM to generate dynamic docstring augmentation...")
106 | # Use generate_completion (can also use try_providers if preferred)
107 | result = await client.generate_completion(
108 | prompt=prompt,
109 | provider=Provider.GEMINI.value, # Prioritize Gemini, adjust if needed
110 | temperature=0.3, # Lower temperature for more factual/consistent doc generation
111 | max_tokens=400, # Allow sufficient length for the documentation section
112 | use_cache=True # Cache the generated doc string for a given config
113 | )
114 |
115 |         # A successful call returns a result object with non-empty text;
116 |         # provider errors surface as exceptions handled by the outer try/except.
117 |         if result and result.text:
118 | logger.success(f"Successfully generated doc augmentation via {result.provider}. Length: {len(result.text)}")
119 | return result.text.strip()
120 | else:
121 | # This case might be less likely if exceptions are used for errors,
122 | # but handles cases where generation succeeds but returns empty text or None result unexpectedly.
123 | provider_name = result.provider if result else "Unknown"
124 | logger.error(f"LLM call for doc generation succeeded but returned no text. Provider: {provider_name}")
125 | return None
126 |
127 | except Exception as e:
128 | logger.error(f"Error during LLM call for doc generation: {e}", exc_info=True)
129 | return None
130 |
131 |
132 | # --- Docstring Augmentation Generation ---
133 |
134 | async def _generate_docstring_augmentation_from_config(config: Dict[str, Any]) -> str:
135 | """Generates dynamic documentation augmentation by calling an LLM with the config."""
136 | augmentation = ""
137 | try:
138 | index_name = config.get("default_index_name", "(Not specified)")
139 | schema = config.get("default_schema", {})
140 | schema_fields = schema.get("fields", {})
141 | date_field = schema.get("default_date_field")
142 |
143 | # Basic check: Don't call LLM for clearly minimal/default schemas
144 | if index_name == "my_marqo_index" and len(schema_fields) <= 3: # Heuristic
145 | logger.info("Skipping LLM doc generation for minimal default config.")
146 | return ""
147 |
148 | # Format schema fields for the prompt
149 | formatted_schema = []
150 | for name, props in schema_fields.items():
151 | details = [f"type: {props.get('type')}"]
152 | if props.get("role"):
153 | details.append(f"role: {props.get('role')}")
154 | if props.get("filterable"):
155 | details.append("filterable")
156 | if props.get("sortable"):
157 | details.append("sortable")
158 | if props.get("searchable"):
159 | details.append(f"searchable: {props.get('searchable')}")
160 | formatted_schema.append(f" - {name}: ({', '.join(details)})")
161 | schema_str = "\n".join(formatted_schema)
162 |
163 | # Construct the prompt for the LLM
164 | prompt = f"""
165 | Analyze the following Marqo index configuration and generate a concise markdown documentation section for a search tool using this index. Your goal is to help a user understand what kind of data they can search and how to use the tool effectively.
166 |
167 | Instructions:
168 | 1. **Infer Domain:** Based *only* on the index name and field names/types/roles, what is the likely subject matter or domain of the documents in this index (e.g., financial reports, product catalogs, medical articles, general documents)? State this clearly.
169 | 2. **Purpose:** Briefly explain the primary purpose of using a search tool with this index.
170 | 3. **Keywords:** Suggest 3-5 relevant keywords a user might include in their search queries for this specific index.
171 | 4. **Example Queries:** Provide 1-2 diverse example queries demonstrating typical use cases.
172 | 5. **Filtering:** Explain how the 'filters' parameter can be used, mentioning 1-2 specific filterable fields from the schema with examples (e.g., `filters={{"field_name": "value"}}`).
173 | 6. **Date Filtering:** If a default date field is specified (`{date_field}`), mention that the `date_range` parameter can be used with it.
174 | 7. **Format:** Output *only* the generated markdown section, starting with `---` and `**Configuration-Specific Notes:**`.
175 |
176 | Configuration Details:
177 | ----------------------
178 | Index Name: {index_name}
179 | Default Date Field: {date_field or 'Not specified'}
180 |
181 | Schema Fields:
182 | {schema_str}
183 | ----------------------
184 |
185 | Generated Documentation Section (Markdown):
186 | """
187 |
188 | logger.info(f"Attempting to generate docstring augmentation via LLM for index: {index_name}")
189 | logger.debug(f"LLM Prompt for doc generation:\n{prompt}")
190 |
191 | # Call the LLM
192 | generated_text = await _call_llm_for_doc_generation(prompt)
193 |
194 | if generated_text:
195 | augmentation = "\n\n" + generated_text # Add separators
196 | else:
197 | logger.warning("LLM call failed or returned no content. No dynamic augmentation added.")
198 |
199 | except Exception as e:
200 | logger.error(f"Error preparing data or prompt for LLM doc generation: {e}", exc_info=True)
201 |
202 | return augmentation
203 |
204 |
205 | # --- Health Check ---
206 |
207 | async def check_marqo_availability(url: str, timeout_seconds: int = 5) -> bool:
208 | """Checks if the Marqo instance is available and responding via HTTP GET using httpx."""
209 | logger.info(f"Checking Marqo availability at {url} using httpx...")
210 | try:
211 | async with httpx.AsyncClient() as client:
212 | response = await client.get(url, timeout=timeout_seconds)
213 | response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
214 |
215 | # Optionally, check the content if needed
216 | try:
217 | data = response.json()
218 | if isinstance(data, dict) and "message" in data and "Welcome to Marqo" in data["message"]:
219 | logger.success(f"Marqo instance at {url} is available and responded successfully (v{data.get('version', 'unknown')}).")
220 | return True
221 | else:
222 | logger.warning(f"Marqo instance at {url} responded, but content was unexpected: {data}")
223 | return True # Assuming reachability is sufficient
224 | except json.JSONDecodeError: # httpx raises json.JSONDecodeError
225 | logger.warning(f"Marqo instance at {url} responded, but response was not valid JSON.")
226 | return True # Assuming reachability is sufficient
227 |
228 | except httpx.TimeoutException:
229 | logger.error(f"Marqo check timed out after {timeout_seconds} seconds for URL: {url}")
230 | return False
231 | except httpx.RequestError as e:
232 |         # Catches transport-level request failures (DNS errors, refused connections, etc.)
233 | logger.error(f"Marqo instance at {url} is unavailable or check failed: {e}")
234 | return False
235 | except httpx.HTTPStatusError as e:
236 |         # Raised by raise_for_status() for 4xx/5xx responses
237 | logger.error(f"Marqo instance at {url} check failed with HTTP status {e.response.status_code}: {e}")
238 | return False
239 | except Exception as e:
240 | # Catch any other unexpected errors
241 | logger.error(f"Unexpected error during Marqo health check for {url}: {e}", exc_info=True)
242 | return False
243 |
244 | # --- Pydantic Models ---
245 |
246 | class DateRange(BaseModel):
247 | """Date range for filtering."""
248 | start_date: Optional[datetime] = Field(None, description="Start date (inclusive).")
249 | end_date: Optional[datetime] = Field(None, description="End date (inclusive).")
250 |
251 | @field_validator('end_date')
252 | @classmethod
253 | def end_date_must_be_after_start_date(cls, v, info):
254 | """Validate that end_date is after start_date if both are provided"""
255 | if v and info.data and 'start_date' in info.data and info.data['start_date'] and v < info.data['start_date']:
256 | raise ValueError('end_date must be after start_date')
257 | return v
258 |
259 | class MarqoSearchResult(BaseModel):
260 | """Individual search result from Marqo."""
261 | content: Optional[str] = Field(None, description="Main document content snippet from the hit, based on the schema's 'default_content_field'.")
262 | score: float = Field(..., description="Relevance score assigned by Marqo.")
263 | highlights: Optional[List[Dict[str, Any]]] = Field(None, description="List of highlighted text snippets matching the query, if requested.")
264 | metadata: Dict[str, Any] = Field(default_factory=dict, description="Dictionary of metadata fields associated with the hit document.")
265 | # We won't include 'filing' here as that was specific to the smartedgar example's DB lookup
266 |
267 | class MarqoSearchResponse(BaseModel):
268 | """Standardized response structure for Marqo search results."""
269 | results: List[MarqoSearchResult] = Field(default_factory=list, description="List of search result documents.")
270 | total_hits: int = Field(0, description="Total number of matching documents found by Marqo before applying limit/offset.")
271 | limit: int = Field(0, description="The maximum number of results requested.")
272 | offset: int = Field(0, description="The starting offset used for pagination.")
273 | processing_time_ms: int = Field(0, description="Time taken by Marqo to process the search query, in milliseconds.")
274 | query: str = Field("", description="The original query string submitted.")
275 | error: Optional[str] = Field(None, description="Error message if the search operation failed.")
276 | success: bool = Field(True, description="Indicates whether the search operation was successful.")
277 |
278 | # --- Helper Functions ---
279 |
280 | def _quote_marqo_value(value: Any) -> str:
281 | """Formats a Python value into a string suitable for a Marqo filter query.
282 |
283 | Handles strings (quoting if necessary), booleans, numbers, and datetimes (converting to timestamp).
284 |
285 | Args:
286 | value: The value to format.
287 |
288 | Returns:
289 | A string representation of the value formatted for Marqo filtering.
290 | """
291 | if isinstance(value, str):
292 |         # Quote the string if it contains spaces or characters Marqo uses in its filter syntax;
293 |         # only single quotes inside the value are escaped. Marqo is generally tolerant of
294 |         # unquoted simple strings, so plain values are returned as-is.
295 | if ' ' in value or ':' in value or '`' in value or '(' in value or ')' in value:
296 | # Basic escaping of quotes within the string
297 | escaped_value = value.replace("'", "\\'")
298 | return f"'{escaped_value}'"
299 | return value # No quotes needed for simple strings
300 | elif isinstance(value, bool):
301 | return str(value).lower()
302 | elif isinstance(value, (int, float)):
303 | return str(value)
304 | elif isinstance(value, datetime):
305 | # Convert datetime to timestamp for filtering
306 | return str(int(value.timestamp()))
307 | else:
308 | # Default to string representation, quoted
309 | escaped_value = str(value).replace("'", "\\'")
310 | return f"'{escaped_value}'"
311 |
312 | def _build_marqo_filter_string(
313 | filters: Optional[Dict[str, Any]] = None,
314 | date_range: Optional[DateRange] = None,
315 | schema: Dict[str, Any] = DEFAULT_INDEX_SCHEMA
316 | ) -> Optional[str]:
317 | """Builds a Marqo filter string from various filter components based on the schema.
318 |
319 | Constructs a filter string compatible with Marqo's filtering syntax, handling
320 | date ranges and dictionary-based filters with validation against the provided schema.
321 | Ensures that only fields marked as 'filterable' in the schema are used.
322 |
323 | Args:
324 | filters: Dictionary where keys are schema field names and values are filter criteria
325 | (single value or list for OR conditions).
326 | date_range: Optional DateRange object for time-based filtering.
327 | schema: The index schema dictionary used to validate filter fields and types.
328 |
329 | Returns:
330 | A Marqo-compatible filter string, or None if no valid filters are applicable.
331 | """
332 | filter_parts = []
333 | schema_fields = schema.get("fields", {})
334 | date_field_name = schema.get("default_date_field")
335 |
336 | # Add date range filter using the schema-defined date field
337 | if date_range and date_field_name and date_field_name in schema_fields:
338 | if schema_fields[date_field_name].get("type") == "timestamp":
339 | if date_range.start_date:
340 | start_ts = int(date_range.start_date.timestamp())
341 | filter_parts.append(f"{date_field_name}:[{start_ts} TO *]")
342 | if date_range.end_date:
343 | end_ts = int(date_range.end_date.timestamp())
344 | # Ensure the range includes the end date timestamp
345 | if date_range.start_date:
346 | # Modify the start range part to include the end
347 | filter_parts[-1] = f"{date_field_name}:[{start_ts} TO {end_ts}]"
348 | else:
349 | filter_parts.append(f"{date_field_name}:[* TO {end_ts}]")
350 | else:
351 | logger.warning(f"Date range filtering requested, but schema field '{date_field_name}' is not type 'timestamp'. Skipping.")
352 |
353 | # Add other filters
354 | if filters:
355 | for key, value in filters.items():
356 | if value is None:
357 | continue
358 |
359 | # Validate field exists in schema and is filterable
360 | if key not in schema_fields:
361 | logger.warning(f"Filter key '{key}' not found in index schema. Skipping.")
362 | continue
363 | if not schema_fields[key].get("filterable", False):
364 | logger.warning(f"Filter key '{key}' is not marked as filterable in schema. Skipping.")
365 | continue
366 |
367 | # Handle list values (OR condition within the key)
368 | if isinstance(value, list):
369 | if value: # Only if list is not empty
370 | # Quote each value appropriately
371 | quoted_values = [_quote_marqo_value(v) for v in value]
372 | # Create OR parts like field:(val1 OR val2 OR val3)
373 | or_condition = " OR ".join(quoted_values)
374 | filter_parts.append(f"{key}:({or_condition})")
375 | else:
376 | # Simple equality: field:value
377 | filter_parts.append(f"{key}:{_quote_marqo_value(value)}")
378 |
379 | return " AND ".join(filter_parts) if filter_parts else None
380 |
381 |
382 | # --- Main Tool Function ---
383 |
384 | @with_tool_metrics
385 | @with_error_handling
386 | async def marqo_fused_search(
387 | query: str,
388 | marqo_url: Optional[str] = None,
389 | index_name: Optional[str] = None,
390 | index_schema: Optional[Dict[str, Any]] = None,
391 | filters: Optional[Dict[str, Any]] = None,
392 | date_range: Optional[DateRange] = None,
393 | semantic_weight: float = 0.7,
394 | limit: int = 10,
395 | offset: int = 0,
396 | highlighting: bool = True,
397 | rerank: bool = True,
398 | searchable_attributes: Optional[List[str]] = None,
399 | hybrid_search_attributes: Optional[Dict[str, List[str]]] = None, # e.g., {"tensor": ["field1"], "lexical": ["field2"]}
400 | client_type: str = "human" # Could influence which 'file_type' is filtered if schema includes it
401 | ) -> Dict[str, Any]:
402 | """Performs a hybrid semantic and keyword search on a configured Marqo index.
403 |
404 | This tool automatically combines **lexical (keyword)** and **semantic (meaning-based)** search capabilities.
405 | You can provide simple, direct search terms. For specific phrases or keywords, the tool's lexical
406 | search component will find exact matches. For broader concepts or related ideas, the semantic
407 | search component will find relevant results based on meaning, even if the exact words aren't present.
408 | **Therefore, you generally do not need to formulate overly complex queries with many variations;
409 | trust the hybrid search to find relevant matches.**
410 |
411 | This tool allows searching a Marqo index with flexible filtering based on a
412 | provided or default index schema. It supports hybrid search, date ranges,
413 |     and metadata filtering.
414 |
415 | The connection details (URL, index name) and index structure (schema) default to
416 | values loaded from `marqo_index_config.json` but can be overridden via parameters.
417 |
418 | Args:
419 | query: The search query string.
420 | marqo_url: (Optional) URL of the Marqo instance. Overrides the default from config.
421 | index_name: (Optional) Name of the Marqo index to search. Overrides the default from config.
422 | index_schema: (Optional) A dictionary describing the Marqo index structure (fields, types, roles).
423 | Overrides the default from config. The schema dictates how filters,
424 | sorting, and searchable attributes are interpreted.
425 | Example Schema Structure:
426 | `{
427 | "fields": {
428 | "title": {"type": "text", "role": "metadata", "searchable": "lexical"},
429 | "body": {"type": "text", "role": "content"},
430 | "embedding": {"type": "tensor", "role": "tensor_vector"},
431 | "category": {"type": "keyword", "role": "metadata", "filterable": True},
432 | "created_at": {"type": "timestamp", "role": "date", "filterable": True, "sortable": True}
433 | },
434 | "tensor_field": "embedding",
435 | "default_content_field": "body",
436 | "default_date_field": "created_at"
437 | }`
438 | filters: (Optional) Dictionary of filters. Keys must be field names in the `index_schema` marked
439 | as `"filterable": True`. Values can be single values (e.g., `"category": "news"`) or lists for
440 | OR conditions (e.g., `"year": [2023, 2024]`).
441 | date_range: (Optional) Date range object with `start_date` and/or `end_date`. Applied to the field
442 | specified by `default_date_field` in the schema.
443 | **To filter by time, first inspect the available fields in the `index_schema`
444 | (or the 'Configuration-Specific Notes' section below if available) to find the appropriate
445 | date/timestamp field, then use this parameter.**
446 | semantic_weight: (Optional) Weight for semantic vs. lexical search in hybrid mode (0.0 to 1.0).
447 | 0.0 = pure lexical, 1.0 = pure semantic. Requires both tensor and lexical
448 | fields defined in schema or `hybrid_search_attributes`. Default 0.7.
449 | limit: (Optional) Maximum number of results. Default 10.
450 | offset: (Optional) Starting offset for pagination. Default 0.
451 | highlighting: (Optional) Request highlights from Marqo. Default True.
452 | rerank: (Optional) Enable Marqo's reranking (if supported by the chosen search method/version).
453 | Default True.
454 | searchable_attributes: (Optional) Explicitly provide a list of schema field names to search for
455 | TENSOR or LEXICAL search modes. Overrides auto-detection from schema.
456 | hybrid_search_attributes: (Optional) Explicitly provide fields for HYBRID search.
457 | Example: `{"tensor": ["embedding"], "lexical": ["title", "body"]}`.
458 | Overrides auto-detection from schema.
459 | client_type: (Optional) Identifier ('human', 'ai'). Can potentially be used with filters if the
460 | schema includes a field like `file_type`. Default 'human'.
461 |
462 | Returns:
463 | A dictionary conforming to `MarqoSearchResponse`, containing:
464 | - `results`: List of `MarqoSearchResult` objects.
465 | - `total_hits`: Estimated total number of matching documents.
466 | - `limit`, `offset`, `processing_time_ms`, `query`: Search metadata.
467 | - `error`: Error message string if the search failed.
468 | - `success`: Boolean indicating success or failure.
469 | Example Successful Return:
470 | `{
471 | "results": [
472 | {
473 | "content": "Example document content...",
474 | "score": 0.85,
475 | "highlights": [{"matched_text": "content snippet..."}],
476 | "metadata": {"category": "news", "created_at": 1678886400, ...}
477 | }
478 | ],
479 | "total_hits": 150,
480 | "limit": 10,
481 | "offset": 0,
482 | "processing_time_ms": 55,
483 | "query": "search query text",
484 | "error": null,
485 | "success": true
486 | }`
487 |
488 | Raises:
489 |         ToolInputError: For invalid parameters like a non-positive limit, an out-of-range weight, unknown filter fields,
490 | or incompatible schema/parameter combinations.
491 | ToolExecutionError: If connection to Marqo fails or the search query itself fails execution on Marqo.
492 | """
493 | start_time_perf = time.perf_counter()
494 |
495 | # --- Use loaded config defaults or provided overrides ---
496 | final_marqo_url = marqo_url or DEFAULT_MARQO_URL
497 | final_index_name = index_name or DEFAULT_INDEX_NAME
498 | final_index_schema = index_schema or DEFAULT_INDEX_SCHEMA
499 |
500 | # --- Input Validation ---
501 | if not query:
502 | raise ToolInputError("Query cannot be empty.")
503 | if not 0.0 <= semantic_weight <= 1.0:
504 | raise ToolInputError("semantic_weight must be between 0.0 and 1.0.")
505 | if limit <= 0:
506 | raise ToolInputError("Limit must be positive.")
507 | if offset < 0:
508 | raise ToolInputError("Offset cannot be negative.")
509 |
510 | # Validate schema basics
511 | if not isinstance(final_index_schema, dict) or "fields" not in final_index_schema:
512 | raise ToolInputError("Invalid index_schema format. Must be a dict with a 'fields' key.")
513 | schema_fields = final_index_schema.get("fields", {})
514 |
515 | # Validate searchable_attributes if provided
516 | if searchable_attributes:
517 | for field in searchable_attributes:
518 | if field not in schema_fields:
519 | raise ToolInputError(f"Searchable attribute '{field}' not found in index schema.")
520 |
521 | # Validate hybrid_search_attributes if provided
522 | if hybrid_search_attributes:
523 | for role in ["tensor", "lexical"]:
524 | if role in hybrid_search_attributes:
525 | for field in hybrid_search_attributes[role]:
526 | if field not in schema_fields:
527 | raise ToolInputError(f"Hybrid searchable attribute '{field}' (role: {role}) not found in index schema.")
528 |
529 | # --- Prepare Marqo Request ---
530 | try:
531 | mq = marqo.Client(url=final_marqo_url)
532 | marqo_index = mq.index(final_index_name)
533 | except Exception as e:
534 | raise ToolExecutionError(f"Failed to connect to Marqo at {final_marqo_url}: {e}", cause=e) from e
535 |
536 | # Build filter string dynamically using the final schema
537 | filter_str = _build_marqo_filter_string(filters, date_range, final_index_schema)
538 |
539 | # Determine search method and parameters
540 | search_method = "TENSOR" # Default to semantic
541 | marqo_search_params: Dict[str, Any] = {
542 | "q": query,
543 | "limit": limit,
544 | "offset": offset,
545 | "show_highlights": highlighting,
546 | # Rerank parameter might vary based on Marqo version and method
547 | # "re_ranker": "ms-marco-MiniLM-L-12-v2" if rerank else None
548 | }
549 |
550 | # Add filter string if generated
551 | if filter_str:
552 | marqo_search_params["filter_string"] = filter_str
553 |
554 | # Determine searchable attributes if not explicitly provided
555 | final_searchable_attributes = searchable_attributes
556 | final_hybrid_attributes = hybrid_search_attributes
557 |
558 | if not final_searchable_attributes and not final_hybrid_attributes:
559 | # Auto-detect attributes based on schema roles and types if not explicitly provided.
560 | logger.debug("Attempting to auto-detect searchable attributes from schema...")
561 |
562 | # 1. Identify potential tensor fields
563 | auto_tensor_fields = [name for name, props in schema_fields.items() if props.get("role") == "tensor_vector" or props.get("type") == "tensor"]
564 | if not auto_tensor_fields:
565 | # Fallback: use the explicitly named top-level tensor field if roles aren't set
566 | tensor_field_name = final_index_schema.get("tensor_field")
567 | if tensor_field_name and tensor_field_name in schema_fields:
568 | logger.debug(f"Using schema-defined tensor_field: {tensor_field_name}")
569 | auto_tensor_fields = [tensor_field_name]
570 | else:
571 | logger.debug("No tensor fields identified via role or top-level schema key.")
572 |
573 | # 2. Identify potential lexical fields
574 | auto_lexical_fields = [name for name, props in schema_fields.items() if props.get("searchable") == "lexical"]
575 | logger.debug(f"Auto-detected lexical fields: {auto_lexical_fields}")
576 |
577 | # 3. Decide configuration based on detected fields
578 | if auto_tensor_fields and auto_lexical_fields:
579 | # Both tensor and lexical fields found -> configure for HYBRID
580 | final_hybrid_attributes = {"tensor": auto_tensor_fields, "lexical": auto_lexical_fields}
581 | logger.debug(f"Configuring for HYBRID search with attributes: {final_hybrid_attributes}")
582 | elif auto_tensor_fields:
583 | # Only tensor fields found -> configure for TENSOR
584 | final_searchable_attributes = auto_tensor_fields
585 | logger.debug(f"Configuring for TENSOR search with attributes: {final_searchable_attributes}")
586 | elif auto_lexical_fields:
587 | # Only lexical fields found -> configure for LEXICAL
588 | final_searchable_attributes = auto_lexical_fields
589 | logger.debug(f"Configuring for LEXICAL search with attributes: {final_searchable_attributes}")
590 | else:
591 | # Last resort: No specific searchable fields identified.
592 | # Default to searching the schema's 'default_content_field' lexically.
593 | default_content = final_index_schema.get("default_content_field")
594 | if default_content and default_content in schema_fields:
595 | final_searchable_attributes = [default_content]
596 | logger.warning(f"No tensor or lexical fields marked in schema/params. Defaulting to LEXICAL search on field: '{default_content}'")
597 | else:
598 | # Critical fallback failure - cannot determine what to search.
599 | raise ToolInputError("Could not determine searchable attributes from schema. Ensure schema defines roles/searchable flags, or provide explicit attributes.")
600 |
601 |
602 | # Configure Hybrid Search based on semantic_weight and determined attributes
603 | if final_hybrid_attributes and 0.0 < semantic_weight < 1.0:
604 | search_method = "HYBRID"
605 | marqo_search_params["search_method"] = search_method
606 | marqo_search_params["hybrid_parameters"] = {
607 | "alpha": semantic_weight,
608 | "searchableAttributesTensor": final_hybrid_attributes.get("tensor", []),
609 | "searchableAttributesLexical": final_hybrid_attributes.get("lexical", []),
610 | # Add other hybrid params like retrievalMethod, rankingMethod if needed/supported
611 | "retrievalMethod": "disjunction", # Example
612 | "rankingMethod": "rrf", # Example
613 | }
614 | elif semantic_weight == 0.0:
615 | search_method = "LEXICAL"
616 | marqo_search_params["search_method"] = search_method
617 | # Ensure we use lexical attributes if hybrid wasn't configured
618 | if final_searchable_attributes:
619 | marqo_search_params["searchable_attributes"] = final_searchable_attributes
620 | elif final_hybrid_attributes and "lexical" in final_hybrid_attributes:
621 | marqo_search_params["searchable_attributes"] = final_hybrid_attributes["lexical"]
622 | else:
623 | raise ToolInputError("Lexical search selected (weight=0.0) but no lexical fields defined or provided.")
624 |
625 | else: # semantic_weight == 1.0 or hybrid attributes not set for hybrid
626 | search_method = "TENSOR"
627 | marqo_search_params["search_method"] = search_method
628 | # Ensure we use tensor attributes
629 | if final_searchable_attributes:
630 | marqo_search_params["searchable_attributes"] = final_searchable_attributes
631 | elif final_hybrid_attributes and "tensor" in final_hybrid_attributes:
632 | marqo_search_params["searchable_attributes"] = final_hybrid_attributes["tensor"]
633 | else:
634 | # Try the schema's main tensor field
635 | main_tensor_field = final_index_schema.get("tensor_field")
636 | if main_tensor_field:
637 | marqo_search_params["searchable_attributes"] = [main_tensor_field]
638 | else:
639 | raise ToolInputError("Tensor search selected (weight=1.0) but no tensor fields defined or provided.")
640 |
641 | # --- Execute Search ---
642 | logger.info(f"Executing Marqo search on index '{final_index_name}' with method '{search_method}'")
643 | logger.debug(f"Marqo search parameters: {marqo_search_params}")
644 |
645 | try:
646 | response = marqo_index.search(**marqo_search_params)
647 | logger.debug("Marqo response received.")
648 | except Exception as e:
649 | logger.error(f"Marqo search failed: {e}", exc_info=True)
650 | raise ToolExecutionError(f"Marqo search failed on index '{final_index_name}': {str(e)}", cause=e) from e
651 |
652 | # --- Process Response ---
653 | results_list = []
654 | default_content_field = final_index_schema.get("default_content_field", "content") # Fallback
655 |
656 | for hit in response.get("hits", []):
657 | metadata = {k: v for k, v in hit.items() if k not in ["_score", "_highlights", "_id"] and not k.startswith("__vector")}
658 | # Try to extract content from the default field, otherwise None
659 | content_value = hit.get(default_content_field)
660 |
661 | results_list.append(
662 | MarqoSearchResult(
663 | content=str(content_value) if content_value is not None else None,
664 | score=hit.get("_score", 0.0),
665 | highlights=hit.get("_highlights"),
666 | metadata=metadata,
667 | )
668 | )
669 |
670 | processing_time_ms = int(response.get("processingTimeMs", (time.perf_counter() - start_time_perf) * 1000))
671 |
672 | final_response = MarqoSearchResponse(
673 | results=results_list,
674 | total_hits=response.get("nbHits", 0), # Or use 'estimatedTotalHits' if available/preferred
675 | limit=response.get("limit", limit),
676 | offset=response.get("offset", offset),
677 | processing_time_ms=processing_time_ms,
678 | query=response.get("query", query),
679 | error=None,
680 | success=True
681 | )
682 |
683 | return final_response.dict()
684 |
685 |
686 | # --- Dynamically Augment Docstring ---
687 | # Logic to generate and apply dynamic documentation based on MARQO_CONFIG via LLM call.
688 |
689 | _docstring_augmentation_result: Optional[str] = None # Store the generated string
690 | _docstring_generation_done: bool = False # Flag to ensure generation/loading happens only once
691 |
692 | async def trigger_dynamic_docstring_generation():
693 | """
694 | Dynamically enhances the Marqo search tool docstring with index-specific documentation.
695 |
696 | This function uses an LLM to analyze the Marqo index configuration and generate custom
697 | documentation explaining the specific data domain, available filters, and example queries
698 | for the configured index. The resulting documentation is appended to the marqo_fused_search
699 | function's docstring.
700 |
701 | The function implements a caching mechanism:
702 | 1. First checks for a cached docstring in the CACHE_FILE_PATH
703 | 2. Validates cache freshness by comparing the modification time of the config file
704 | 3. If cache is invalid or missing, calls an LLM to generate a new docstring
705 | 4. Saves the new docstring to cache for future use
706 |
707 | This function should be called once during application startup, before any documentation
708 | is accessed. It is designed for async environments like FastAPI's startup events or
709 | any async initialization code.
710 |
711 | Dependencies:
712 | - Requires marqo_index_config.json to be properly configured
713 | - Uses CompletionClient to communicate with LLMs, requiring valid API keys
714 | - Needs write access to the cache directory for saving generated docstrings
715 |
716 | Returns:
717 | None. The result is applied directly to the marqo_fused_search.__doc__ attribute.
718 | """
719 | global _docstring_augmentation_result, _docstring_generation_done
720 | if _docstring_generation_done:
721 | return # Already done
722 |
723 | logger.info("Checking cache and potentially triggering dynamic docstring generation...")
724 | cached_data = None
725 | current_config_mtime = 0.0
726 |
727 | # 1. Get config file modification time
728 | try:
729 | if os.path.exists(CONFIG_FILE_PATH):
730 | current_config_mtime = os.path.getmtime(CONFIG_FILE_PATH)
731 | else:
732 | logger.warning(f"Marqo config file not found at {CONFIG_FILE_PATH} for mtime check.")
733 | except Exception as e:
734 | logger.error(f"Error getting modification time for {CONFIG_FILE_PATH}: {e}", exc_info=True)
735 |
736 | # 2. Try to load cache
737 | try:
738 | if os.path.exists(CACHE_FILE_PATH):
739 | with open(CACHE_FILE_PATH, 'r') as f:
740 | cached_data = json.load(f)
741 | logger.info(f"Loaded docstring augmentation cache from {CACHE_FILE_PATH}")
742 | except Exception as e:
743 | logger.warning(f"Could not load or parse cache file {CACHE_FILE_PATH}: {e}. Will regenerate.", exc_info=True)
744 | cached_data = None # Ensure regeneration if cache is corrupt
745 |
746 | # 3. Check cache validity
747 | if (
748 | cached_data and
749 | isinstance(cached_data, dict) and
750 | "timestamp" in cached_data and
751 | "augmentation" in cached_data and
752 | current_config_mtime > 0 and # Ensure we got a valid mtime for the config
753 | abs(cached_data["timestamp"] - current_config_mtime) < 1e-6 # Compare timestamps (allowing for float precision)
754 | ):
755 | logger.info("Cache is valid. Using cached docstring augmentation.")
756 | _docstring_augmentation_result = cached_data["augmentation"]
757 | else:
758 | logger.info("Cache invalid, missing, or config file updated. Regenerating docstring augmentation via LLM...")
759 | # Call the async function that constructs prompt and calls LLM
760 | generated_augmentation = await _generate_docstring_augmentation_from_config(MARQO_CONFIG)
761 |
762 | if generated_augmentation:
763 | _docstring_augmentation_result = generated_augmentation
764 | # Save to cache if successful
765 | try:
766 | cache_content = {
767 | "timestamp": current_config_mtime,
768 | "augmentation": _docstring_augmentation_result
769 | }
770 | with open(CACHE_FILE_PATH, 'w') as f:
771 | json.dump(cache_content, f, indent=2)
772 | logger.info(f"Saved new docstring augmentation to cache: {CACHE_FILE_PATH}")
773 | except Exception as e:
774 | logger.error(f"Failed to save docstring augmentation to cache file {CACHE_FILE_PATH}: {e}", exc_info=True)
775 | else:
776 | _docstring_augmentation_result = "" # Ensure it's a string even if generation fails
777 | logger.error("LLM generation failed. Docstring will not be augmented.")
778 | # Optional: Consider removing the cache file if generation fails to force retry next time?
779 | # try:
780 | # if os.path.exists(CACHE_FILE_PATH):
781 | # os.remove(CACHE_FILE_PATH)
782 | # except Exception as e_rem:
783 | # logger.error(f"Failed to remove potentially stale cache file {CACHE_FILE_PATH}: {e_rem}")
784 |
785 | _docstring_generation_done = True
786 | logger.info("Dynamic docstring generation/loading process complete.")
787 | # Now apply the result (either cached or newly generated)
788 | _apply_generated_docstring()
789 |
790 |
791 | def _apply_generated_docstring():
792 | """
793 | Applies the dynamically generated documentation to the marqo_fused_search function's docstring.
794 |
795 | This function takes the content from _docstring_augmentation_result (generated either via LLM
796 | or loaded from cache) and appends it to the existing docstring of the marqo_fused_search function.
797 | The function checks for the presence of a marker ("Configuration-Specific Notes:") to avoid
798 | applying the same augmentation multiple times.
799 |
800 | This function is called automatically at the end of trigger_dynamic_docstring_generation()
801 | and should not typically be called directly. It's designed as a separate function to allow
802 | for potential manual application in specialized scenarios.
803 |
804 | The function accesses the global variable _docstring_augmentation_result, which must be set
805 | prior to calling this function.
806 |
807 | Side Effects:
808 | Modifies marqo_fused_search.__doc__ by appending the dynamically generated content.
809 | """
810 | global _docstring_augmentation_result
811 |
812 | # Check if augmentation was successful and produced content
813 | if _docstring_augmentation_result:
814 | if marqo_fused_search.__doc__:
815 | base_doc = marqo_fused_search.__doc__.strip()
816 | # Avoid appending if already present (simple check)
817 | if "Configuration-Specific Notes:" not in base_doc:
818 | marqo_fused_search.__doc__ = base_doc + _docstring_augmentation_result
819 | logger.info(f"Dynamically generated docstring augmentation applied. New length: {len(marqo_fused_search.__doc__)}")
820 | else:
821 | logger.warning("marqo_fused_search function is missing a base docstring. Augmentation skipped.")
822 |
823 |
824 | # IMPORTANT:
825 | # The async function `trigger_dynamic_docstring_generation()`
826 | # must be called from your main application's async setup code
827 | # (e.g., FastAPI startup event) *before* the tool documentation is needed.
828 |
829 |
830 | # Example usage (for testing locally, if needed)
831 | async def _run_test():
832 | # Example: Ensure dynamic docstring is generated before running the test search
833 | # In a real app, this trigger would happen during startup.
834 | await trigger_dynamic_docstring_generation()
835 | print("--- Current Docstring ---")
836 | print(marqo_fused_search.__doc__)
837 | print("-----------------------")
838 |
839 | test_query = "revenue growth"
840 | logger.info(f"Running test search with query: '{test_query}'")
841 | try:
842 | # Assuming MARQO_CONFIG points to the financial index for this test
843 | results = await marqo_fused_search(
844 | query=test_query,
845 | limit=5,
846 | filters={"form_type": "10-K"}, # Example filter using default schema
847 | # date_range=DateRange(start_date=datetime(2023, 1, 1)) # Example date range
848 | )
849 | import json
850 | print(json.dumps(results, indent=2))
851 | logger.info(f"Test search successful. Found {results['total_hits']} hits.")
852 | except Exception as e:
853 | logger.error(f"Test search failed: {e}", exc_info=True)
854 |
855 | if __name__ == "__main__":
856 | import asyncio
857 | asyncio.run(_run_test())
```
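The schema-driven filtering described in `_build_marqo_filter_string` above can be exercised in isolation. The sketch below is illustrative: it assumes the `ultimate_mcp_server` package and its `marqo`/`httpx` dependencies are importable, and the field names (`category`, `created_at`) are hypothetical rather than part of the shipped `marqo_index_config.json`.

```python
from datetime import datetime

from ultimate_mcp_server.tools.marqo_fused_search import (
    DateRange,
    _build_marqo_filter_string,
)

# Hypothetical schema fragment; the helper only honors fields marked "filterable".
schema = {
    "fields": {
        "category": {"type": "keyword", "filterable": True},
        "created_at": {"type": "timestamp", "filterable": True},
    },
    "default_date_field": "created_at",
}

filter_str = _build_marqo_filter_string(
    filters={"category": ["news", "blog"]},  # a list becomes an OR group
    date_range=DateRange(
        start_date=datetime(2024, 1, 1),
        end_date=datetime(2024, 12, 31),
    ),
    schema=schema,
)

# Expected shape (exact timestamps depend on the local timezone):
#   created_at:[1704067200 TO 1735603200] AND category:(news OR blog)
print(filter_str)
```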
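Calling the tool itself with an explicit schema override, metadata filters, and a date range might look like the following sketch. It assumes a Marqo instance recent enough to support HYBRID search is reachable at the default URL, and that the example fields (`title`, `body`, `embedding`, `category`, `created_at`) exist in the target index; none of this is guaranteed by the shipped defaults.

```python
import asyncio
from datetime import datetime

from ultimate_mcp_server.tools.marqo_fused_search import DateRange, marqo_fused_search

# Hypothetical index schema override; roles and flags drive attribute auto-detection.
EXAMPLE_SCHEMA = {
    "fields": {
        "title": {"type": "text", "role": "metadata", "searchable": "lexical"},
        "body": {"type": "text", "role": "content", "searchable": "lexical"},
        "embedding": {"type": "tensor", "role": "tensor_vector"},
        "category": {"type": "keyword", "role": "metadata", "filterable": True},
        "created_at": {"type": "timestamp", "role": "date", "filterable": True, "sortable": True},
    },
    "tensor_field": "embedding",
    "default_content_field": "body",
    "default_date_field": "created_at",
}

async def main() -> None:
    # A semantic_weight strictly between 0 and 1, plus detected tensor and lexical
    # fields, selects the HYBRID search path in marqo_fused_search.
    response = await marqo_fused_search(
        query="quarterly revenue growth",
        index_schema=EXAMPLE_SCHEMA,
        filters={"category": ["news", "blog"]},
        date_range=DateRange(start_date=datetime(2024, 1, 1)),
        semantic_weight=0.7,
        limit=5,
    )
    for hit in response["results"]:
        snippet = (hit["content"] or "")[:80]
        print(f"{hit['score']:.3f}  {snippet}")

if __name__ == "__main__":
    asyncio.run(main())
```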
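The module's closing note says `trigger_dynamic_docstring_generation()` must run during async application startup. One way to wire that up, sketched with an illustrative FastAPI app that is not part of this repository:

```python
from fastapi import FastAPI

from ultimate_mcp_server.tools.marqo_fused_search import trigger_dynamic_docstring_generation

app = FastAPI()

@app.on_event("startup")
async def enrich_marqo_tool_docs() -> None:
    # Generates (or loads from cache) the index-specific docstring section
    # before any tool documentation is served to clients.
    await trigger_dynamic_docstring_generation()
```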
--------------------------------------------------------------------------------
/examples/tournament_text_demo.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | """
3 | Tournament Text Demo - Demonstrates running a text improvement tournament
4 |
5 | This script shows how to:
6 | 1. Create a tournament with multiple models focused on text refinement
7 | 2. Track progress across multiple rounds
8 | 3. Retrieve and analyze the improved essay/text
9 |
10 | The tournament task is to refine and improve a comparative essay on
11 | transformer vs. diffusion model architectures, demonstrating how
12 | the tournament system can be used for general text refinement tasks.
13 |
14 | Usage:
15 | python examples/tournament_text_demo.py [--topic TOPIC]
16 |
17 | Options:
18 | --topic TOPIC Specify a different essay topic (default: transformers vs diffusion models)
19 | """
20 |
21 | import argparse
22 | import asyncio
23 | import json
24 | import os
25 | import re
26 | import sys
27 | from collections import namedtuple
28 | from pathlib import Path
29 | from typing import Any, Dict, List, Optional
30 |
31 | # Add project root to path for imports when running as script
32 | sys.path.insert(0, str(Path(__file__).parent.parent))
33 |
34 | from rich import box
35 | from rich.markup import escape
36 | from rich.panel import Panel
37 | from rich.rule import Rule
38 | from rich.table import Table
39 |
40 | from ultimate_mcp_server.core.models.requests import CompletionRequest
41 | from ultimate_mcp_server.core.providers.base import get_provider
42 | from ultimate_mcp_server.core.server import Gateway
43 | from ultimate_mcp_server.services.prompts import PromptTemplate
44 | from ultimate_mcp_server.tools.tournament import (
45 | create_tournament,
46 | get_tournament_results,
47 | get_tournament_status,
48 | )
49 | from ultimate_mcp_server.utils import get_logger, process_mcp_result
50 | from ultimate_mcp_server.utils.display import (
51 | CostTracker,
52 | display_tournament_results,
53 | display_tournament_status,
54 | )
55 | from ultimate_mcp_server.utils.logging.console import console
56 |
57 | DEFAULT_MODEL_CONFIGS_TEXT: List[Dict[str, Any]] = [
58 | {
59 | "model_id": "openai/gpt-4o-mini",
60 | "diversity_count": 1,
61 | "temperature": 0.75,
62 | },
63 | {
64 | "model_id": "anthropic/claude-3-5-haiku-20241022",
65 | "diversity_count": 1,
66 | "temperature": 0.7,
67 | },
68 | ]
69 | DEFAULT_NUM_ROUNDS_TEXT = 2
70 | DEFAULT_TOURNAMENT_NAME_TEXT = "Advanced Text Refinement Tournament"
71 |
72 | def parse_arguments_text():
73 | parser = argparse.ArgumentParser(description="Run a text refinement tournament demo")
74 | parser.add_argument(
75 | "--topic", type=str, default="transformer_vs_diffusion",
76 | choices=list(TOPICS.keys()) + ["custom"],
77 | help="Essay topic (default: transformer_vs_diffusion)"
78 | )
79 | parser.add_argument(
80 | "--custom-topic", type=str,
81 | help="Custom essay topic (used when --topic=custom)"
82 | )
83 | parser.add_argument(
84 | "--rounds", type=int, default=DEFAULT_NUM_ROUNDS_TEXT,
85 | help=f"Number of tournament rounds (default: {DEFAULT_NUM_ROUNDS_TEXT})"
86 | )
87 | parser.add_argument(
88 | "--models", type=str, nargs="+",
89 | default=[mc["model_id"] for mc in DEFAULT_MODEL_CONFIGS_TEXT],
90 | help="List of model IDs to participate."
91 | )
92 | return parser.parse_args()
93 |
94 |
95 | # Initialize logger using get_logger
96 | logger = get_logger("example.tournament_text")
97 |
98 | # Simple structure for cost tracking built from result dicts (token counts may be missing)
99 | TrackableResult = namedtuple("TrackableResult", ["cost", "input_tokens", "output_tokens", "provider", "model", "processing_time"])
100 |
101 | # Initialize global gateway
102 | gateway: Optional[Gateway] = None
103 |
104 | # --- Configuration ---
105 | # Adjust model IDs based on your configured providers
106 | MODEL_IDS = [
107 | "openai:gpt-4.1-mini",
108 | "deepseek:deepseek-chat",
109 | "gemini:gemini-2.5-pro-preview-03-25"
110 | ]
111 | NUM_ROUNDS = 2  # Kept low for faster execution and easier debugging
112 | TOURNAMENT_NAME = "Text Refinement Tournament Demo"
113 |
114 | # The generic essay prompt template
115 | TEMPLATE_TEXT = """
116 | # GENERIC TEXT TOURNAMENT PROMPT TEMPLATE
117 |
118 | Please write a high-quality, comprehensive {{content_type}} on the topic of: "{{topic}}".
119 |
120 | {{context}}
121 |
122 | Your {{content_type}} should thoroughly explore the following sections and subtopics:
123 | {% for section in sections %}
124 | ## {{section.title}}
125 | {% for subtopic in section.subtopics %}
126 | - {{subtopic}}
127 | {% endfor %}
128 | {% endfor %}
129 |
130 | Adhere to the following style and content requirements:
131 | {{style_requirements}}
132 |
133 | Please provide only the {{content_type}} text. If you have meta-comments or a thinking process,
134 | enclose it in <thinking>...</thinking> tags at the very beginning of your response.
135 | """
136 |
137 | # Define predefined topics
138 | TOPICS = {
139 | "transformer_vs_diffusion": {
140 | "content_type": "technical essay",
141 | "topic": "comparing transformer architecture and diffusion models",
142 | "context": "Focus on their underlying mechanisms, common applications, strengths, weaknesses, and future potential in AI.",
143 | "sections": [
144 | {"title": "Core Principles", "subtopics": ["Transformer self-attention, positional encoding", "Diffusion forward/reverse processes, noise schedules"]},
145 | {"title": "Applications & Performance", "subtopics": ["Typical tasks for transformers (NLP, vision)", "Typical tasks for diffusion models (image/audio generation)", "Comparative performance benchmarks or known strengths"]},
146 | {"title": "Limitations & Challenges", "subtopics": ["Computational costs, data requirements", "Interpretability, controllability, known failure modes for each"]},
147 | {"title": "Future Outlook", "subtopics": ["Potential for hybridization", "Scaling frontiers", "Impact on AGI research"]}
148 | ],
149 | "style_requirements": "Write in a clear, objective, and technically precise manner suitable for an audience with a machine learning background. Aim for around 800-1200 words."
150 | },
151 | "llm_vs_traditional_ai": {
152 | "content_type": "comparative analysis",
153 | "topic": "comparing large language models to traditional AI approaches",
154 | "context": "The rise of large language models has shifted the AI landscape significantly.",
155 | "sections": [
156 | {
157 | "title": "Fundamental Differences",
158 | "subtopics": [
159 | "How LLMs differ architecturally from traditional ML/AI systems",
160 | "Data requirements and training approaches"
161 | ]
162 | },
163 | {
164 | "title": "Capabilities and Limitations",
165 | "subtopics": [
166 | "Tasks where LLMs excel compared to traditional approaches",
167 | "Situations where traditional AI methods remain superior",
168 | "Emergent capabilities unique to large language models"
169 | ]
170 | },
171 | {
172 | "title": "Real-world Applications",
173 | "subtopics": [
174 | "Industries being transformed by LLMs",
175 | "Where traditional AI approaches continue to dominate",
176 | "Examples of hybrid systems combining both approaches"
177 | ]
178 | },
179 | {
180 | "title": "Future Outlook",
181 | "subtopics": [
182 | "Projected evolution of both paradigms",
183 | "Potential convergence or further divergence",
184 | "Research frontiers for each approach"
185 | ]
186 | }
187 | ],
188 | "style_requirements": "Present a balanced analysis that acknowledges the strengths and weaknesses of both paradigms. Support claims with specific examples where possible."
189 | }
190 | }
191 |
192 | # Create custom topic template
193 | def create_custom_topic_variables(topic_description):
194 | """Create a simple custom topic with standard sections"""
195 | return {
196 | "content_type": "essay",
197 | "topic": topic_description,
198 | "context": "",
199 | "sections": [
200 | {
201 | "title": "Background and Key Concepts",
202 | "subtopics": [
203 | "Define and explain the core elements of the topic",
204 | "Provide necessary historical or theoretical context"
205 | ]
206 | },
207 | {
208 | "title": "Analysis of Main Aspects",
209 | "subtopics": [
210 | "Examine the primary dimensions or elements of the topic",
211 | "Discuss relationships between different aspects",
212 | "Identify patterns or trends relevant to the topic"
213 | ]
214 | },
215 | {
216 | "title": "Practical Implications",
217 | "subtopics": [
218 | "Real-world applications or impacts",
219 | "How this topic affects related fields or domains"
220 | ]
221 | },
222 | {
223 | "title": "Future Perspectives",
224 | "subtopics": [
225 | "Emerging trends or developments",
226 | "Potential challenges and opportunities",
227 | "Areas requiring further research or exploration"
228 | ]
229 | }
230 | ],
231 | "style_requirements": "Present a comprehensive and well-structured analysis with clear reasoning and specific examples where appropriate."
232 | }
233 |
234 | # Create the prompt template object
235 | essay_template = PromptTemplate(
236 | template=TEMPLATE_TEXT,
237 | template_id="text_tournament_template",
238 | description="A template for text tournament prompts",
239 | required_vars=["content_type", "topic", "context", "sections", "style_requirements"]
240 | )
241 |
242 | # --- Helper Functions ---
243 | def parse_result(result):
244 | """Parse the result from a tool call into a usable dictionary.
245 |
246 | Handles various return types from MCP tools.
247 | """
248 | try:
249 | # Handle TextContent object (which has a .text attribute)
250 | if hasattr(result, 'text'):
251 | try:
252 | # Try to parse the text as JSON
253 | return json.loads(result.text)
254 | except json.JSONDecodeError:
255 | # Return the raw text if not JSON
256 | return {"text": result.text}
257 |
258 | # Handle list result
259 | if isinstance(result, list):
260 | if result:
261 | first_item = result[0]
262 | if hasattr(first_item, 'text'):
263 | try:
264 | return json.loads(first_item.text)
265 | except json.JSONDecodeError:
266 | return {"text": first_item.text}
267 | else:
268 | return first_item
269 | return {}
270 |
271 | # Handle dictionary directly
272 | if isinstance(result, dict):
273 | return result
274 |
275 | # Handle other potential types or return error
276 | else:
277 | return {"error": f"Unexpected result type: {type(result)}"}
278 |
279 | except Exception as e:
280 | return {"error": f"Error parsing result: {str(e)}"}
281 |
282 |
283 | async def setup_gateway():
284 | """Set up the gateway for demonstration."""
285 | global gateway
286 |
287 | # Create gateway instance
288 | logger.info("Initializing gateway for demonstration", emoji_key="start")
289 | gateway = Gateway("text-tournament-demo", register_tools=False)
290 |
291 | # Initialize the server with all providers and built-in tools
292 | await gateway._initialize_providers()
293 |
294 | # Manually register tournament tools
295 | mcp = gateway.mcp
296 | mcp.tool()(create_tournament)
297 | mcp.tool()(get_tournament_status)
298 | mcp.tool()(get_tournament_results)
299 | logger.info("Manually registered tournament tools.")
300 |
301 | # Verify tools are registered
302 | tools = await gateway.mcp.list_tools()
303 |     tournament_tools = [t.name for t in tools if 'tournament' in t.name]
304 |     logger.info(f"Registered tournament tools: {tournament_tools}", emoji_key="info")
305 | 
306 |     if not any('tournament' in t.name.lower() for t in tools):
307 | logger.warning("No tournament tools found. Make sure tournament plugins are registered.", emoji_key="warning")
308 |
309 | logger.success("Gateway initialized", emoji_key="success")
310 |
311 |
312 | async def poll_tournament_status(tournament_id: str, storage_path: Optional[str] = None, interval: int = 5) -> Optional[str]:
313 | """Poll the tournament status until it reaches a final state.
314 |
315 | Args:
316 | tournament_id: ID of the tournament to poll
317 | storage_path: Optional storage path to avoid tournament not found issues
318 | interval: Time between status checks in seconds
319 | """
320 | logger.info(f"Polling status for tournament {tournament_id}...", emoji_key="poll")
321 | final_states = ["COMPLETED", "FAILED", "CANCELLED"]
322 |
323 | # Add direct file polling capability to handle case where tournament manager can't find the tournament
324 | if storage_path:
325 | storage_dir = Path(storage_path)
326 | state_file = storage_dir / "tournament_state.json"
327 | logger.debug(f"Will check tournament state file directly at: {state_file}")
328 |
329 | while True:
330 | status_input = {"tournament_id": tournament_id}
331 | status_result = await gateway.mcp.call_tool("get_tournament_status", status_input)
332 | status_data = await process_mcp_result(status_result)
333 |
334 | if "error" in status_data:
335 | # If tournament manager couldn't find the tournament but we have the storage path,
336 | # try to read the state file directly (this is a fallback mechanism)
337 | if storage_path and "not found" in status_data.get("error", "").lower():
338 | try:
339 | logger.debug(f"Attempting to read tournament state directly from: {state_file}")
340 | if state_file.exists():
341 | with open(state_file, 'r', encoding='utf-8') as f:
342 | direct_status_data = json.load(f)
343 | status = direct_status_data.get("status")
344 | current_round = direct_status_data.get("current_round", 0)
345 | total_rounds = direct_status_data.get("config", {}).get("rounds", 0)
346 |
347 | # Create a status object compatible with our display function
348 | status_data = {
349 | "tournament_id": tournament_id,
350 | "status": status,
351 | "current_round": current_round,
352 | "total_rounds": total_rounds,
353 | "storage_path": storage_path
354 | }
355 | logger.debug(f"Successfully read direct state: {status}")
356 | else:
357 | logger.warning(f"State file not found at: {state_file}")
358 | except Exception as e:
359 | logger.error(f"Error reading state file directly: {e}")
360 |                 if "error" in status_data:
361 |                     return None # Fallback could not recover a usable state; stop polling
362 | else:
363 | # Standard error case
364 | logger.error(f"Error fetching status: {status_data['error']}", emoji_key="error")
365 | return None # Indicate error during polling
366 |
367 | # Display improved status using the imported function
368 | display_tournament_status(status_data)
369 |
370 | status = status_data.get("status")
371 | if status in final_states:
372 | logger.success(f"Tournament reached final state: {status}", emoji_key="success")
373 | return status
374 |
375 | await asyncio.sleep(interval)
376 |
377 |
378 | def extract_thinking(text: str) -> str:
379 | """Extract <thinking> tags content (simple version)."""
380 | match = re.search(r"<thinking>(.*?)</thinking>", text, re.DOTALL)
381 | return match.group(1).strip() if match else ""
382 |
383 |
384 | def analyze_text_quality(text: str) -> Dict[str, Any]:
385 | """Basic text quality analysis."""
386 | word_count = len(text.split())
387 | # Add more metrics later (readability, sentiment, etc.)
388 | return {"word_count": word_count}
389 |
390 |
391 | async def evaluate_essays(essays_by_model: Dict[str, str], tracker: CostTracker = None) -> Dict[str, Any]:
392 | """Use LLM to evaluate which essay is the best.
393 |
394 | Args:
395 | essays_by_model: Dictionary mapping model IDs to their essay texts
396 | tracker: Optional CostTracker to track API call costs
397 |
398 | Returns:
399 | Dictionary with evaluation results
400 | """
401 | if not essays_by_model or len(essays_by_model) < 2:
402 | return {"error": "Not enough essays to compare"}
403 |
404 | eval_cost = 0.0 # Initialize evaluation cost
405 |
406 | try:
407 | # Format the essays for evaluation
408 | evaluation_prompt = "# Essay Evaluation\n\nPlease analyze the following essays on the same topic and determine which one is the best. "
409 | evaluation_prompt += "Consider factors such as technical accuracy, clarity, organization, depth of analysis, and overall quality.\n\n"
410 |
411 | # Add each essay
412 | for i, (model_id, essay) in enumerate(essays_by_model.items(), 1):
413 | display_model = model_id.split(':')[-1] if ':' in model_id else model_id
414 | # Limit each essay to 3000 chars to fit context windows
415 | truncated_essay = essay[:3000]
416 | if len(essay) > 3000:
417 | truncated_essay += "..."
418 | evaluation_prompt += f"## Essay {i} (by {display_model})\n\n{truncated_essay}\n\n"
419 |
420 | evaluation_prompt += "\n# Your Evaluation Task\n\n"
421 | evaluation_prompt += "1. Rank the essays from best to worst\n"
422 | evaluation_prompt += "2. Explain your reasoning for the ranking\n"
423 | evaluation_prompt += "3. Highlight specific strengths of the best essay\n"
424 | evaluation_prompt += "4. Suggest one improvement for each essay\n"
425 |
426 | # Use a more capable model for evaluation
427 | model_to_use = "gemini:gemini-2.5-pro-preview-03-25"
428 |
429 | logger.info(f"Evaluating essays using {model_to_use}...", emoji_key="evaluate")
430 |
431 | # Get the provider
432 | provider_id = model_to_use.split(':')[0]
433 | provider = await get_provider(provider_id)
434 |
435 | if not provider:
436 | return {
437 | "error": f"Provider {provider_id} not available for evaluation",
438 | "model_used": model_to_use,
439 | "eval_prompt": evaluation_prompt,
440 | "cost": 0.0
441 | }
442 |
443 | # Generate completion for evaluation with timeout
444 | try:
445 | request = CompletionRequest(prompt=evaluation_prompt, model=model_to_use)
446 |
447 | # Set a timeout for the completion request
448 | completion_task = provider.generate_completion(
449 | prompt=request.prompt,
450 | model=request.model
451 | )
452 |
453 | # 45 second timeout for evaluation
454 | completion_result = await asyncio.wait_for(completion_task, timeout=45)
455 |
456 | # Track API call if tracker provided
457 | if tracker:
458 | tracker.add_call(completion_result)
459 |
460 | # Accumulate cost
461 | if hasattr(completion_result, 'cost'):
462 | eval_cost = completion_result.cost
463 | elif hasattr(completion_result, 'metrics') and isinstance(completion_result.metrics, dict):
464 | eval_cost = completion_result.metrics.get('cost', 0.0)
465 |
466 | # Prepare result dict
467 | result = {
468 | "evaluation": completion_result.text,
469 | "model_used": model_to_use,
470 | "eval_prompt": evaluation_prompt,
471 | "cost": eval_cost # Return the cost
472 | }
473 | except asyncio.TimeoutError:
474 | logger.warning(f"Evaluation with {model_to_use} timed out after 45 seconds", emoji_key="warning")
475 | return {
476 | "error": "Evaluation timed out after 45 seconds",
477 | "model_used": model_to_use,
478 | "eval_prompt": evaluation_prompt,
479 | "cost": 0.0
480 | }
481 | except Exception as request_error:
482 | logger.error(f"Error during model request: {str(request_error)}", emoji_key="error")
483 | return {
484 | "error": f"Error during model request: {str(request_error)}",
485 | "model_used": model_to_use,
486 | "eval_prompt": evaluation_prompt,
487 | "cost": 0.0
488 | }
489 |
490 | except Exception as e:
491 | logger.error(f"Essay evaluation failed: {str(e)}", emoji_key="error", exc_info=True)
492 | return {
493 | "error": str(e),
494 | "model_used": model_to_use if 'model_to_use' in locals() else "unknown",
495 | "eval_prompt": evaluation_prompt if 'evaluation_prompt' in locals() else "Error generating prompt",
496 | "cost": 0.0
497 | }
498 |
499 | return result
500 |
501 |
502 | async def calculate_tournament_costs(rounds_results, evaluation_cost=None):
503 | """Calculate total costs of the tournament by model and grand total.
504 |
505 | Args:
506 | rounds_results: List of round results data from tournament results
507 | evaluation_cost: Optional cost of the final evaluation step
508 |
509 | Returns:
510 | Dictionary with cost information
511 | """
512 | model_costs = {}
513 | total_cost = 0.0
514 |
515 | # Process costs for each round
516 | for _round_idx, round_data in enumerate(rounds_results):
517 | responses = round_data.get('responses', {})
518 | for model_id, response in responses.items():
519 | metrics = response.get('metrics', {})
520 | cost = metrics.get('cost', 0.0)
521 |
522 | # Convert to float if it's a string
523 | if isinstance(cost, str):
524 | try:
525 | cost = float(cost.replace('$', ''))
526 | except (ValueError, TypeError):
527 | cost = 0.0
528 |
529 | # Initialize model if not present
530 | if model_id not in model_costs:
531 | model_costs[model_id] = 0.0
532 |
533 | # Add to model total and grand total
534 | model_costs[model_id] += cost
535 | total_cost += cost
536 |
537 | # Add evaluation cost if provided
538 | if evaluation_cost:
539 | total_cost += evaluation_cost
540 | model_costs['evaluation'] = evaluation_cost
541 |
542 | return {
543 | 'model_costs': model_costs,
544 | 'total_cost': total_cost
545 | }
546 |
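# Illustrative note (not part of the original file): calculate_tournament_costs
# expects `rounds_results` shaped roughly like
#     [{"responses": {"openai:gpt-4.1-mini": {"metrics": {"cost": 0.0012}, ...}}}, ...]
# and returns a dict like
#     {"model_costs": {"openai:gpt-4.1-mini": 0.0012, ..., "evaluation": <eval cost>},
#      "total_cost": <sum of per-model costs plus the optional evaluation cost>}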
547 |
548 | # --- Main Script Logic ---
549 | async def run_tournament_demo(tracker: CostTracker):
550 | """Run the text tournament demo."""
551 | # Parse command line arguments
552 | args = parse_arguments_text()
553 |
554 | # Determine which topic to use
555 | if args.topic == "custom" and args.custom_topic:
556 | # Custom topic provided via command line
557 | topic_name = "custom"
558 | essay_variables = create_custom_topic_variables(args.custom_topic)
559 | topic_description = args.custom_topic
560 | log_topic_info = f"Using custom topic: [yellow]{escape(topic_description)}[/yellow]"
561 | elif args.topic in TOPICS:
562 | # Use one of the predefined topics
563 | topic_name = args.topic
564 | essay_variables = TOPICS[args.topic]
565 | topic_description = essay_variables["topic"]
566 | log_topic_info = f"Using predefined topic: [yellow]{escape(topic_description)}[/yellow]"
567 | else:
568 | # Default to transformer vs diffusion if topic not recognized
569 | topic_name = "transformer_vs_diffusion"
570 | essay_variables = TOPICS[topic_name]
571 | topic_description = essay_variables['topic']
572 | log_topic_info = f"Using default topic: [yellow]{escape(topic_description)}[/yellow]"
573 |
574 | # Use Rich Rule for title
575 | console.print(Rule(f"[bold blue]{TOURNAMENT_NAME} - {topic_name.replace('_', ' ').title()}[/bold blue]"))
576 | console.print(log_topic_info)
577 | console.print(f"Models: [cyan]{', '.join(MODEL_IDS)}[/cyan]")
578 | console.print(f"Rounds: [cyan]{NUM_ROUNDS}[/cyan]")
579 |
580 | # Render the template
581 | try:
582 | rendered_prompt = essay_template.render(essay_variables)
583 | logger.info(f"Template rendered for topic: {topic_name}", emoji_key="template")
584 |
585 | # Show prompt preview in a Panel
586 | prompt_preview = rendered_prompt.split("\n")[:10] # Show more lines
587 | preview_text = "\n".join(prompt_preview) + "\n..."
588 | console.print(Panel(escape(preview_text), title="[bold]Rendered Prompt Preview[/bold]", border_style="dim blue", expand=False))
589 |
590 | except Exception as e:
591 | logger.error(f"Template rendering failed: {str(e)}", emoji_key="error", exc_info=True)
592 | # Log template and variables for debugging using logger
593 | logger.debug(f"Template: {TEMPLATE_TEXT}")
594 | logger.debug(f"Variables: {escape(str(essay_variables))}") # Escape potentially complex vars
595 | return 1
596 |
597 | # 1. Create the tournament
598 | # Prepare model configurations
599 | # Default temperature from DEFAULT_MODEL_CONFIGS_TEXT, assuming it's a common parameter.
600 | # The create_tournament tool itself will parse these against InputModelConfig.
601 | model_configs = [{"model_id": mid, "diversity_count": 1, "temperature": 0.7 } for mid in MODEL_IDS]
602 |
603 | create_input = {
604 | "name": f"{TOURNAMENT_NAME} - {topic_name.replace('_', ' ').title()}",
605 | "prompt": rendered_prompt,
606 | "models": model_configs, # Changed from model_ids to models
607 | "rounds": NUM_ROUNDS,
608 | "tournament_type": "text"
609 | }
610 |
611 | try:
612 | logger.info("Creating tournament...", emoji_key="processing")
613 | create_result = await gateway.mcp.call_tool("create_tournament", create_input)
614 | create_data = await process_mcp_result(create_result)
615 |
616 | if "error" in create_data:
617 | error_msg = create_data.get("error", "Unknown error")
618 | logger.error(f"Failed to create tournament: {error_msg}. Exiting.", emoji_key="error")
619 | return 1
620 |
621 | tournament_id = create_data.get("tournament_id")
622 | if not tournament_id:
623 | logger.error("No tournament ID returned. Exiting.", emoji_key="error")
624 | return 1
625 |
626 | # Extract storage path for reference
627 | storage_path = create_data.get("storage_path")
628 | logger.info(f"Tournament created with ID: {tournament_id}", emoji_key="tournament")
629 | if storage_path:
630 | logger.info(f"Tournament storage path: {storage_path}", emoji_key="path")
631 |
632 | # Add a small delay to ensure the tournament state is saved before proceeding
633 | await asyncio.sleep(2)
634 |
635 | # 2. Poll for status
636 | final_status = await poll_tournament_status(tournament_id, storage_path)
637 |
638 | # 3. Fetch and display final results
639 | if final_status == "COMPLETED":
640 | logger.info("Fetching final results...", emoji_key="results")
641 | results_input = {"tournament_id": tournament_id}
642 | final_results = await gateway.mcp.call_tool("get_tournament_results", results_input)
643 | results_data = await process_mcp_result(final_results)
644 |
645 | if "error" not in results_data:
646 | # Use the imported display function for tournament results
647 | display_tournament_results(results_data)
648 |
649 | # Track aggregated tournament cost (excluding separate evaluation)
650 | if isinstance(results_data, dict) and "cost" in results_data:
651 | try:
652 | total_cost = results_data.get("cost", {}).get("total_cost", 0.0)
653 | processing_time = results_data.get("total_processing_time", 0.0)
654 | trackable = TrackableResult(
655 | cost=total_cost,
656 | input_tokens=0,
657 | output_tokens=0,
658 | provider="tournament",
659 | model="text_tournament",
660 | processing_time=processing_time
661 | )
662 | tracker.add_call(trackable)
663 | logger.info(f"Tracked tournament cost: ${total_cost:.6f}", emoji_key="cost")
664 | except Exception as track_err:
665 | logger.warning(f"Could not track tournament cost: {track_err}", exc_info=False)
666 |
667 | # Analyze round progression if available
668 | rounds_results = results_data.get('rounds_results', [])
669 | if rounds_results:
670 | console.print(Rule("[bold blue]Essay Evolution Analysis[/bold blue]"))
671 |
672 | for round_idx, round_data in enumerate(rounds_results):
673 | console.print(f"[bold]Round {round_idx} Analysis:[/bold]")
674 | responses = round_data.get('responses', {})
675 |
676 | round_table = Table(box=box.MINIMAL, show_header=True, expand=False)
677 | round_table.add_column("Model", style="magenta")
678 | round_table.add_column("Word Count", style="green", justify="right")
679 |
680 | has_responses = False
681 | for model_id, response in responses.items():
682 | display_model = escape(model_id.split(':')[-1])
683 | response_text = response.get('response_text', '')
684 |
685 | if response_text:
686 | has_responses = True
687 | metrics = analyze_text_quality(response_text)
688 | round_table.add_row(
689 | display_model,
690 | str(metrics['word_count'])
691 | )
692 |
693 | if has_responses:
694 | console.print(round_table)
695 | else:
696 | console.print("[dim]No valid responses recorded for this round.[/dim]")
697 | console.print() # Add space between rounds
698 |
699 | # Evaluate final essays using LLM
700 | final_round = rounds_results[-1]
701 | final_responses = final_round.get('responses', {})
702 |
703 | # Track evaluation cost
704 | evaluation_cost = 0.0
705 |
706 | if final_responses:
707 | console.print(Rule("[bold blue]AI Evaluation of Essays[/bold blue]"))
708 | console.print("[bold]Evaluating final essays...[/bold]")
709 |
710 | essays_by_model = {}
711 | for model_id, response in final_responses.items():
712 | essays_by_model[model_id] = response.get('response_text', '')
713 |
714 | evaluation_result = await evaluate_essays(essays_by_model, tracker)
715 |
716 | if "error" not in evaluation_result:
717 | console.print(Panel(
718 | escape(evaluation_result["evaluation"]),
719 | title=f"[bold]Essay Evaluation (by {evaluation_result['model_used'].split(':')[-1]})[/bold]",
720 | border_style="green",
721 | expand=False
722 | ))
723 |
724 |                         # Track evaluation cost separately (pull it from the evaluation result first)
725 |                         if (evaluation_cost := evaluation_result.get('cost', 0.0)) > 0:
726 | try:
727 | trackable_eval = TrackableResult(
728 | cost=evaluation_cost,
729 | input_tokens=0, # Tokens for eval not easily available here
730 | output_tokens=0,
731 | provider=evaluation_result['model_used'].split(':')[0],
732 | model=evaluation_result['model_used'].split(':')[-1],
733 | processing_time=0 # Eval time not tracked here
734 | )
735 | tracker.add_call(trackable_eval)
736 | except Exception as track_err:
737 | logger.warning(f"Could not track evaluation cost: {track_err}", exc_info=False)
738 |
739 | # Save evaluation result to a file in the tournament directory
740 | if storage_path:
741 | try:
742 | evaluation_file = os.path.join(storage_path, "essay_evaluation.md")
743 | with open(evaluation_file, "w", encoding="utf-8") as f:
744 | f.write(f"# Essay Evaluation by {evaluation_result['model_used']}\n\n")
745 | f.write(evaluation_result["evaluation"])
746 |
747 | logger.info(f"Evaluation saved to {evaluation_file}", emoji_key="save")
748 | except Exception as e:
749 | logger.warning(f"Could not save evaluation to file: {str(e)}", emoji_key="warning")
750 |
751 | # Track evaluation cost if available
752 | evaluation_cost = evaluation_result.get('cost', 0.0)
753 | logger.info(f"Evaluation cost: ${evaluation_cost:.6f}", emoji_key="cost")
754 | else:
755 | console.print(f"[yellow]Could not evaluate essays: {evaluation_result.get('error')}[/yellow]")
756 | # Try with fallback model if Gemini fails
757 | if "gemini" in evaluation_result.get("model_used", ""):
758 | console.print("[bold]Trying evaluation with fallback model (gpt-4.1-mini)...[/bold]")
759 | # Switch to OpenAI model as backup
760 | essays_by_model_limited = {}
761 | # Limit content size to avoid token limits
762 | for model_id, essay in essays_by_model.items():
763 | essays_by_model_limited[model_id] = essay[:5000] # Shorter excerpt to fit in context
764 |
765 | fallback_evaluation = {
766 | "model_used": "openai:gpt-4.1-mini",
767 | "eval_prompt": evaluation_result.get("eval_prompt", "Evaluation failed")
768 | }
769 |
770 | try:
771 | provider_id = "openai"
772 | provider = await get_provider(provider_id)
773 |
774 | if provider:
775 | # Create a shorter, simplified prompt
776 | simple_prompt = "Compare these essays and rank them from best to worst:\n\n"
777 | for i, (model_id, essay) in enumerate(essays_by_model_limited.items(), 1):
778 | display_model = model_id.split(':')[-1] if ':' in model_id else model_id
779 | simple_prompt += f"Essay {i} ({display_model}):\n{essay[:2000]}...\n\n"
780 |
781 | request = CompletionRequest(prompt=simple_prompt, model="openai:gpt-4.1-mini")
782 | completion_result = await provider.generate_completion(
783 | prompt=request.prompt,
784 | model=request.model
785 | )
786 |
787 | fallback_evaluation["evaluation"] = completion_result.text
788 |
789 | # Track fallback evaluation cost
790 | if completion_result.cost > 0:
791 | try:
792 | trackable_fallback = TrackableResult(
793 | cost=completion_result.cost,
794 | input_tokens=0,
795 | output_tokens=0,
796 | provider="openai",
797 | model="gpt-4.1-mini",
798 | processing_time=0 # Eval time not tracked
799 | )
800 | tracker.add_call(trackable_fallback)
801 | except Exception as track_err:
802 | logger.warning(f"Could not track fallback evaluation cost: {track_err}", exc_info=False)
803 |
804 | logger.info(f"Fallback evaluation cost: ${completion_result.cost:.6f}", emoji_key="cost")
805 |
806 | console.print(Panel(
807 | escape(fallback_evaluation["evaluation"]),
808 | title="[bold]Fallback Evaluation (by gpt-4.1-mini)[/bold]",
809 | border_style="yellow",
810 | expand=False
811 | ))
812 |
813 | # Save fallback evaluation to file
814 | if storage_path:
815 | try:
816 | fallback_eval_file = os.path.join(storage_path, "fallback_evaluation.md")
817 | with open(fallback_eval_file, "w", encoding="utf-8") as f:
818 | f.write("# Fallback Essay Evaluation by gpt-4.1-mini\n\n")
819 | f.write(fallback_evaluation["evaluation"])
820 |
821 | logger.info(f"Fallback evaluation saved to {fallback_eval_file}", emoji_key="save")
822 | except Exception as e:
823 | logger.warning(f"Could not save fallback evaluation: {str(e)}", emoji_key="warning")
824 | else:
825 | console.print("[red]Fallback model unavailable[/red]")
826 | except Exception as fallback_error:
827 | console.print(f"[red]Fallback evaluation failed: {str(fallback_error)}[/red]")
828 |
829 | # Find and highlight comparison file for final round
830 | comparison_file = final_round.get('comparison_file_path')
831 | if comparison_file:
832 | console.print(Panel(
833 | f"Check the final comparison file for the full essay text and detailed round comparisons:\n[bold yellow]{escape(comparison_file)}[/bold yellow]",
834 | title="[bold]Final Comparison File[/bold]",
835 | border_style="yellow",
836 | expand=False
837 | ))
838 | else:
839 | logger.warning("Could not find path to final comparison file in results", emoji_key="warning")
840 |
841 | # Display cost summary
842 | costs = await calculate_tournament_costs(rounds_results, evaluation_cost)
843 | model_costs = costs.get('model_costs', {})
844 | total_cost = costs.get('total_cost', 0.0)
845 |
846 | console.print(Rule("[bold blue]Tournament Cost Summary[/bold blue]"))
847 |
848 | cost_table = Table(box=box.MINIMAL, show_header=True, expand=False)
849 | cost_table.add_column("Model", style="magenta")
850 | cost_table.add_column("Total Cost", style="green", justify="right")
851 |
852 | # Add model costs to table
853 | for model_id, cost in sorted(model_costs.items()):
854 | if model_id == 'evaluation':
855 | display_model = "Evaluation"
856 | else:
857 | display_model = model_id.split(':')[-1] if ':' in model_id else model_id
858 |
859 | cost_table.add_row(
860 | display_model,
861 | f"${cost:.6f}"
862 | )
863 |
864 | # Add grand total
865 | cost_table.add_row(
866 | "[bold]GRAND TOTAL[/bold]",
867 | f"[bold]${total_cost:.6f}[/bold]"
868 | )
869 |
870 | console.print(cost_table)
871 |
872 | # Save cost summary to file
873 | if storage_path:
874 | try:
875 | cost_file = os.path.join(storage_path, "cost_summary.md")
876 | with open(cost_file, "w", encoding="utf-8") as f:
877 | f.write("# Tournament Cost Summary\n\n")
878 | f.write("## Per-Model Costs\n\n")
879 |
880 | for model_id, cost in sorted(model_costs.items()):
881 | if model_id == 'evaluation':
882 | display_model = "Evaluation"
883 | else:
884 | display_model = model_id.split(':')[-1] if ':' in model_id else model_id
885 |
886 | f.write(f"- **{display_model}**: ${cost:.6f}\n")
887 |
888 | f.write("\n## Grand Total\n\n")
889 | f.write(f"**TOTAL COST**: ${total_cost:.6f}\n")
890 |
891 | logger.info(f"Cost summary saved to {cost_file}", emoji_key="save")
892 | except Exception as e:
893 | logger.warning(f"Could not save cost summary: {str(e)}", emoji_key="warning")
894 | else:
895 | logger.error(f"Could not fetch final results: {results_data.get('error', 'Unknown error')}", emoji_key="error")
896 | elif final_status:
897 | logger.warning(f"Tournament ended with status {final_status}. Check logs or status details for more info.", emoji_key="warning")
898 |
899 | except Exception as e:
900 | logger.error(f"Error in tournament demo: {str(e)}", emoji_key="error", exc_info=True)
901 | return 1
902 |
903 | # Display cost summary at the end
904 | tracker.display_summary(console)
905 |
906 | logger.success("Text Tournament Demo Finished", emoji_key="complete")
907 | console.print(Panel(
908 | "To view full essays and detailed comparisons, check the storage directory indicated in the results summary.",
909 | title="[bold]Next Steps[/bold]",
910 | border_style="dim green",
911 | expand=False
912 | ))
913 | return 0
914 |
915 |
916 | async def main():
917 | """Run the tournament demo."""
918 | tracker = CostTracker() # Instantiate tracker
919 | try:
920 | # Set up gateway
921 | await setup_gateway()
922 |
923 | # Run the demo
924 | return await run_tournament_demo(tracker) # Pass tracker
925 | except Exception as e:
926 | logger.critical(f"Demo failed: {str(e)}", emoji_key="critical", exc_info=True)
927 | return 1
928 | finally:
929 | # Clean up
930 | if gateway:
931 | pass # No cleanup needed for Gateway instance
932 |
933 |
934 | if __name__ == "__main__":
935 | # Run the demo
936 | exit_code = asyncio.run(main())
937 | sys.exit(exit_code)
```
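
For orientation, below is a minimal sketch of the same create, poll, and fetch-results flow that `run_tournament_demo` drives above, with the Rich display and cost tracking stripped out. It is illustrative only: the function name `run_minimal_text_tournament` is hypothetical, the import path assumes the demo module is importable as `examples.tournament_text_demo`, the payload keys mirror the `create_input` dictionary used above, and `process_mcp_result` is assumed to be reachable from the module namespace.

```python
import asyncio

from examples import tournament_text_demo as demo  # assumed import path


async def run_minimal_text_tournament(prompt: str, model_ids: list[str]) -> dict:
    # Registers create_tournament / get_tournament_status / get_tournament_results
    # on demo.gateway.mcp (see setup_gateway above).
    await demo.setup_gateway()

    # 1. Create a single-round text tournament, mirroring the keys used above.
    create_result = await demo.gateway.mcp.call_tool("create_tournament", {
        "name": "Minimal text tournament",
        "prompt": prompt,
        "models": [{"model_id": mid, "diversity_count": 1} for mid in model_ids],
        "rounds": 1,
        "tournament_type": "text",
    })
    create_data = await demo.process_mcp_result(create_result)
    tournament_id = create_data["tournament_id"]

    # 2. Poll until the tournament reaches a final state.
    final_status = await demo.poll_tournament_status(
        tournament_id, create_data.get("storage_path")
    )
    if final_status != "COMPLETED":
        return {"error": f"Tournament ended with status {final_status}"}

    # 3. Fetch and return the raw results payload.
    results = await demo.gateway.mcp.call_tool(
        "get_tournament_results", {"tournament_id": tournament_id}
    )
    return await demo.process_mcp_result(results)


if __name__ == "__main__":
    print(asyncio.run(run_minimal_text_tournament(
        "Write a short essay on retrieval-augmented generation.",
        ["openai:gpt-4.1-mini", "gemini:gemini-2.5-pro-preview-03-25"],
    )))
```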
--------------------------------------------------------------------------------
/ultimate_mcp_server/tools/meta_api_tool.py:
--------------------------------------------------------------------------------
```python
1 | """API Meta-Tool for dynamically exposing FastAPI endpoints via MCP.
2 |
3 | This module provides a tool for automatically discovering and integrating
4 | FastAPI-compatible REST APIs into the MCP server by pointing it at the
5 | FastAPI server's OpenAPI specification (e.g., /openapi.json).
6 |
7 | Usage Examples:
8 |
9 | 1. Register an API:
10 | ```python
11 | result = await client.tools.register_api(
12 | api_name="petstore",
13 | openapi_url="https://petstore.swagger.io/v2/swagger.json"
14 | )
15 | print(f"Registered {result['tools_count']} tools for the Petstore API")
16 | ```
17 |
18 | 2. List all registered APIs:
19 | ```python
20 | apis = await client.tools.list_registered_apis()
21 | for api_name, api_info in apis["apis"].items():
22 | print(f"{api_name}: {api_info['tools_count']} tools")
23 | ```
24 |
25 | 3. Call a dynamically registered tool:
26 | ```python
27 | # Get a pet by ID
28 | pet = await client.tools.call_dynamic_tool(
29 | tool_name="petstore_getPetById",
30 | inputs={"petId": 123}
31 | )
32 | print(f"Pet name: {pet['name']}")
33 |
34 | # Add a new pet
35 | new_pet = await client.tools.call_dynamic_tool(
36 | tool_name="petstore_addPet",
37 | inputs={
38 | "body": {
39 | "id": 0,
40 | "name": "Fluffy",
41 | "status": "available"
42 | }
43 | }
44 | )
45 | print(f"Added pet with ID: {new_pet['id']}")
46 | ```
47 |
48 | 4. Unregister an API:
49 | ```python
50 | result = await client.tools.unregister_api(api_name="petstore")
51 | print(f"Unregistered {result['tools_count']} tools")
52 | ```
53 | """
54 |
55 | import asyncio
56 | import json
57 | import re
58 | import time
59 | from typing import Any, Dict, List, Optional
60 | from urllib.parse import urlparse
61 |
62 | import httpx
63 |
64 | from ultimate_mcp_server.exceptions import ToolError, ToolInputError
65 | from ultimate_mcp_server.services.cache import with_cache
66 | from ultimate_mcp_server.tools.base import (
67 | with_error_handling,
68 | with_state_management,
69 | with_tool_metrics,
70 | )
71 | from ultimate_mcp_server.utils import get_logger
72 |
73 | logger = get_logger("ultimate_mcp_server.tools.meta_api")
74 |
75 |
76 | async def fetch_openapi_spec(
77 | url: str, timeout: float = 30.0, headers: Optional[Dict[str, str]] = None
78 | ) -> Dict[str, Any]:
79 | """Fetches the OpenAPI spec from the given URL.
80 |
81 | Args:
82 | url: URL of the OpenAPI spec (typically ending in /openapi.json)
83 | timeout: Timeout for the HTTP request in seconds
84 | headers: Optional headers to include in the request (e.g., for authentication)
85 |
86 | Returns:
87 | Parsed OpenAPI spec as a dictionary
88 |
89 | Raises:
90 | ToolError: If the fetch or parsing fails
91 | """
92 | try:
93 | async with httpx.AsyncClient() as client:
94 | response = await client.get(url, timeout=timeout, headers=headers)
95 | response.raise_for_status()
96 | return response.json()
97 | except httpx.HTTPStatusError as e:
98 | raise ToolError(
99 | f"Failed to fetch OpenAPI spec: HTTP {e.response.status_code}",
100 | details={"url": url, "status_code": e.response.status_code},
101 | ) from e
102 | except httpx.RequestError as e:
103 | raise ToolError(
104 | f"Failed to fetch OpenAPI spec: {str(e)}", details={"url": url, "error": str(e)}
105 | ) from e
106 | except json.JSONDecodeError as e:
107 | raise ToolError(
108 | f"Failed to parse OpenAPI spec as JSON: {str(e)}", details={"url": url, "error": str(e)}
109 | ) from e
110 |
111 |
112 | def extract_endpoint_info(openapi_spec: Dict[str, Any]) -> List[Dict[str, Any]]:
113 | """Extracts endpoint information from an OpenAPI spec.
114 |
115 | Args:
116 | openapi_spec: Parsed OpenAPI spec as a dictionary
117 |
118 | Returns:
119 | List of dictionaries containing endpoint information, each with keys:
120 | - path: The endpoint path
121 | - method: The HTTP method (GET, POST, etc.)
122 | - operation_id: The operationId from the spec (used as tool name)
123 | - parameters: List of parameter objects
124 | - request_body: Request body schema (if applicable)
125 | - responses: Response schemas
126 | - summary: Endpoint summary
127 | - description: Endpoint description
128 | """
129 | endpoints = []
130 |
131 | paths = openapi_spec.get("paths", {})
132 | for path, path_item in paths.items():
133 | for method, operation in path_item.items():
134 | if method.lower() not in ["get", "post", "put", "delete", "patch"]:
135 | continue # Skip non-HTTP methods like "parameters"
136 |
137 | # Extract operation ID (fall back to generating one if not provided)
138 | operation_id = operation.get("operationId")
139 | if not operation_id:
140 | # Generate operation ID from path and method
141 | path_parts = [p for p in path.split("/") if p and not p.startswith("{")]
142 | if path_parts:
143 | operation_id = f"{method.lower()}_{path_parts[-1]}"
144 | else:
145 | operation_id = f"{method.lower()}_root"
146 |
147 | # Ensure operation_id is a valid Python identifier
148 | operation_id = re.sub(r"[^a-zA-Z0-9_]", "_", operation_id)
149 | if operation_id[0].isdigit():
150 | operation_id = f"op_{operation_id}"
151 |
152 | # Extract parameters
153 | parameters = []
154 | # Include parameters from the path item
155 | if "parameters" in path_item:
156 | parameters.extend(path_item["parameters"])
157 | # Include parameters from the operation (overriding path item parameters if same name)
158 | if "parameters" in operation:
159 | # Remove any path item parameters with the same name
160 | path_param_names = {
161 | p["name"] for p in path_item.get("parameters", []) if "name" in p
162 | }
163 | op_params = []
164 | for p in operation["parameters"]:
165 | if p.get("name") in path_param_names:
166 | # This parameter overrides a path item parameter
167 | parameters = [
168 | param for param in parameters if param.get("name") != p.get("name")
169 | ]
170 | op_params.append(p)
171 | parameters.extend(op_params)
172 |
173 | # Extract request body schema
174 | request_body = None
175 | if "requestBody" in operation:
176 | request_body = operation["requestBody"]
177 |
178 | # Extract response schemas
179 | responses = operation.get("responses", {})
180 |
181 | endpoints.append(
182 | {
183 | "path": path,
184 | "method": method.lower(),
185 | "operation_id": operation_id,
186 | "parameters": parameters,
187 | "request_body": request_body,
188 | "responses": responses,
189 | "summary": operation.get("summary", ""),
190 | "description": operation.get("description", ""),
191 | "tags": operation.get("tags", []),
192 | }
193 | )
194 |
195 | return endpoints
196 |
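# Illustrative example (not in the original file): for a spec fragment such as
#     {"paths": {"/pets/{petId}": {"get": {
#         "operationId": "getPetById", "summary": "Find pet by ID",
#         "parameters": [{"name": "petId", "in": "path", "required": True,
#                         "schema": {"type": "integer"}}]}}}}
# extract_endpoint_info() yields one entry shaped like:
#     {"path": "/pets/{petId}", "method": "get", "operation_id": "getPetById",
#      "parameters": [{"name": "petId", ...}], "request_body": None, "responses": {},
#      "summary": "Find pet by ID", "description": "", "tags": []}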
197 |
198 | def generate_tool_function_code(
199 | endpoint_info: Dict[str, Any],
200 | base_url: str,
201 | api_name: str,
202 | cache_ttl: Optional[int] = None,
203 | auth_header: Optional[str] = None,
204 | ) -> str:
205 | """Generates Python code for a tool function based on endpoint info.
206 |
207 | Args:
208 | endpoint_info: Dictionary containing endpoint information
209 | base_url: Base URL of the API
210 | api_name: Name of the API (used for function documentation)
211 | cache_ttl: Optional TTL for caching tool results in seconds
212 | auth_header: Optional authentication header name to include in requests
213 |
214 | Returns:
215 | String containing Python code for the tool function
216 | """
217 | operation_id = endpoint_info["operation_id"]
218 | path = endpoint_info["path"]
219 | method = endpoint_info["method"]
220 | summary = endpoint_info["summary"]
221 | description = endpoint_info["description"]
222 | tags = ", ".join(endpoint_info.get("tags", []))
223 |
224 | # Generate a clean function name (no API prefix, will be added during registration)
225 | function_name = operation_id
226 |
227 | # Generate docstring
228 | docstring = (
229 | f'"""{summary}\n\n'
230 | if summary
231 | else f'"""Calls the {method.upper()} {path} endpoint of the {api_name} API.\n\n'
232 | )
233 | if description:
234 | docstring += f"{description}\n\n"
235 | if tags:
236 | docstring += f"Tags: {tags}\n\n"
237 |
238 | docstring += "Args:\n"
239 |
240 | # Generate function parameters
241 | params = []
242 | path_params = []
243 | query_params = []
244 | header_params = []
245 | body_param = None
246 |
247 | for param in endpoint_info.get("parameters", []):
248 | param_name = param["name"]
249 | # Clean the parameter name to be a valid Python identifier
250 | clean_param_name = re.sub(r"[^a-zA-Z0-9_]", "_", param_name)
251 | if clean_param_name[0].isdigit():
252 | clean_param_name = f"p_{clean_param_name}"
253 |
254 | param_type = param.get("schema", {}).get("type", "string")
255 | required = param.get("required", False)
256 | param_in = param.get("in", "query")
257 | param_description = param.get("description", "")
258 |
259 | python_type = "str"
260 | if param_type == "integer":
261 | python_type = "int"
262 | elif param_type == "number":
263 | python_type = "float"
264 | elif param_type == "boolean":
265 | python_type = "bool"
266 | elif param_type == "array":
267 | python_type = "List[Any]"
268 | elif param_type == "object":
269 | python_type = "Dict[str, Any]"
270 |
271 | if required:
272 | params.append(f"{clean_param_name}: {python_type}")
273 | docstring += f" {clean_param_name}: {param_description} (in: {param_in})\n"
274 | else:
275 | params.append(f"{clean_param_name}: Optional[{python_type}] = None")
276 | docstring += (
277 | f" {clean_param_name}: (Optional) {param_description} (in: {param_in})\n"
278 | )
279 |
280 | # Store parameter location for request building
281 | if param_in == "path":
282 | path_params.append((param_name, clean_param_name))
283 | elif param_in == "query":
284 | query_params.append((param_name, clean_param_name))
285 |         elif param_in == "header" and (
286 |             param_name.lower() != auth_header.lower()
287 |             if auth_header
288 |             else True
289 |         ):
290 | header_params.append((param_name, clean_param_name))
291 |
292 | # Handle request body
293 | if endpoint_info.get("request_body"):
294 | content = endpoint_info["request_body"].get("content", {})
295 | if "application/json" in content:
296 | body_param = "body"
297 | schema_desc = "Request body"
298 | # Try to get schema description from the content schema
299 | schema = content.get("application/json", {}).get("schema", {})
300 | if "description" in schema:
301 | schema_desc = schema["description"]
302 | params.append("body: Dict[str, Any]")
303 | docstring += f" body: {schema_desc}\n"
304 |
305 | # Add timeout and auth_token params if needed
306 | params.append("timeout: float = 30.0")
307 | docstring += " timeout: Timeout for the HTTP request in seconds\n"
308 |
309 | if auth_header:
310 | params.append("auth_token: Optional[str] = None")
311 | docstring += f" auth_token: Optional authentication token to include in the '{auth_header}' header\n"
312 |
313 | docstring += '\n Returns:\n API response data as a dictionary\n """'
314 |
315 | # Generate function body
316 | function_body = []
317 | function_body.append(" async with httpx.AsyncClient() as client:")
318 |
319 | # Format URL with path params
320 | if path_params:
321 | # For path params, replace {param} with {clean_param_name}
322 | url_format = path
323 | for param_name, clean_name in path_params:
324 | url_format = url_format.replace(f"{{{param_name}}}", f"{{{clean_name}}}")
325 | function_body.append(f' url = f"{base_url}{url_format}"')
326 | else:
327 | function_body.append(f' url = "{base_url}{path}"')
328 |
329 | # Prepare query params
330 | if query_params:
331 | function_body.append(" params = {}")
332 | for param_name, clean_name in query_params:
333 | function_body.append(f" if {clean_name} is not None:")
334 | function_body.append(f' params["{param_name}"] = {clean_name}')
335 | else:
336 | function_body.append(" params = None")
337 |
338 | # Prepare headers
339 | function_body.append(" headers = {}")
340 | if auth_header:
341 | function_body.append(" if auth_token is not None:")
342 | function_body.append(f' headers["{auth_header}"] = auth_token')
343 |
344 | if header_params:
345 | for param_name, clean_name in header_params:
346 | function_body.append(f" if {clean_name} is not None:")
347 | function_body.append(f' headers["{param_name}"] = {clean_name}')
348 |
349 | # Prepare request
350 | request_args = ["url"]
351 | if query_params:
352 | request_args.append("params=params")
353 | if header_params or auth_header:
354 | request_args.append("headers=headers")
355 | if body_param:
356 | request_args.append(f"json={body_param}")
357 | request_args.append("timeout=timeout")
358 |
359 |     function_body.append("        try:")
360 |     function_body.append(f"            response = await client.{method}({', '.join(request_args)})")
361 |     function_body.append("            response.raise_for_status()")
362 |     function_body.append(
363 |         "            if response.headers.get('content-type', '').startswith('application/json'):"
364 |     )
365 |     function_body.append("                return response.json()")
366 |     function_body.append("            else:")
367 |     function_body.append("                return {'text': response.text}")
368 |     function_body.append("        except httpx.HTTPStatusError as e:")
369 |     function_body.append("            error_detail = e.response.text")
370 |     function_body.append("            try:")
371 |     function_body.append("                error_json = e.response.json()")
372 |     function_body.append("                if isinstance(error_json, dict):")
373 |     function_body.append("                    error_detail = error_json")
374 |     function_body.append("            except Exception:")
375 |     function_body.append("                pass # Couldn't parse JSON error")
376 |     function_body.append("            raise ToolError(")
377 |     function_body.append('                f"API request failed: HTTP {e.response.status_code}",')
378 |     function_body.append(
379 |         '                details={"status_code": e.response.status_code, "response": error_detail}'
380 |     )
381 |     function_body.append("            )")
382 |     function_body.append("        except httpx.RequestError as e:")
383 |     function_body.append("            raise ToolError(")
384 |     function_body.append('                f"API request failed: {str(e)}",')
385 |     function_body.append('                details={"error": str(e)}')
386 |     function_body.append("            )")
387 |
388 | # Generate the full function
389 | param_str = ", ".join(params)
390 | if param_str:
391 | param_str = f", {param_str}"
392 |
393 | # Add decorators based on configuration
394 | decorators = ["@with_tool_metrics", "@with_error_handling"]
395 |
396 | if cache_ttl is not None:
397 | decorators.insert(0, f"@with_cache(ttl={cache_ttl})")
398 |
399 | function_code = [
400 | *decorators,
401 | f"async def {function_name}(self{param_str}):",
402 |         f"    {docstring}",  # indent the docstring so the generated function body parses
403 | *function_body,
404 | ]
405 |
406 | return "\n".join(function_code)
407 |
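# Illustrative example (not in the original file): for the GET /pets/{petId}
# endpoint sketched after extract_endpoint_info above (required integer path
# parameter, no query/header params, no caching, no auth header, hypothetical
# base URL https://petstore.example.com), generate_tool_function_code() emits
# code along these lines:
#
#     @with_tool_metrics
#     @with_error_handling
#     async def getPetById(self, petId: int, timeout: float = 30.0):
#         """Find pet by ID
#         ...
#         """
#         async with httpx.AsyncClient() as client:
#             url = f"https://petstore.example.com/pets/{petId}"
#             params = None
#             headers = {}
#             try:
#                 response = await client.get(url, timeout=timeout)
#                 response.raise_for_status()
#                 ...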
408 |
409 | # After the generate_tool_function_code function and before register_api_meta_tools
410 | @with_tool_metrics
411 | @with_error_handling
412 | @with_state_management(namespace="meta_api")
413 | async def register_api(
414 | api_name: str,
415 | openapi_url: str,
416 | base_url: Optional[str] = None,
417 | cache_ttl: Optional[int] = None,
418 | auth_header: Optional[str] = None,
419 | auth_token: Optional[str] = None,
420 | tool_name_prefix: Optional[str] = None,
421 | timeout: float = 30.0,
422 | ctx: Optional[Dict[str, Any]] = None,
423 | get_state=None,
424 | set_state=None,
425 | delete_state=None
426 | ) -> Dict[str, Any]:
427 | """Registers an API with the MCP server by fetching its OpenAPI spec.
428 |
429 | Dynamically generates MCP tools for each endpoint in the API and registers
430 | them with the MCP server. The tools are prefixed with the API name by default,
431 | resulting in tool names like "api_name_operation_id".
432 |
433 | Args:
434 | api_name: A unique name for the API (used as a prefix for tool names)
435 | openapi_url: URL of the OpenAPI spec (typically ending in /openapi.json)
436 | base_url: Base URL of the API (if different from the OpenAPI URL)
437 | cache_ttl: Optional TTL for caching tool results in seconds
438 | auth_header: Optional name of the header to use for authentication
439 | auth_token: Optional token to use when fetching the OpenAPI spec
440 | tool_name_prefix: Optional prefix for tool names (default: api_name)
441 | timeout: Timeout for the HTTP request in seconds
442 | ctx: MCP context
443 | get_state: Function to get state from store (injected by decorator)
444 | set_state: Function to set state in store (injected by decorator)
445 | delete_state: Function to delete state from store (injected by decorator)
446 |
447 | Returns:
448 | A dictionary containing the registration results:
449 | {
450 | "success": true,
451 | "api_name": "example_api",
452 | "base_url": "https://api.example.com",
453 | "tools_registered": ["example_api_get_users", "example_api_create_user", ...],
454 | "tools_count": 5,
455 | "processing_time": 1.23
456 | }
457 | """
458 | # Validate inputs
459 | if not api_name:
460 | raise ToolInputError("api_name cannot be empty")
461 |
462 | # Check if API name has invalid characters
463 | if not re.match(r"^[a-zA-Z0-9_]+$", api_name):
464 | raise ToolInputError(
465 | "api_name must contain only alphanumeric characters and underscores"
466 | )
467 |
468 | if not openapi_url:
469 | raise ToolInputError("openapi_url cannot be empty")
470 |
471 | # Get registered APIs from state store
472 | registered_apis = await get_state("registered_apis", {})
473 | generated_tools = await get_state("generated_tools", {})
474 |
475 | # Check if API is already registered
476 | if api_name in registered_apis:
477 | raise ToolInputError(
478 | f"API {api_name} is already registered. Use a different name or unregister it first."
479 | )
480 |
481 | # Set tool name prefix
482 | tool_name_prefix = tool_name_prefix or api_name
483 |
484 | # Determine base URL if not provided
485 | if not base_url:
486 | # Extract base URL from OpenAPI URL
487 | try:
488 | parsed_url = urlparse(openapi_url)
489 | base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
490 | logger.info(f"Using base_url: {base_url} (derived from openapi_url)")
491 | except Exception as e:
492 | raise ToolInputError(f"Could not determine base_url from openapi_url: {str(e)}") from e
493 |
494 | # Prepare headers for fetching OpenAPI spec
495 | headers = None
496 | if auth_token and auth_header:
497 | headers = {auth_header: auth_token}
498 |
499 | # Fetch OpenAPI spec
500 | logger.info(f"Fetching OpenAPI spec from {openapi_url}")
501 | start_time = time.time()
502 | openapi_spec = await fetch_openapi_spec(openapi_url, timeout, headers)
503 |
504 | # Extract endpoint information
505 | endpoints = extract_endpoint_info(openapi_spec)
506 | logger.info(f"Extracted {len(endpoints)} endpoints from OpenAPI spec")
507 |
508 | # Get MCP server from context
509 | mcp = ctx.get('mcp')
510 | if not mcp:
511 | raise ToolError("MCP server context not available")
512 |
513 | # Generate and register tools for each endpoint
514 | registered_tools = []
515 | generated_code = {}
516 |
517 | for endpoint in endpoints:
518 | operation_id = endpoint["operation_id"]
519 | tool_name = f"{tool_name_prefix}_{operation_id}"
520 |
521 | # Skip if this tool is already registered
522 | if tool_name in generated_tools:
523 | logger.warning(f"Tool {tool_name} already registered, skipping")
524 | continue
525 |
526 | # Generate tool function code
527 | tool_code = generate_tool_function_code(
528 | endpoint, base_url, api_name, cache_ttl, auth_header
529 | )
530 |
531 | # Store the generated code for debugging
532 | generated_code[tool_name] = tool_code
533 |
534 | # Create and register the tool function
535 | try:
536 | # Create a namespace for the exec
537 | namespace = {}
538 | # Add required imports to the namespace
539 | namespace.update(
540 | {
541 | "httpx": httpx,
542 | "ToolError": ToolError,
543 | "Dict": Dict,
544 | "Any": Any,
545 | "Optional": Optional,
546 | "List": List,
547 | "with_tool_metrics": with_tool_metrics,
548 | "with_error_handling": with_error_handling,
549 | "with_cache": with_cache,
550 | }
551 | )
552 |
553 | # Execute the generated code
554 | exec(tool_code, namespace)
555 |
556 | # Get the generated function from the namespace
557 | generated_func = namespace[operation_id]
558 |
559 | # Register with MCP server
560 | mcp.tool(name=tool_name)(generated_func)
561 |
562 | # Store the generated tool in state
563 | generated_tools[tool_name] = tool_code
564 | registered_tools.append(tool_name)
565 |
566 | logger.info(
567 | f"Registered tool {tool_name} for endpoint {endpoint['method'].upper()} {endpoint['path']}"
568 | )
569 | except Exception as e:
570 | logger.error(f"Failed to register tool {tool_name}: {str(e)}", exc_info=True)
571 | if "tool_code" in locals():
572 | logger.error(f"Generated code that failed:\n{tool_code}")
573 |
574 | # Store API information in state store
575 | registered_apis[api_name] = {
576 | "base_url": base_url,
577 | "openapi_url": openapi_url,
578 | "spec": openapi_spec,
579 | "tools": registered_tools,
580 | "tool_name_prefix": tool_name_prefix,
581 | "generated_code": generated_code,
582 | "auth_header": auth_header,
583 | }
584 |
585 | # Update state store
586 | await set_state("registered_apis", registered_apis)
587 | await set_state("generated_tools", generated_tools)
588 |
589 | processing_time = time.time() - start_time
590 | logger.success(
591 | f"API {api_name} registered with {len(registered_tools)} tools in {processing_time:.2f}s"
592 | )
593 |
594 | return {
595 | "success": True,
596 | "api_name": api_name,
597 | "base_url": base_url,
598 | "tools_registered": registered_tools,
599 | "tools_count": len(registered_tools),
600 | "processing_time": processing_time,
601 | }
602 |
603 | @with_tool_metrics
604 | @with_error_handling
605 | @with_state_management(namespace="meta_api")
606 | async def list_registered_apis(
607 | ctx: Optional[Dict[str, Any]] = None,
608 | get_state=None,
609 | set_state=None,
610 | delete_state=None
611 | ) -> Dict[str, Any]:
612 | """Lists all registered APIs and their endpoints.
613 |
614 | Args:
615 | ctx: MCP context
616 | get_state: Function to get state from store (injected by decorator)
617 | set_state: Function to set state in store (injected by decorator)
618 | delete_state: Function to delete state from store (injected by decorator)
619 |
620 | Returns:
621 | A dictionary containing the registered APIs:
622 | {
623 | "success": true,
624 | "apis": {
625 | "example_api": {
626 | "base_url": "https://api.example.com",
627 | "openapi_url": "https://api.example.com/openapi.json",
628 | "tools_count": 5,
629 | "tools": ["example_api_get_users", "example_api_create_user", ...]
630 | },
631 | ...
632 | },
633 | "total_apis": 2,
634 | "total_tools": 12
635 | }
636 | """
637 | # Get state data
638 | registered_apis = await get_state("registered_apis", {})
639 | generated_tools = await get_state("generated_tools", {})
640 |
641 | result = {
642 | "success": True,
643 | "apis": {},
644 | "total_apis": len(registered_apis),
645 | "total_tools": len(generated_tools),
646 | }
647 |
648 | for api_name, api_info in registered_apis.items():
649 | result["apis"][api_name] = {
650 | "base_url": api_info["base_url"],
651 | "openapi_url": api_info["openapi_url"],
652 | "tools_count": len(api_info["tools"]),
653 | "tools": api_info["tools"],
654 | "tool_name_prefix": api_info["tool_name_prefix"],
655 | }
656 |
657 | return result
658 |
659 | @with_tool_metrics
660 | @with_error_handling
661 | @with_state_management(namespace="meta_api")
662 | async def get_api_details(
663 | api_name: str,
664 | ctx: Optional[Dict[str, Any]] = None,
665 | get_state=None,
666 | set_state=None,
667 | delete_state=None
668 | ) -> Dict[str, Any]:
669 | """Gets detailed information about a registered API.
670 |
671 | Args:
672 | api_name: The name of the API to get details for
673 | ctx: MCP context
674 | get_state: Function to get state from store (injected by decorator)
675 | set_state: Function to set state in store (injected by decorator)
676 | delete_state: Function to delete state from store (injected by decorator)
677 |
678 | Returns:
679 | A dictionary containing the API details:
680 | {
681 | "success": true,
682 | "api_name": "example_api",
683 | "base_url": "https://api.example.com",
684 | "openapi_url": "https://api.example.com/openapi.json",
685 | "tools": [
686 | {
687 | "name": "example_api_get_users",
688 | "method": "get",
689 | "path": "/users",
690 | "summary": "Get all users",
691 | "description": "Returns a list of all users in the system",
692 | "parameters": [...]
693 | },
694 | ...
695 | ],
696 | "endpoints_count": 5
697 | }
698 | """
699 | # Get registered APIs from state
700 | registered_apis = await get_state("registered_apis", {})
701 |
702 | if api_name not in registered_apis:
703 | raise ToolInputError(f"API {api_name} not found")
704 |
705 | api_info = registered_apis[api_name]
706 |
707 | # Extract endpoint details from the OpenAPI spec
708 | endpoints = []
709 | spec = api_info["spec"]
710 |
711 | for endpoint_info in extract_endpoint_info(spec):
712 | tool_name = f"{api_info['tool_name_prefix']}_{endpoint_info['operation_id']}"
713 | endpoints.append(
714 | {
715 | "name": tool_name,
716 | "method": endpoint_info["method"],
717 | "path": endpoint_info["path"],
718 | "summary": endpoint_info["summary"],
719 | "description": endpoint_info["description"],
720 | "parameters": endpoint_info["parameters"],
721 | "tags": endpoint_info.get("tags", []),
722 | }
723 | )
724 |
725 | return {
726 | "success": True,
727 | "api_name": api_name,
728 | "base_url": api_info["base_url"],
729 | "openapi_url": api_info["openapi_url"],
730 | "tools": endpoints,
731 | "endpoints_count": len(endpoints),
732 | }
733 |
734 | @with_tool_metrics
735 | @with_error_handling
736 | @with_state_management(namespace="meta_api")
737 | async def unregister_api(
738 | api_name: str,
739 | ctx: Optional[Dict[str, Any]] = None,
740 | get_state=None,
741 | set_state=None,
742 | delete_state=None
743 | ) -> Dict[str, Any]:
744 | """Unregisters an API and all its tools from the MCP server.
745 |
746 | Args:
747 | api_name: The name of the API to unregister
748 | ctx: MCP context
749 | get_state: Function to get state from store (injected by decorator)
750 | set_state: Function to set state in store (injected by decorator)
751 | delete_state: Function to delete state from store (injected by decorator)
752 |
753 | Returns:
754 | A dictionary indicating the result:
755 | {
756 | "success": true,
757 | "api_name": "example_api",
758 | "tools_unregistered": ["example_api_get_users", "example_api_create_user", ...],
759 | "tools_count": 5
760 | }
761 | """
762 | # Get state data
763 | registered_apis = await get_state("registered_apis", {})
764 | generated_tools = await get_state("generated_tools", {})
765 |
766 | if api_name not in registered_apis:
767 | raise ToolInputError(f"API {api_name} not found")
768 |
769 | api_info = registered_apis[api_name]
770 | tools = api_info["tools"]
771 |
772 | # Get MCP server from context
773 | mcp = ctx.get('mcp')
774 | if not mcp:
775 | raise ToolError("MCP server context not available")
776 |
777 | # Unregister tools from MCP server
778 | for tool_name in tools:
779 | try:
780 | # Check if the MCP server has a method for unregistering tools
781 | if hasattr(mcp, "unregister_tool"):
782 | mcp.unregister_tool(tool_name)
783 | # If not, try to remove from the tools dictionary
784 | elif hasattr(mcp, "tools"):
785 | if tool_name in mcp.tools:
786 | del mcp.tools[tool_name]
787 |
788 | # Remove from our generated tools dictionary
789 | if tool_name in generated_tools:
790 | del generated_tools[tool_name]
791 |
792 | logger.info(f"Unregistered tool {tool_name}")
793 | except Exception as e:
794 | logger.error(f"Failed to unregister tool {tool_name}: {str(e)}", exc_info=True)
795 |
796 | # Remove API from registered APIs
797 | del registered_apis[api_name]
798 |
799 | # Update state
800 | await set_state("registered_apis", registered_apis)
801 | await set_state("generated_tools", generated_tools)
802 |
803 | logger.success(f"API {api_name} unregistered with {len(tools)} tools")
804 |
805 | return {
806 | "success": True,
807 | "api_name": api_name,
808 | "tools_unregistered": tools,
809 | "tools_count": len(tools),
810 | }
811 |
812 | @with_tool_metrics
813 | @with_error_handling
814 | @with_state_management(namespace="meta_api")
815 | async def call_dynamic_tool(
816 | tool_name: str,
817 | inputs: Optional[Dict[str, Any]] = None,
818 | ctx: Optional[Dict[str, Any]] = None,
819 | get_state=None,
820 | set_state=None,
821 | delete_state=None
822 | ) -> Dict[str, Any]:
823 | """Calls a dynamically registered tool by name.
824 |
825 | This is a convenience function for calling tools registered via register_api,
826 | allowing direct invocation of API endpoints.
827 |
828 | Args:
829 | tool_name: Name of the tool to call
830 | inputs: Inputs to pass to the tool (parameters for the API endpoint)
831 | ctx: MCP context
832 | get_state: Function to get state from store (injected by decorator)
833 | set_state: Function to set state in store (injected by decorator)
834 | delete_state: Function to delete state from store (injected by decorator)
835 |
836 | Returns:
837 | The result of the tool call
838 | """
839 | # Get MCP server from context
840 | mcp = ctx.get('mcp') if ctx else None
841 | if not mcp:
842 | raise ToolError("MCP server context not available")
843 |
844 | # Get registered APIs and generated tools from state
845 | registered_apis = await get_state("registered_apis", {})
846 | generated_tools = await get_state("generated_tools", {})
847 |
848 | if not tool_name:
849 | raise ToolInputError("tool_name cannot be empty")
850 |
851 | # Check if tool exists
852 | if tool_name not in generated_tools:
853 | valid_tools = list(generated_tools.keys())
854 | raise ToolInputError(
855 | f"Tool {tool_name} not found. Valid tools: {', '.join(valid_tools[:10])}..."
856 | if len(valid_tools) > 10
857 | else f"Tool {tool_name} not found. Valid tools: {', '.join(valid_tools)}"
858 | )
859 |
860 | # Initialize inputs
861 | if inputs is None:
862 | inputs = {}
863 |
864 | # Find which API this tool belongs to
865 | api_name = None
866 | for name, info in registered_apis.items():
867 | if tool_name in info["tools"]:
868 | api_name = name
869 | break
870 |
871 | if not api_name:
872 | logger.warning(f"Could not determine which API {tool_name} belongs to")
873 |
874 | # Add auth_token to inputs if specified and the API has an auth_header
875 | api_info = registered_apis.get(api_name, {})
876 | if api_info.get("auth_header") and "auth_token" in ctx:
877 | inputs["auth_token"] = ctx["auth_token"]
878 |
879 | # Call the tool directly through MCP
880 | logger.info(f"Calling dynamic tool {tool_name} with inputs: {inputs}")
881 | start_time = time.time()
882 |
883 | # Prefer mcp.execute when the server exposes it; otherwise fall back to mcp.call_tool
884 | if hasattr(mcp, "execute"):
885 | result = await mcp.execute(tool_name, inputs)
886 | else:
887 | result = await mcp.call_tool(tool_name, inputs)
888 |
889 | processing_time = time.time() - start_time
890 |
891 | # Add metadata to result
892 | if isinstance(result, dict):
893 | result["processing_time"] = processing_time
894 | result["success"] = True
895 | else:
896 | result = {"data": result, "processing_time": processing_time, "success": True}
897 |
898 | logger.info(f"Called dynamic tool {tool_name} in {processing_time:.4f}s")
899 | return result
900 |
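# --- Illustrative usage sketch (editorial addition, not part of the original file) ---
# Assuming an API registered under the name "petstore" with tool_name_prefix
# "petstore", an endpoint whose operationId is "get_pet_by_id" is exposed as the
# tool "petstore_get_pet_by_id" (prefix + "_" + operationId) and could be invoked
# directly, mirroring the __main__ usage further below; all names here are
# hypothetical:
#
#     result = await call_dynamic_tool(
#         tool_name="petstore_get_pet_by_id",
#         inputs={"petId": 42},
#         ctx={"mcp": mcp_server},
#     )
#     # result carries the endpoint response plus "processing_time" and "success"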
901 | @with_tool_metrics
902 | @with_error_handling
903 | @with_state_management(namespace="meta_api")
904 | async def refresh_api(
905 | api_name: str,
906 | update_base_url: Optional[str] = None,
907 | timeout: float = 30.0,
908 | ctx: Optional[Dict[str, Any]] = None,
909 | get_state=None,
910 | set_state=None,
911 | delete_state=None
912 | ) -> Dict[str, Any]:
913 | """Refreshes an API by re-fetching its OpenAPI spec and updating tools.
914 |
915 | This is useful when the API has been updated with new endpoints or
916 | modifications to existing endpoints.
917 |
918 | Args:
919 | api_name: The name of the API to refresh
920 | update_base_url: Optional new base URL for the API
921 | timeout: Timeout for the HTTP request in seconds
922 | ctx: MCP context
923 | get_state: Function to get state from store (injected by decorator)
924 | set_state: Function to set state in store (injected by decorator)
925 | delete_state: Function to delete state from store (injected by decorator)
926 |
927 | Returns:
928 | A dictionary indicating the result:
929 | {
930 | "success": true,
931 | "api_name": "example_api",
932 | "tools_added": ["example_api_new_endpoint", ...],
933 | "tools_updated": ["example_api_modified_endpoint", ...],
934 | "tools_removed": ["example_api_deleted_endpoint", ...],
935 | "tools_count": 8
936 | }
937 | """
938 | # Get registered APIs from state
939 | registered_apis = await get_state("registered_apis", {})
940 |
941 | if api_name not in registered_apis:
942 | raise ToolInputError(f"API {api_name} not found")
943 |
944 | api_info = registered_apis[api_name]
945 | old_tools = set(api_info["tools"])
946 |
947 | # Use the new base URL if one was supplied, otherwise keep the existing one
948 | base_url = update_base_url or api_info["base_url"]
949 |
950 | # First, unregister the API
951 | await unregister_api(api_name, ctx=ctx, get_state=get_state, set_state=set_state, delete_state=delete_state)
952 |
953 | # Re-register with the same parameters but potentially updated base URL
954 | result = await register_api(
955 | api_name=api_name,
956 | openapi_url=api_info["openapi_url"],
957 | base_url=base_url,
958 | auth_header=api_info.get("auth_header"),
959 | tool_name_prefix=api_info["tool_name_prefix"],
960 | timeout=timeout,
961 | ctx=ctx,
962 | get_state=get_state,
963 | set_state=set_state,
964 | delete_state=delete_state
965 | )
966 |
967 | # Determine which tools were added, updated, or removed
968 | new_tools = set(result["tools_registered"])
969 | tools_added = list(new_tools - old_tools)
970 | tools_removed = list(old_tools - new_tools)
971 | tools_updated = list(new_tools.intersection(old_tools))
972 |
973 | logger.success(
974 | f"API {api_name} refreshed: "
975 | f"{len(tools_added)} added, {len(tools_removed)} removed, {len(tools_updated)} updated"
976 | )
977 |
978 | return {
979 | "success": True,
980 | "api_name": api_name,
981 | "tools_added": tools_added,
982 | "tools_updated": tools_updated,
983 | "tools_removed": tools_removed,
984 | "tools_count": len(new_tools),
985 | }
986 |
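# Worked example of the set arithmetic above (illustrative names only):
# if old_tools == {"ex_get_users", "ex_get_orders"} and re-registration yields
# new_tools == {"ex_get_users", "ex_create_user"}, then
#     tools_added   == ["ex_create_user"]
#     tools_removed == ["ex_get_orders"]
#     tools_updated == ["ex_get_users"]
# "updated" therefore means "present both before and after the refresh"; the
# endpoint definitions themselves are not diffed.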
987 | @with_tool_metrics
988 | @with_error_handling
989 | @with_state_management(namespace="meta_api")
990 | async def get_tool_details(
991 | tool_name: str,
992 | ctx: Optional[Dict[str, Any]] = None,
993 | get_state=None,
994 | set_state=None,
995 | delete_state=None
996 | ) -> Dict[str, Any]:
997 | """Gets detailed information about a dynamically registered tool.
998 |
999 | Args:
1000 | tool_name: Name of the tool to get details for
1001 | ctx: MCP context
1002 | get_state: Function to get state from store (injected by decorator)
1003 | set_state: Function to set state in store (injected by decorator)
1004 | delete_state: Function to delete state from store (injected by decorator)
1005 |
1006 | Returns:
1007 | A dictionary containing the tool details:
1008 | {
1009 | "success": true,
1010 | "tool_name": "example_api_get_users",
1011 | "api_name": "example_api",
1012 | "method": "get",
1013 | "path": "/users",
1014 | "summary": "Get all users",
1015 | "description": "Returns a list of all users in the system",
1016 | "parameters": [...],
1017 | "source_code": "..."
1018 | }
1019 | """
1020 | # Get registered APIs and generated tools from state
1021 | registered_apis = await get_state("registered_apis", {})
1022 | generated_tools = await get_state("generated_tools", {})
1023 |
1024 | if tool_name not in generated_tools:
1025 | raise ToolInputError(f"Tool {tool_name} not found")
1026 |
1027 | # Find which API this tool belongs to
1028 | api_name = None
1029 | for name, info in registered_apis.items():
1030 | if tool_name in info["tools"]:
1031 | api_name = name
1032 | break
1033 |
1034 | if not api_name:
1035 | raise ToolError(f"Could not determine which API {tool_name} belongs to")
1036 |
1037 | api_info = registered_apis[api_name]
1038 |
1039 | # Find endpoint information in the API's endpoint list
1040 | endpoint_info = None
1041 | for endpoint in extract_endpoint_info(api_info["spec"]):
1042 | if f"{api_info['tool_name_prefix']}_{endpoint['operation_id']}" == tool_name:
1043 | endpoint_info = endpoint
1044 | break
1045 |
1046 | if not endpoint_info:
1047 | raise ToolError(f"Could not find endpoint information for tool {tool_name}")
1048 |
1049 | # Get the source code
1050 | source_code = api_info.get("generated_code", {}).get(tool_name, "Source code not available")
1051 |
1052 | return {
1053 | "success": True,
1054 | "tool_name": tool_name,
1055 | "api_name": api_name,
1056 | "method": endpoint_info["method"],
1057 | "path": endpoint_info["path"],
1058 | "summary": endpoint_info["summary"],
1059 | "description": endpoint_info["description"],
1060 | "parameters": endpoint_info["parameters"],
1061 | "tags": endpoint_info.get("tags", []),
1062 | "source_code": source_code,
1063 | }
1064 |
1065 | @with_tool_metrics
1066 | @with_error_handling
1067 | @with_state_management(namespace="meta_api")
1068 | async def list_available_tools(
1069 | include_source_code: bool = False,
1070 | ctx: Optional[Dict[str, Any]] = None,
1071 | get_state=None,
1072 | set_state=None,
1073 | delete_state=None
1074 | ) -> Dict[str, Any]:
1075 | """Lists all available tools registered via the API Meta-Tool.
1076 |
1077 | Args:
1078 | include_source_code: Whether to include source code in the response
1079 | ctx: MCP context
1080 | get_state: Function to get state from store (injected by decorator)
1081 | set_state: Function to set state in store (injected by decorator)
1082 | delete_state: Function to delete state from store (injected by decorator)
1083 |
1084 | Returns:
1085 | A dictionary containing the available tools:
1086 | {
1087 | "success": true,
1088 | "tools": [
1089 | {
1090 | "name": "example_api_get_users",
1091 | "api_name": "example_api",
1092 | "method": "get",
1093 | "path": "/users",
1094 | "summary": "Get all users",
1095 | "source_code": "..." # Only if include_source_code=True
1096 | },
1097 | ...
1098 | ],
1099 | "tools_count": 12
1100 | }
1101 | """
1102 | # Get registered APIs from state
1103 | registered_apis = await get_state("registered_apis", {})
1104 | generated_tools = await get_state("generated_tools", {})
1105 |
1106 | tools = []
1107 |
1108 | for api_name, api_info in registered_apis.items():
1109 | spec = api_info["spec"]
1110 | endpoints = extract_endpoint_info(spec)
1111 |
1112 | for endpoint in endpoints:
1113 | tool_name = f"{api_info['tool_name_prefix']}_{endpoint['operation_id']}"
1114 | if tool_name in generated_tools:
1115 | tool_info = {
1116 | "name": tool_name,
1117 | "api_name": api_name,
1118 | "method": endpoint["method"],
1119 | "path": endpoint["path"],
1120 | "summary": endpoint["summary"],
1121 | }
1122 |
1123 | if include_source_code:
1124 | tool_info["source_code"] = api_info.get("generated_code", {}).get(
1125 | tool_name, "Source code not available"
1126 | )
1127 |
1128 | tools.append(tool_info)
1129 |
1130 | return {"success": True, "tools": tools, "tools_count": len(tools)}
1131 |
1132 | # Now we have all our stateless functions defined:
1133 | # register_api, list_registered_apis, get_api_details, unregister_api
1134 | # call_dynamic_tool, refresh_api, get_tool_details, list_available_tools
1135 |
1136 | def register_api_meta_tools(mcp_server):
1137 | """Registers API Meta-Tool with the MCP server.
1138 |
1139 | Args:
1140 | mcp_server: MCP server instance
1141 | """
1142 | # Register tools with MCP server
1143 | mcp_server.tool(name="register_api")(register_api)
1144 | mcp_server.tool(name="list_registered_apis")(list_registered_apis)
1145 | mcp_server.tool(name="get_api_details")(get_api_details)
1146 | mcp_server.tool(name="unregister_api")(unregister_api)
1147 | mcp_server.tool(name="call_dynamic_tool")(call_dynamic_tool)
1148 | mcp_server.tool(name="refresh_api")(refresh_api)
1149 | mcp_server.tool(name="get_tool_details")(get_tool_details)
1150 | mcp_server.tool(name="list_available_tools")(list_available_tools)
1151 |
1152 | logger.info("Registered API Meta-Tool functions")
1153 | return None # No need to return an instance anymore
1154 |
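# Illustrative wiring sketch (editorial addition; in normal operation the
# Gateway built by create_app() performs this step). The FastMCP import path
# is an assumption based on the FastMCP 2.0 note in the __main__ block below:
#
#     from fastmcp import FastMCP
#
#     mcp = FastMCP("ultimate-mcp-server")
#     register_api_meta_tools(mcp)
#     # register_api, call_dynamic_tool, refresh_api, etc. are now available
#     # to any connected MCP client.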
1155 |
1156 | # Example usage if this module is run directly
1157 | if __name__ == "__main__":
1158 | import argparse
1159 | import asyncio
1160 |
1161 | from ultimate_mcp_server import create_app
1162 |
1163 | async def main():
1164 | # Parse command line arguments
1165 | parser = argparse.ArgumentParser(description="API Meta-Tool for Ultimate MCP Server")
1166 | parser.add_argument("--register", help="Register an API with the given name")
1167 | parser.add_argument("--url", help="OpenAPI spec URL")
1168 | parser.add_argument("--list", action="store_true", help="List registered APIs")
1169 | parser.add_argument("--details", help="Get details for the given API")
1170 | parser.add_argument("--unregister", help="Unregister the given API")
1171 | parser.add_argument("--refresh", help="Refresh the given API")
1172 | parser.add_argument("--base-url", help="Base URL for API requests")
1173 | args = parser.parse_args()
1174 |
1175 | # Create MCP server
1176 | create_app()
1177 | # In FastMCP 2.0+, the MCP server hangs off the shared Gateway instance
1178 | # rather than being returned by create_app(), so fetch it from the core module
1179 | from ultimate_mcp_server.core import _gateway_instance
1180 | mcp_server = _gateway_instance.mcp if _gateway_instance else None
1181 | if not mcp_server:
1182 | raise RuntimeError("Gateway instance not initialized or MCP server not available")
1183 |
1184 | # Register API Meta-Tool
1185 | register_api_meta_tools(mcp_server)
1186 |
1187 | # Create context for stateless functions
1188 | ctx = {"mcp": mcp_server}
1189 |
1190 | # Process commands
1191 | if args.register and args.url:
1192 | result = await register_api(
1193 | api_name=args.register,
1194 | openapi_url=args.url,
1195 | base_url=args.base_url,
1196 | ctx=ctx
1197 | )
1198 | print(f"Registered API {args.register} with {result['tools_count']} tools")
1199 | print(f"Tools: {', '.join(result['tools_registered'])}")
1200 | elif args.list:
1201 | result = await list_registered_apis(ctx=ctx)
1202 | print(f"Registered APIs ({result['total_apis']}):")
1203 | for api_name, api_info in result["apis"].items():
1204 | print(
1205 | f"- {api_name}: {api_info['tools_count']} tools, Base URL: {api_info['base_url']}"
1206 | )
1207 | elif args.details:
1208 | result = await get_api_details(args.details, ctx=ctx)
1209 | print(f"API {args.details} ({result['endpoints_count']} endpoints):")
1210 | print(f"Base URL: {result['base_url']}")
1211 | print(f"OpenAPI URL: {result['openapi_url']}")
1212 | print("Endpoints:")
1213 | for endpoint in result["tools"]:
1214 | print(f"- {endpoint['method'].upper()} {endpoint['path']} ({endpoint['name']})")
1215 | if endpoint["summary"]:
1216 | print(f" Summary: {endpoint['summary']}")
1217 | elif args.unregister:
1218 | result = await unregister_api(args.unregister, ctx=ctx)
1219 | print(f"Unregistered API {args.unregister} with {result['tools_count']} tools")
1220 | elif args.refresh:
1221 | result = await refresh_api(
1222 | api_name=args.refresh,
1223 | update_base_url=args.base_url,
1224 | ctx=ctx
1225 | )
1226 | print(
1227 | f"Refreshed API {args.refresh}: {len(result['tools_added'])} added, {len(result['tools_removed'])} removed, {len(result['tools_updated'])} updated"
1228 | )
1229 | else:
1230 | print("No action specified. Use --help for usage information.")
1231 |
1232 | # Run the main function
1233 | asyncio.run(main())
1234 |
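# Example CLI invocations (illustrative; the module path and the OpenAPI URL
# below are placeholders, not taken from the repository):
#
#     python -m ultimate_mcp_server.tools.meta_api_tool --register example_api \
#         --url https://api.example.com/openapi.json --base-url https://api.example.com
#     python -m ultimate_mcp_server.tools.meta_api_tool --list
#     python -m ultimate_mcp_server.tools.meta_api_tool --details example_api
#     python -m ultimate_mcp_server.tools.meta_api_tool --refresh example_api
#     python -m ultimate_mcp_server.tools.meta_api_tool --unregister example_api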
```