This is page 4 of 45. Use http://codebase.md/dicklesworthstone/llm_gateway_mcp_server?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .cursorignore
├── .env.example
├── .envrc
├── .gitignore
├── additional_features.md
├── check_api_keys.py
├── completion_support.py
├── comprehensive_test.py
├── docker-compose.yml
├── Dockerfile
├── empirically_measured_model_speeds.json
├── error_handling.py
├── example_structured_tool.py
├── examples
│ ├── __init__.py
│ ├── advanced_agent_flows_using_unified_memory_system_demo.py
│ ├── advanced_extraction_demo.py
│ ├── advanced_unified_memory_system_demo.py
│ ├── advanced_vector_search_demo.py
│ ├── analytics_reporting_demo.py
│ ├── audio_transcription_demo.py
│ ├── basic_completion_demo.py
│ ├── cache_demo.py
│ ├── claude_integration_demo.py
│ ├── compare_synthesize_demo.py
│ ├── cost_optimization.py
│ ├── data
│ │ ├── sample_event.txt
│ │ ├── Steve_Jobs_Introducing_The_iPhone_compressed.md
│ │ └── Steve_Jobs_Introducing_The_iPhone_compressed.mp3
│ ├── docstring_refiner_demo.py
│ ├── document_conversion_and_processing_demo.py
│ ├── entity_relation_graph_demo.py
│ ├── filesystem_operations_demo.py
│ ├── grok_integration_demo.py
│ ├── local_text_tools_demo.py
│ ├── marqo_fused_search_demo.py
│ ├── measure_model_speeds.py
│ ├── meta_api_demo.py
│ ├── multi_provider_demo.py
│ ├── ollama_integration_demo.py
│ ├── prompt_templates_demo.py
│ ├── python_sandbox_demo.py
│ ├── rag_example.py
│ ├── research_workflow_demo.py
│ ├── sample
│ │ ├── article.txt
│ │ ├── backprop_paper.pdf
│ │ ├── buffett.pdf
│ │ ├── contract_link.txt
│ │ ├── legal_contract.txt
│ │ ├── medical_case.txt
│ │ ├── northwind.db
│ │ ├── research_paper.txt
│ │ ├── sample_data.json
│ │ └── text_classification_samples
│ │ ├── email_classification.txt
│ │ ├── news_samples.txt
│ │ ├── product_reviews.txt
│ │ └── support_tickets.txt
│ ├── sample_docs
│ │ └── downloaded
│ │ └── attention_is_all_you_need.pdf
│ ├── sentiment_analysis_demo.py
│ ├── simple_completion_demo.py
│ ├── single_shot_synthesis_demo.py
│ ├── smart_browser_demo.py
│ ├── sql_database_demo.py
│ ├── sse_client_demo.py
│ ├── test_code_extraction.py
│ ├── test_content_detection.py
│ ├── test_ollama.py
│ ├── text_classification_demo.py
│ ├── text_redline_demo.py
│ ├── tool_composition_examples.py
│ ├── tournament_code_demo.py
│ ├── tournament_text_demo.py
│ ├── unified_memory_system_demo.py
│ ├── vector_search_demo.py
│ ├── web_automation_instruction_packs.py
│ └── workflow_delegation_demo.py
├── LICENSE
├── list_models.py
├── marqo_index_config.json.example
├── mcp_protocol_schema_2025-03-25_version.json
├── mcp_python_lib_docs.md
├── mcp_tool_context_estimator.py
├── model_preferences.py
├── pyproject.toml
├── quick_test.py
├── README.md
├── resource_annotations.py
├── run_all_demo_scripts_and_check_for_errors.py
├── storage
│ └── smart_browser_internal
│ ├── locator_cache.db
│ ├── readability.js
│ └── storage_state.enc
├── test_client.py
├── test_connection.py
├── TEST_README.md
├── test_sse_client.py
├── test_stdio_client.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── integration
│ │ ├── __init__.py
│ │ └── test_server.py
│ ├── manual
│ │ ├── test_extraction_advanced.py
│ │ └── test_extraction.py
│ └── unit
│ ├── __init__.py
│ ├── test_cache.py
│ ├── test_providers.py
│ └── test_tools.py
├── TODO.md
├── tool_annotations.py
├── tools_list.json
├── ultimate_mcp_banner.webp
├── ultimate_mcp_logo.webp
├── ultimate_mcp_server
│ ├── __init__.py
│ ├── __main__.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── commands.py
│ │ ├── helpers.py
│ │ └── typer_cli.py
│ ├── clients
│ │ ├── __init__.py
│ │ ├── completion_client.py
│ │ └── rag_client.py
│ ├── config
│ │ └── examples
│ │ └── filesystem_config.yaml
│ ├── config.py
│ ├── constants.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── evaluation
│ │ │ ├── base.py
│ │ │ └── evaluators.py
│ │ ├── providers
│ │ │ ├── __init__.py
│ │ │ ├── anthropic.py
│ │ │ ├── base.py
│ │ │ ├── deepseek.py
│ │ │ ├── gemini.py
│ │ │ ├── grok.py
│ │ │ ├── ollama.py
│ │ │ ├── openai.py
│ │ │ └── openrouter.py
│ │ ├── server.py
│ │ ├── state_store.py
│ │ ├── tournaments
│ │ │ ├── manager.py
│ │ │ ├── tasks.py
│ │ │ └── utils.py
│ │ └── ums_api
│ │ ├── __init__.py
│ │ ├── ums_database.py
│ │ ├── ums_endpoints.py
│ │ ├── ums_models.py
│ │ └── ums_services.py
│ ├── exceptions.py
│ ├── graceful_shutdown.py
│ ├── services
│ │ ├── __init__.py
│ │ ├── analytics
│ │ │ ├── __init__.py
│ │ │ ├── metrics.py
│ │ │ └── reporting.py
│ │ ├── cache
│ │ │ ├── __init__.py
│ │ │ ├── cache_service.py
│ │ │ ├── persistence.py
│ │ │ ├── strategies.py
│ │ │ └── utils.py
│ │ ├── cache.py
│ │ ├── document.py
│ │ ├── knowledge_base
│ │ │ ├── __init__.py
│ │ │ ├── feedback.py
│ │ │ ├── manager.py
│ │ │ ├── rag_engine.py
│ │ │ ├── retriever.py
│ │ │ └── utils.py
│ │ ├── prompts
│ │ │ ├── __init__.py
│ │ │ ├── repository.py
│ │ │ └── templates.py
│ │ ├── prompts.py
│ │ └── vector
│ │ ├── __init__.py
│ │ ├── embeddings.py
│ │ └── vector_service.py
│ ├── tool_token_counter.py
│ ├── tools
│ │ ├── __init__.py
│ │ ├── audio_transcription.py
│ │ ├── base.py
│ │ ├── completion.py
│ │ ├── docstring_refiner.py
│ │ ├── document_conversion_and_processing.py
│ │ ├── enhanced-ums-lookbook.html
│ │ ├── entity_relation_graph.py
│ │ ├── excel_spreadsheet_automation.py
│ │ ├── extraction.py
│ │ ├── filesystem.py
│ │ ├── html_to_markdown.py
│ │ ├── local_text_tools.py
│ │ ├── marqo_fused_search.py
│ │ ├── meta_api_tool.py
│ │ ├── ocr_tools.py
│ │ ├── optimization.py
│ │ ├── provider.py
│ │ ├── pyodide_boot_template.html
│ │ ├── python_sandbox.py
│ │ ├── rag.py
│ │ ├── redline-compiled.css
│ │ ├── sentiment_analysis.py
│ │ ├── single_shot_synthesis.py
│ │ ├── smart_browser.py
│ │ ├── sql_databases.py
│ │ ├── text_classification.py
│ │ ├── text_redline_tools.py
│ │ ├── tournament.py
│ │ ├── ums_explorer.html
│ │ └── unified_memory_system.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── async_utils.py
│ │ ├── display.py
│ │ ├── logging
│ │ │ ├── __init__.py
│ │ │ ├── console.py
│ │ │ ├── emojis.py
│ │ │ ├── formatter.py
│ │ │ ├── logger.py
│ │ │ ├── panels.py
│ │ │ ├── progress.py
│ │ │ └── themes.py
│ │ ├── parse_yaml.py
│ │ ├── parsing.py
│ │ ├── security.py
│ │ └── text.py
│ └── working_memory_api.py
├── unified_memory_system_technical_analysis.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/ultimate_mcp_server/constants.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Global constants and enumerations for the Ultimate MCP Server.
3 |
4 | This module defines system-wide constants, enumerations, and mappings used throughout
5 | the Ultimate MCP Server codebase. Centralizing these values ensures consistency across
6 | the application and simplifies maintenance when values need to be updated.
7 |
8 | The module includes:
9 |
10 | - Provider enum: Supported LLM providers (OpenAI, Anthropic, etc.)
11 | - TaskType enum: Categories of tasks that can be performed with LLMs
12 | - LogLevel enum: Standard logging levels
13 | - COST_PER_MILLION_TOKENS: Cost estimates for different models
14 | - DEFAULT_MODELS: Default model mappings for each provider
15 | - EMOJI_MAP: Emoji icons for enhanced logging and visualization
16 |
17 | These constants should be imported and used directly rather than duplicating their
18 | values in other parts of the codebase. This approach ensures that when values need
19 | to be updated (e.g., adding a new provider or updating pricing), changes only need
20 | to be made in this central location.
21 |
22 | Example usage:
23 | ```python
24 | from ultimate_mcp_server.constants import Provider, TaskType, EMOJI_MAP
25 |
26 | # Use provider enum
27 | default_provider = Provider.OPENAI
28 |
29 | # Get emoji for logging
30 | success_emoji = EMOJI_MAP["success"] # ✅
31 |
32 | # Check task type
33 | if task_type == TaskType.COMPLETION:
34 | # Handle completion task
35 | ```
36 | """
37 | from enum import Enum
38 | from typing import Dict
39 |
40 |
41 | class Provider(str, Enum):
42 | """
43 | Enumeration of supported LLM providers in the Ultimate MCP Server.
44 |
45 | This enum defines the canonical names for each supported large language model
46 | provider in the system. These identifiers are used consistently throughout the
47 | codebase for:
48 |
49 | - Configuration settings (provider-specific API keys, endpoints, etc.)
50 | - Tool parameters (selecting which provider to use for a task)
51 | - Logging and error reporting (identifying the source of requests/responses)
52 | - Cost calculation and billing (provider-specific pricing models)
53 |
54 | New providers should be added here as they are integrated into the system.
55 | The string values should be lowercase and match the provider's canonical name
56 | where possible, as these values appear in API requests/responses.
57 |
58 | Usage:
59 | ```python
60 | # Reference a provider by enum
61 | default_provider = Provider.OPENAI
62 |
63 | # Convert between string and enum
64 | provider_name = "anthropic"
65 | provider_enum = Provider(provider_name) # Provider.ANTHROPIC
66 |
67 | # Check if a provider is supported
68 | if user_provider in Provider.__members__.values():
69 | use_provider(user_provider)
70 | ```
71 | """
72 | OPENAI = "openai"
73 | ANTHROPIC = "anthropic"
74 | DEEPSEEK = "deepseek"
75 | GEMINI = "gemini"
76 | OPENROUTER = "openrouter"
77 | OLLAMA = "ollama"
78 | GROK = "grok"
79 | MISTRAL = "mistral"
80 | AWS = "aws"
81 | AZURE = "azure"
82 |
83 |
84 | class TaskType(str, Enum):
85 | """
86 | Enumeration of task types that can be performed by LLMs in the system.
87 |
88 | This enum categorizes the different types of operations that LLMs can perform
89 | within the MCP ecosystem. These task types are used for:
90 |
91 | - Logging and analytics (tracking usage patterns by task type)
92 | - Prompt selection (optimizing prompts for specific task types)
93 | - Resource allocation (prioritizing resources for different task types)
94 | - Performance monitoring (measuring success rates by task category)
95 |
96 | The categorization helps organize tools in a semantically meaningful way and
97 | provides metadata for optimizing the system's handling of different tasks.
98 | When tools register with the system, they typically specify which task type
99 | they represent.
100 |
101 | Task types are roughly organized into these categories:
102 | - Text generation (COMPLETION, GENERATION, etc.)
103 | - Analysis and understanding (ANALYSIS, CLASSIFICATION, etc.)
104 | - Data manipulation (EXTRACTION, TRANSLATION, etc.)
105 | - System interaction (DATABASE, BROWSER, etc.)
106 | - Document operations (DOCUMENT_PROCESSING, etc.)
107 |
108 | Usage:
109 | ```python
110 | # Log with task type
111 | logger.info("Generating text completion", task_type=TaskType.COMPLETION)
112 |
113 | # Register tool with its task type
114 | @register_tool(name="generate_text", task_type=TaskType.COMPLETION)
115 | async def generate_text(prompt: str):
116 | # Implementation
117 | ```
118 | """
119 | COMPLETION = "completion"
120 | CHAT = "chat"
121 | SUMMARIZATION = "summarization"
122 | EXTRACTION = "extraction"
123 | GENERATION = "generation"
124 | ANALYSIS = "analysis"
125 | CLASSIFICATION = "classification"
126 | TRANSLATION = "translation"
127 | QA = "qa"
128 | DATABASE = "database"
129 | QUERY = "query"
130 | BROWSER = "browser"
131 | DOWNLOAD = "download"
132 | UPLOAD = "upload"
133 | DOCUMENT_PROCESSING = "document_processing"
134 | DOCUMENT = "document"
135 | TEXT_ENHANCEMENT = "text_enhancement"
136 | NER = "ner"
137 | QUESTION_ANSWERING = "question_answering"
138 | QUALITY_ASSESSMENT = "quality_assessment"
139 | OCR = "ocr"
140 | TEXT_EXTRACTION = "text_extraction"
141 | CODE_EXECUTION = "code_execution"
142 |
143 |
144 | class LogLevel(str, Enum):
145 | """Log levels."""
146 | DEBUG = "DEBUG"
147 | INFO = "INFO"
148 | WARNING = "WARNING"
149 | ERROR = "ERROR"
150 | CRITICAL = "CRITICAL"
151 |
152 |
153 | # Cost estimates for model pricing (in dollars per million tokens)
154 | # This constant defines the estimated costs for different models, used for cost tracking and budgeting
155 | # Values represent US dollars per million tokens, differentiated by input (prompt) and output (completion) costs
156 | # These costs may change as providers update their pricing, and should be periodically reviewed
157 | COST_PER_MILLION_TOKENS: Dict[str, Dict[str, float]] = {
158 | # OpenAI models
159 | "gpt-4o": {"input": 2.5, "output": 10.0},
160 | "gpt-4o-mini": {"input": 0.15, "output": 0.6},
161 | "gpt-4.1": {"input": 2.0, "output": 8.0},
162 | "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
163 | "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
164 | "o1-preview": {"input": 15.00, "output": 60.00},
165 | "o3-mini": {"input": 1.10, "output": 4.40},
166 |
167 | # Claude models
168 | "claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0},
169 | "claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.0},
170 | "claude-3-opus-20240229": {"input": 15.0, "output": 75.0},
171 |
172 | # DeepSeek models
173 | "deepseek-chat": {"input": 0.27, "output": 1.10},
174 | "deepseek-reasoner": {"input": 0.55, "output": 2.19},
175 |
176 | # Gemini models
177 | "gemini-2.0-flash-lite": {"input": 0.075, "output": 0.30},
178 | "gemini-2.0-flash": {"input": 0.35, "output": 1.05},
179 | "gemini-2.0-flash-thinking-exp-01-21": {"input": 0.0, "output": 0.0},
180 | "gemini-2.5-pro-preview-03-25": {"input": 1.25, "output": 10.0},
181 |
182 | # OpenRouter models
183 | "mistralai/mistral-nemo": {"input": 0.035, "output": 0.08},
184 |
185 | # Grok models (based on the provided documentation)
186 | "grok-3-latest": {"input": 3.0, "output": 15.0},
187 | "grok-3-fast-latest": {"input": 5.0, "output": 25.0},
188 | "grok-3-mini-latest": {"input": 0.30, "output": 0.50},
189 | "grok-3-mini-fast-latest": {"input": 0.60, "output": 4.0},
190 |
191 | # Ollama models (very low estimated costs since they run locally)
192 | "mix_77/gemma3-qat-tools:27b": {"input": 0.0001, "output": 0.0001},
193 | "JollyLlama/GLM-Z1-32B-0414-Q4_K_M:latest": {"input": 0.0001, "output": 0.0001},
194 | "llama3.2-vision:latest": {"input": 0.0001, "output": 0.0001},
195 | }
196 |
197 |
198 | # Default models by provider
199 | # This mapping defines the recommended default model for each supported provider
200 | # Used when no specific model is requested in API calls or tool invocations
201 | # These defaults aim to balance quality, speed, and cost for general-purpose usage
202 | DEFAULT_MODELS = {
203 | Provider.OPENAI: "gpt-4.1-mini",
204 | Provider.ANTHROPIC: "claude-3-5-haiku-20241022",
205 | Provider.DEEPSEEK: "deepseek-chat",
206 | Provider.GEMINI: "gemini-2.5-pro-preview-03-25",
207 | Provider.OPENROUTER: "mistralai/mistral-nemo",
208 | Provider.GROK: "grok-3-latest",
209 | Provider.OLLAMA: "mix_77/gemma3-qat-tools:27b"
210 | }
211 |
212 |
213 | # Emoji mapping by log type and action
214 | # Provides visual indicators for different log types, components, and actions
215 | # Used in rich logging output to improve readability and visual scanning
216 | # Organized into sections: general status, components, tasks, and providers
217 | EMOJI_MAP = {
218 | "start": "🚀",
219 | "success": "✅",
220 | "error": "❌",
221 | "warning": "⚠️",
222 | "info": "ℹ️",
223 | "debug": "🔍",
224 | "critical": "🔥",
225 |
226 | # Component-specific emojis
227 | "server": "🖥️",
228 | "cache": "💾",
229 | "provider": "🔌",
230 | "request": "📤",
231 | "response": "📥",
232 | "processing": "⚙️",
233 | "model": "🧠",
234 | "config": "🔧",
235 | "token": "🔢",
236 | "cost": "💰",
237 | "time": "⏱️",
238 | "tool": "🛠️",
239 | "tournament": "🏆",
240 | "cancel": "🛑",
241 | "database": "🗄️",
242 | "browser": "🌐",
243 |
244 | # Task-specific emojis
245 | "completion": "✍️",
246 | "chat": "💬",
247 | "summarization": "📝",
248 | "extraction": "🔍",
249 | "generation": "🎨",
250 | "analysis": "📊",
251 | "classification": "🏷️",
252 | "query": "🔍",
253 | "browser_automation": "🌐",
254 | "database_interactions": "🗄️",
255 | "download": "⬇️",
256 | "upload": "⬆️",
257 | "document_processing": "📄",
258 | "document": "📄",
259 | "translation": "🔄",
260 | "qa": "❓",
261 |
262 | # Provider-specific emojis
263 | Provider.OPENAI: "🟢",
264 | Provider.ANTHROPIC: "🟣",
265 | Provider.DEEPSEEK: "🟠",
266 | Provider.GEMINI: "🔵",
267 | Provider.OPENROUTER: "🌐",
268 | Provider.OLLAMA: "🦙",
269 | Provider.GROK: "⚡"
270 | }
271 |
272 |
273 | # Base toolset categories for the server
274 | BASE_TOOLSET_CATEGORIES = {
275 | "Completion": ["generate_completion", "stream_completion", "chat_completion", "multi_completion"],
276 | "Provider": ["get_provider_status", "list_models"],
277 | "Filesystem": ["read_file", "write_file", "edit_file", "list_directory", "directory_tree", "search_files"],
278 | "Optimization": ["estimate_cost", "compare_models", "recommend_model"],
279 | "Text Processing": ["run_ripgrep", "run_awk", "run_sed", "run_jq"],
280 | "Meta": ["get_tool_info", "get_llm_instructions", "get_tool_recommendations"],
281 | "Search": ["marqo_fused_search"],
282 | # Browser automation tools
283 | "Browser": [
284 | "browser_init", "browser_navigate", "browser_click", "browser_type",
285 | "browser_screenshot", "browser_close", "browser_select", "browser_checkbox",
286 | "browser_get_text", "browser_get_attributes", "browser_execute_javascript",
287 | "browser_wait", "browser_back", "browser_forward", "browser_reload",
288 | "browser_get_console_logs", "browser_download_file", "browser_upload_file",
289 | "browser_pdf", "browser_tab_new", "browser_tab_close", "browser_tab_list",
290 | "browser_tab_select"
291 | ],
292 | "Web Research": [
293 | "execute_web_workflow", "extract_structured_data_from_pages",
294 | "find_and_download_pdfs", "multi_engine_search_summary",
295 | "monitor_web_data_points", "research_and_synthesize_report"
296 | ],
297 | # HTML to markdown tools
298 | "HTML Processing": [
299 | "clean_and_format_text_as_markdown", "detect_content_type",
300 | "batch_format_texts", "optimize_markdown_formatting"
301 | ],
302 | # Extraction tools
303 | "Extraction": [
304 | "extract_json", "extract_table", "extract_key_value_pairs",
305 | "extract_semantic_schema"
306 | ],
307 | # Cognitive and agent memory tools
308 | "Cognitive and Agent Memory": [
309 | "initialize_memory_system", "create_workflow", "update_workflow_status",
310 | "record_action_start", "record_action_completion", "get_action_details",
311 | "summarize_context_block", "add_action_dependency", "get_action_dependencies",
312 | "record_artifact", "record_thought", "store_memory", "get_memory_by_id",
313 | "hybrid_search_memories", "create_memory_link",
314 | "query_memories", "list_workflows", "get_workflow_details", "get_recent_actions",
315 | "get_artifacts", "get_artifact_by_id", "create_thought_chain", "get_thought_chain",
316 | "get_working_memory", "focus_memory", "optimize_working_memory",
317 | "save_cognitive_state", "load_cognitive_state", "get_workflow_context",
318 | "auto_update_focus", "promote_memory_level", "update_memory", "get_linked_memories",
319 | "consolidate_memories", "generate_reflection", "summarize_text",
320 | "delete_expired_memories", "compute_memory_statistics"
321 | ],
322 | }
```
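As a quick illustration of how the pricing and default-model constants fit together, here is a minimal sketch of a per-request cost estimate. The `estimate_request_cost` helper is hypothetical and exists only for this example; the imported names (`COST_PER_MILLION_TOKENS`, `DEFAULT_MODELS`, `EMOJI_MAP`, `Provider`) are the ones defined in the file above.

```python
from ultimate_mcp_server.constants import (
    COST_PER_MILLION_TOKENS,
    DEFAULT_MODELS,
    EMOJI_MAP,
    Provider,
)


def estimate_request_cost(model: str, input_tokens: int, output_tokens: int) -> float:
    """Hypothetical helper: estimate USD cost of one request from the pricing table."""
    rates = COST_PER_MILLION_TOKENS.get(model)
    if rates is None:
        raise ValueError(f"No pricing entry for model '{model}'")
    # Rates are expressed per million tokens, split into input (prompt) and output (completion).
    return (
        input_tokens * rates["input"] / 1_000_000
        + output_tokens * rates["output"] / 1_000_000
    )


# Example: price a call against the default OpenAI model ("gpt-4.1-mini").
model = DEFAULT_MODELS[Provider.OPENAI]
cost = estimate_request_cost(model, input_tokens=1_200, output_tokens=400)
print(f"{EMOJI_MAP['cost']} {model}: ~${cost:.6f}")
```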
--------------------------------------------------------------------------------
/tests/unit/test_tools.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for the tool implementations."""
2 | from typing import Any, Dict
3 |
4 | import pytest
5 | from pytest import MonkeyPatch
6 |
7 | from ultimate_mcp_server.core.server import Gateway
8 | from ultimate_mcp_server.tools.base import (
9 | BaseTool,
10 | register_tool,
11 | with_retry,
12 | with_tool_metrics,
13 | )
14 |
15 | # Remove the CompletionTools import as the class was deleted
16 | # from ultimate_mcp_server.tools.completion import CompletionTools
17 | from ultimate_mcp_server.tools.document import DocumentTools
18 | from ultimate_mcp_server.tools.extraction import ExtractionTools
19 | from ultimate_mcp_server.utils import get_logger
20 |
21 | logger = get_logger("test.tools")
22 |
23 |
24 | class TestBaseTools:
25 | """Tests for the base tool classes and decorators."""
26 |
27 | def test_base_tool_init(self, mock_gateway: Gateway):
28 | """Test base tool initialization."""
29 | logger.info("Testing base tool initialization", emoji_key="test")
30 |
31 | # Create a minimal tool class
32 | class TestTool(BaseTool):
33 | tool_name = "test-tool"
34 | description = "Test tool"
35 |
36 | def _register_tools(self):
37 | # No tools to register
38 | pass
39 |
40 | # Initialize
41 | tool = TestTool(mock_gateway)
42 |
43 | # Check properties
44 | assert tool.tool_name == "test-tool"
45 | assert tool.description == "Test tool"
46 | assert tool.mcp == mock_gateway.mcp
47 | assert tool.logger is not None
48 | assert tool.metrics is not None
49 |
50 | @pytest.mark.asyncio
51 | async def test_with_tool_metrics(self):
52 | """Test the with_tool_metrics decorator."""
53 | logger.info("Testing with_tool_metrics decorator", emoji_key="test")
54 |
55 | # Create a tool class with metrics
56 | class TestTool(BaseTool):
57 | tool_name = "test-metrics-tool"
58 | description = "Test metrics tool"
59 |
60 | def _register_tools(self):
61 | pass
62 |
63 | @with_tool_metrics
64 | async def test_method(self, arg1, arg2=None, ctx=None):
65 | return {"result": arg1 + str(arg2 or "")}
66 |
67 | # Create a mock MCP server
68 | mock_mcp = type("MockMCP", (), {"tool": lambda: lambda x: x})
69 | mock_gateway = type("MockGateway", (), {"mcp": mock_mcp})
70 |
71 | # Initialize
72 | tool = TestTool(mock_gateway)
73 |
74 | # Call method
75 | result = await tool.test_method("test", "arg")
76 |
77 | # Check result
78 | assert result == {"result": "testarg"}
79 |
80 | # Check metrics
81 | assert tool.metrics.total_calls == 1
82 | assert tool.metrics.successful_calls == 1
83 | assert tool.metrics.failed_calls == 0
84 |
85 | # Test error case
86 | @with_tool_metrics
87 | async def failing_method(self, arg):
88 | raise ValueError("Test error")
89 |
90 | # Add to class
91 | TestTool.failing_method = failing_method
92 |
93 | # Call failing method
94 | with pytest.raises(ValueError):
95 | await tool.failing_method("test")
96 |
97 | # Check metrics
98 | assert tool.metrics.total_calls == 2
99 | assert tool.metrics.successful_calls == 1
100 | assert tool.metrics.failed_calls == 1
101 |
102 | @pytest.mark.asyncio
103 | async def test_with_retry(self):
104 | """Test the with_retry decorator."""
105 | logger.info("Testing with_retry decorator", emoji_key="test")
106 |
107 | # Track calls
108 | calls = []
109 |
110 | @with_retry(max_retries=2, retry_delay=0.1)
111 | async def flaky_function(succeed_after):
112 | calls.append(len(calls))
113 | if len(calls) < succeed_after:
114 | raise ValueError("Temporary error")
115 | return "success"
116 |
117 | # Should succeed on first try
118 | calls = []
119 | result = await flaky_function(1)
120 | assert result == "success"
121 | assert len(calls) == 1
122 |
123 | # Should fail first, succeed on retry
124 | calls = []
125 | result = await flaky_function(2)
126 | assert result == "success"
127 | assert len(calls) == 2
128 |
129 | # Should fail first two, succeed on second retry
130 | calls = []
131 | result = await flaky_function(3)
132 | assert result == "success"
133 | assert len(calls) == 3
134 |
135 | # Should fail too many times
136 | calls = []
137 | with pytest.raises(ValueError):
138 | await flaky_function(4) # Will make 3 attempts (original + 2 retries)
139 | assert len(calls) == 3
140 |
141 | def test_register_tool(self, mock_gateway: Gateway):
142 | """Test the register_tool decorator."""
143 | logger.info("Testing register_tool decorator", emoji_key="test")
144 |
145 | # Create a mock MCP server with a tool registration function
146 | registered_tools = {}
147 |
148 | class MockMCP:
149 | def tool(self, name=None, description=None):
150 | def decorator(f):
151 | registered_tools[name or f.__name__] = {
152 | "function": f,
153 | "description": description or f.__doc__
154 | }
155 | return f
156 | return decorator
157 |
158 | mock_mcp = MockMCP()
159 | mock_gateway.mcp = mock_mcp
160 |
161 | # Register a tool
162 | @register_tool(mock_gateway.mcp, name="test-tool", description="Test tool")
163 | async def test_tool(arg1, arg2=None):
164 | """Tool docstring."""
165 | return {"result": arg1 + str(arg2 or "")}
166 |
167 | # Check registration
168 | assert "test-tool" in registered_tools
169 | assert registered_tools["test-tool"]["description"] == "Test tool"
170 |
171 | # Register with defaults
172 | @register_tool(mock_gateway.mcp)
173 | async def another_tool(arg):
174 | """Another tool docstring."""
175 | return {"result": arg}
176 |
177 | # Check registration with defaults
178 | assert "another_tool" in registered_tools
179 | assert registered_tools["another_tool"]["description"] == "Another tool docstring."
180 |
181 |
182 | # Comment out the entire TestCompletionTools class as it relies on the deleted class structure
183 | # class TestCompletionTools:
184 | # """Tests for the completion tools."""
185 | #
186 | # @pytest.fixture
187 | # def mock_completion_tools(self, mock_gateway: Gateway) -> CompletionTools:
188 | # """Get mock completion tools."""
189 | # # This fixture is no longer valid as CompletionTools doesn't exist
190 | # # We would need to refactor tests to mock standalone functions
191 | # pass
192 | # # return CompletionTools(mock_gateway)
193 | #
194 | # def test_init(self, mock_completion_tools: CompletionTools):
195 | # """Test initialization."""
196 | # logger.info("Testing completion tools initialization", emoji_key="test")
197 | # # This test is no longer valid
198 | # # assert mock_completion_tools.tool_name == "completion"
199 | # # assert mock_completion_tools.description is not None
200 | # pass
201 | #
202 | # async def test_generate_completion(self, mock_completion_tools: CompletionTools, mock_gateway: Gateway, monkeypatch: MonkeyPatch):
203 | # """Test generate_completion tool."""
204 | # logger.info("Testing generate_completion tool", emoji_key="test")
205 | #
206 | # # Mocking needs to target the standalone function now, not a method
207 | # # This test needs complete refactoring
208 | # pass
209 | #
210 | # async def test_chat_completion(self, mock_completion_tools: CompletionTools, mock_gateway: Gateway, monkeypatch: MonkeyPatch):
211 | # """Test chat_completion tool."""
212 | # logger.info("Testing chat_completion tool", emoji_key="test")
213 | # # This test needs complete refactoring
214 | # pass
215 | #
216 | # async def test_stream_completion(self, mock_completion_tools: CompletionTools, mock_gateway: Gateway, monkeypatch: MonkeyPatch):
217 | # """Test stream_completion tool."""
218 | # logger.info("Testing stream_completion tool", emoji_key="test")
219 | # # This test needs complete refactoring
220 | # pass
221 | #
222 | # async def test_multi_completion(self, mock_completion_tools: CompletionTools, mock_gateway: Gateway, monkeypatch: MonkeyPatch):
223 | # """Test multi_completion tool."""
224 | # logger.info("Testing multi_completion tool", emoji_key="test")
225 | # # This test needs complete refactoring
226 | # pass
227 |
228 |
229 | class TestDocumentTools:
230 | """Tests for the document tools."""
231 |
232 | @pytest.fixture
233 | def mock_document_tools(self, mock_gateway: Gateway) -> DocumentTools:
234 | """Get mock document tools."""
235 | return DocumentTools(mock_gateway)
236 |
237 | def test_init(self, mock_document_tools: DocumentTools):
238 | """Test initialization."""
239 | logger.info("Testing document tools initialization", emoji_key="test")
240 |
241 | assert mock_document_tools.tool_name is not None
242 | assert mock_document_tools.description is not None
243 |
244 | async def test_chunk_document(self, mock_document_tools: DocumentTools, sample_document: str, monkeypatch: MonkeyPatch):
245 | """Test chunk_document tool."""
246 | logger.info("Testing chunk_document tool", emoji_key="test")
247 |
248 | # Create a simplified implementation for testing
249 | async def mock_chunk_document(document, chunk_size=1000, chunk_overlap=100, method="token", ctx=None):
250 | chunks = []
251 | # Simple paragraph chunking for testing
252 | for para in document.split("\n\n"):
253 | if para.strip():
254 | chunks.append(para.strip())
255 | return {
256 | "chunks": chunks,
257 | "chunk_count": len(chunks),
258 | "method": method,
259 | "processing_time": 0.1
260 | }
261 |
262 | # Create a mock execute function for our BaseTool
263 | async def mock_execute(tool_name, params):
264 | # Call our mock implementation
265 | return await mock_chunk_document(**params)
266 |
267 | # Monkeypatch the tool execution using our new execute method
268 | monkeypatch.setattr(mock_document_tools, "execute", mock_execute)
269 |
270 | # Call the tool
271 | result = await mock_document_tools.execute("chunk_document", {
272 | "document": sample_document,
273 | "method": "paragraph"
274 | })
275 |
276 | # Check result
277 | assert isinstance(result, dict)
278 | assert "chunks" in result
279 | assert isinstance(result["chunks"], list)
280 | assert result["chunk_count"] > 0
281 | assert result["method"] == "paragraph"
282 | assert result["processing_time"] > 0
283 |
284 |
285 | class TestExtractionTools:
286 | """Tests for the extraction tools."""
287 |
288 | @pytest.fixture
289 | def mock_extraction_tools(self, mock_gateway: Gateway) -> ExtractionTools:
290 | """Get mock extraction tools."""
291 | return ExtractionTools(mock_gateway)
292 |
293 | def test_init(self, mock_extraction_tools: ExtractionTools):
294 | """Test initialization."""
295 | logger.info("Testing extraction tools initialization", emoji_key="test")
296 |
297 | assert mock_extraction_tools.tool_name == "extraction"
298 | assert mock_extraction_tools.description is not None
299 |
300 | async def test_extract_json(self, mock_extraction_tools: ExtractionTools, sample_json_data: Dict[str, Any], monkeypatch: MonkeyPatch):
301 | """Test extract_json tool."""
302 | logger.info("Testing extract_json tool", emoji_key="test")
303 |
304 | # Mock the tool execution
305 | async def mock_extract_json(text, schema=None, provider="openai", model=None, max_attempts=3, ctx=None):
306 | return {
307 | "data": sample_json_data,
308 | "provider": provider,
309 | "model": model or "mock-model",
310 | "tokens": {
311 | "input": 50,
312 | "output": 30,
313 | "total": 80
314 | },
315 | "cost": 0.01,
316 | "processing_time": 0.2
317 | }
318 |
319 | # Create a mock execute function for our BaseTool
320 | async def mock_execute(tool_name, params):
321 | # Call our mock implementation
322 | return await mock_extract_json(**params)
323 |
324 | # Monkeypatch the tool execution using our new execute method
325 | monkeypatch.setattr(mock_extraction_tools, "execute", mock_execute)
326 |
327 | # Call the tool
328 | result = await mock_extraction_tools.execute("extract_json", {
329 | "text": "Extract JSON from this: " + str(sample_json_data),
330 | "provider": "mock",
331 | "model": "mock-model"
332 | })
333 |
334 | # Check result
335 | assert isinstance(result, dict)
336 | assert "data" in result
337 | assert result["data"] == sample_json_data
338 | assert result["provider"] == "mock"
339 | assert result["model"] == "mock-model"
340 | assert "tokens" in result
341 | assert "cost" in result
342 | assert "processing_time" in result
```
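The `with_retry` tests above rely on the decorator making the original call plus up to `max_retries` retries, sleeping `retry_delay` seconds between attempts. The sketch below shows one way a decorator with those semantics could be written; it is an illustrative stand-in, not the actual `ultimate_mcp_server.tools.base.with_retry` implementation.

```python
import asyncio
import functools


def with_retry(max_retries: int = 2, retry_delay: float = 0.1):
    """Illustrative retry decorator: original attempt plus up to max_retries retries."""
    def decorator(func):
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            last_exc = None
            for attempt in range(max_retries + 1):
                try:
                    return await func(*args, **kwargs)
                except Exception as exc:
                    last_exc = exc
                    if attempt < max_retries:
                        # Back off briefly before the next attempt.
                        await asyncio.sleep(retry_delay)
            raise last_exc
        return wrapper
    return decorator
```

Under these semantics, `flaky_function(3)` succeeds on the second retry while `flaky_function(4)` exhausts all three attempts and re-raises the last `ValueError`, which is exactly what the assertions in the test check.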
--------------------------------------------------------------------------------
/examples/text_redline_demo.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | """Comprehensive HTML‑redline demo that exercises **every** change type.
3 |
4 | Run this file after you have installed/linked the Ultimate‑MCP‑Server package
5 | in editable mode (``pip install -e .``) or added the repo root to ``PYTHONPATH``.
6 | It generates a single HTML file (``./redline_outputs/comprehensive_redline_demo.html``)
7 | that you can open in any browser to see insertions (blue), deletions (red),
8 | move‑targets/sources (green), attribute changes (orange) and inline word‑level
9 | diffs.
10 | """
11 |
12 | from __future__ import annotations
13 |
14 | import asyncio
15 | import sys
16 | from pathlib import Path
17 | from typing import Dict, List
18 |
19 | from rich import box
20 | from rich.console import Console
21 | from rich.markup import escape
22 | from rich.table import Table
23 |
24 | # ---------------------------------------------------------------------------
25 | # 1. Make sure we can import the Ultimate‑MCP‑Server package from source.
26 | # ---------------------------------------------------------------------------
27 | ROOT = Path(__file__).resolve().parents[1] # repo root (../..)
28 | if str(ROOT) not in sys.path:
29 | sys.path.insert(0, str(ROOT))
30 |
31 | # ---------------------------------------------------------------------------
32 | # 2. Project‑level imports (raise immediately if the dev env is broken)
33 | # ---------------------------------------------------------------------------
34 | from ultimate_mcp_server.exceptions import ToolError # noqa: E402
35 | from ultimate_mcp_server.tools.filesystem import write_file # noqa: E402
36 | from ultimate_mcp_server.tools.text_redline_tools import ( # noqa: E402
37 | create_html_redline, # noqa: E402
38 | )
39 | from ultimate_mcp_server.utils import get_logger # noqa: E402
40 |
41 | # ---------------------------------------------------------------------------
42 | # 3. Logger / console helpers
43 | # ---------------------------------------------------------------------------
44 | LOGGER = get_logger("demo.comprehensive_redline")
45 | CONSOLE = Console()
46 |
47 | # ---------------------------------------------------------------------------
48 | # 4. Demo input documents (original vs. modified)
49 | # ---------------------------------------------------------------------------
50 | ORIGINAL_HTML = """<!DOCTYPE html>
51 | <html>
52 | <head>
53 | <title>Comprehensive Demo Document</title>
54 | <meta name="description" content="A document to demonstrate redlining features">
55 | </head>
56 | <body>
57 | <h1>Project Documentation</h1>
58 |
59 | <div class="intro">
60 | <p>This project documentation covers all aspects of the Alpha system implementation.</p>
61 | <p>Last updated on January 15, 2025</p>
62 | </div>
63 |
64 | <h2>Executive Summary</h2>
65 | <p>The Alpha system provides robust data processing capabilities for enterprise applications.</p>
66 | <p>This documentation serves as the primary reference for developers and system architects.</p>
67 |
68 | <h2>Architecture Overview</h2>
69 | <p>The system follows a microservices architecture with the following components:</p>
70 | <ul>
71 | <li>Data ingestion layer</li>
72 | <li>Processing engine</li>
73 | <li>Storage layer</li>
74 | <li>API gateway</li>
75 | </ul>
76 |
77 | <h2>Implementation Details</h2>
78 | <p>Implementation follows the standard protocol described in section 5.2 of the technical specifications.</p>
79 | <p>All components must pass integration tests before deployment.</p>
80 |
81 | <h2>Deployment Process</h2>
82 | <p>Deployment occurs in three phases:</p>
83 | <ol>
84 | <li>Development environment validation</li>
85 | <li>Staging environment testing</li>
86 | <li>Production rollout</li>
87 | </ol>
88 | <p>Each phase requires approval from the technical lead.</p>
89 |
90 | <h2>Security Considerations</h2>
91 | <p>All data must be encrypted during transfer and at rest.</p>
92 | <p>Authentication uses OAuth 2.0 with JWT tokens.</p>
93 | <p>Regular security audits are conducted quarterly.</p>
94 |
95 | <table border="1">
96 | <tr>
97 | <th>Component</th>
98 | <th>Responsible Team</th>
99 | <th>Status</th>
100 | </tr>
101 | <tr>
102 | <td>Data ingestion</td>
103 | <td>Data Engineering</td>
104 | <td>Complete</td>
105 | </tr>
106 | <tr>
107 | <td>Processing engine</td>
108 | <td>Core Systems</td>
109 | <td>In progress</td>
110 | </tr>
111 | <tr>
112 | <td>Storage layer</td>
113 | <td>Infrastructure</td>
114 | <td>Complete</td>
115 | </tr>
116 | <tr>
117 | <td>API gateway</td>
118 | <td>API Team</td>
119 | <td>Planning</td>
120 | </tr>
121 | </table>
122 |
123 | <h2>Appendix</h2>
124 | <p>For additional information, refer to the technical specifications document.</p>
125 | <p>Contact <a href="mailto:[email protected]">[email protected]</a> with any questions.</p>
126 | </body>
127 | </html>"""
128 |
129 | MODIFIED_HTML = """<!DOCTYPE html>
130 | <html>
131 | <head>
132 | <title>Comprehensive Demo Document - 2025 Update</title>
133 | <meta name="description" content="A document to demonstrate all redlining features">
134 | <meta name="author" content="Documentation Team">
135 | </head>
136 | <body>
137 | <h1>Project Documentation</h1>
138 |
139 | <div class="intro">
140 | <p>This project documentation covers all aspects of the Alpha system implementation and integration.</p>
141 | <p>Last updated on May 5, 2025</p>
142 | </div>
143 |
144 | <h2>Appendix</h2>
145 | <p>For additional information, refer to the technical specifications document and API references.</p>
146 | <p>Contact <a href="mailto:[email protected]">[email protected]</a> with any questions.</p>
147 |
148 | <h2>Security Considerations</h2>
149 | <p>All data must be encrypted during transfer and at rest using AES-256 encryption.</p>
150 | <p>Authentication uses OAuth 2.0 with JWT tokens and optional two-factor authentication.</p>
151 | <p>Regular security audits are conducted quarterly by an independent security firm.</p>
152 | <p>Penetration testing is performed bi-annually.</p>
153 |
154 | <h2>Executive Summary</h2>
155 | <p>The Alpha system provides robust data processing capabilities for enterprise applications with enhanced performance.</p>
156 | <p>This documentation serves as the primary reference for developers, system architects, and operations teams.</p>
157 | <p>The system has been validated against ISO 27001 standards.</p>
158 |
159 | <h2>Architecture Overview</h2>
160 | <p>The system implements a cloud-native microservices architecture with the following components:</p>
161 | <ul>
162 | <li>Data ingestion layer with real-time processing</li>
163 | <li>Distributed processing engine</li>
164 | <li>Multi-region storage layer</li>
165 | <li>API gateway with rate limiting</li>
166 | <li>Monitoring and observability platform</li>
167 | <li>Disaster recovery system</li>
168 | </ul>
169 |
170 | <h2>Implementation Details</h2>
171 | <p>Implementation follows the enhanced protocol described in section 6.3 of the technical specifications.</p>
172 | <p>All components must pass integration and performance tests before deployment.</p>
173 |
174 | <table border="1">
175 | <tr>
176 | <th>Component</th>
177 | <th>Responsible Team</th>
178 | <th>Status</th>
179 | <th>Performance</th>
180 | </tr>
181 | <tr>
182 | <td>Data ingestion</td>
183 | <td>Data Engineering</td>
184 | <td>Complete</td>
185 | <td>Exceeds SLA</td>
186 | </tr>
187 | <tr>
188 | <td>Processing engine</td>
189 | <td>Core Systems</td>
190 | <td>Complete</td>
191 | <td>Meets SLA</td>
192 | </tr>
193 | <tr>
194 | <td>Storage layer</td>
195 | <td>Infrastructure</td>
196 | <td>Complete</td>
197 | <td>Meets SLA</td>
198 | </tr>
199 | <tr>
200 | <td>API gateway</td>
201 | <td>API Team</td>
202 | <td>Complete</td>
203 | <td>Exceeds SLA</td>
204 | </tr>
205 | <tr>
206 | <td>Monitoring platform</td>
207 | <td>DevOps</td>
208 | <td>Complete</td>
209 | <td>Meets SLA</td>
210 | </tr>
211 | </table>
212 |
213 | <h2>Scalability Considerations</h2>
214 | <p>The system is designed to scale horizontally with increasing load.</p>
215 | <p>Auto-scaling policies are configured for all compute resources.</p>
216 | <p>Database sharding is implemented for high-volume tenants.</p>
217 | </body>
218 | </html>"""
219 |
220 | # ---------------------------------------------------------------------------
221 | # 5. Human‑readable change checklist (for demo output only)
222 | # ---------------------------------------------------------------------------
223 | CHANGE_SUMMARY: Dict[str, List[str]] = {
224 | "insertions": [
225 | "New <meta author> tag",
226 | "'and integration' added to intro paragraph",
227 | "AES‑256 wording added to encryption para",
228 | "Two‑factor authentication mention added",
229 | "Independent security firm phrase added",
230 | "Entire penetration‑testing paragraph added",
231 | "'with enhanced performance' in exec summary",
232 | "Audience now includes operations teams",
233 | "ISO‑27001 paragraph added",
234 | "'cloud‑native' adjective added",
235 | "Real‑time processing detail added",
236 | "'Distributed' processing engine detail",
237 | "Multi‑region storage detail",
238 | "Rate‑limiting mention in API gateway",
239 | "Two new architecture components",
240 | "Protocol reference bumped to 6.3",
241 | "Performance tests requirement added",
242 | "New PERFORMANCE column in table",
243 | "New Monitoring‑platform row",
244 | "Whole SCALABILITY section added",
245 | ],
246 | "deletions": [
247 | "API‑gateway status 'Planning' removed",
248 | "Deployment‑process section removed",
249 | ],
250 | "moves": [
251 | "Appendix moved before Security section",
252 | "Security section moved before Exec‑Summary",
253 | ],
254 | "updates": [
255 | "<title> suffixed with '2025 Update'",
256 | "Meta description tweaked",
257 | "Updated date to 5 May 2025",
258 | "Support e‑mail address changed",
259 | "Processing‑engine status updated",
260 | ],
261 | }
262 |
263 | # ---------------------------------------------------------------------------
264 | # 6. Async helper running the diff + reporting
265 | # ---------------------------------------------------------------------------
266 | OUTPUT_DIR = Path(__file__).with_suffix("").parent / "redline_outputs"
267 | MARKDOWN_PATH = OUTPUT_DIR / "detected_redline_differences.md"
268 |
269 |
270 | async def generate_redline() -> None:
271 | CONSOLE.print("\n[bold blue]Generating HTML redline…[/bold blue]")
272 | OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
273 |
274 | try:
275 | result = await create_html_redline(
276 | original_html=ORIGINAL_HTML,
277 | modified_html=MODIFIED_HTML,
278 | detect_moves=True,
279 | include_css=True,
280 | add_navigation=True,
281 | output_format="html",
282 | generate_markdown=True,
283 | markdown_path=str(MARKDOWN_PATH),
284 | )
285 | except Exception as exc: # demo only
286 | LOGGER.error("Failed to generate redline", exc_info=True)
287 | CONSOLE.print(f"[red bold]Error:[/red bold] {escape(str(exc))}")
288 | return
289 |
290 | # ── Rich stats table ────────────────────────────────────────────────────────────
291 | stats_tbl = Table(title="Redline statistics", box=box.ROUNDED)
292 | stats_tbl.add_column("Metric", style="cyan")
293 | stats_tbl.add_column("Value", style="magenta")
294 | for k, v in result["stats"].items():
295 | stats_tbl.add_row(k.replace("_", " ").title(), str(v))
296 | stats_tbl.add_row("Processing time", f"{result['processing_time']:.3f}s")
297 | CONSOLE.print(stats_tbl)
298 |
299 | # ── manual checklist ────────────────────────────────────────────────────────────
300 | CONSOLE.print("\n[bold green]Manual checklist of expected changes[/bold green]")
301 | for cat, items in CHANGE_SUMMARY.items():
302 | CONSOLE.print(f"[cyan]{cat.title()}[/cyan] ({len(items)})")
303 | for idx, txt in enumerate(items, 1):
304 | CONSOLE.print(f" {idx:>2}. {txt}")
305 |
306 | # ── write HTML diff ─────────────────────────────────────────────────────────────
307 | html_path = OUTPUT_DIR / "comprehensive_redline_demo.html"
308 | try:
309 | await write_file(path=str(html_path), content=result["redline_html"])
310 | except (ToolError, Exception) as exc: # demo only
311 | LOGGER.warning("Unable to save HTML", exc_info=True)
312 | CONSOLE.print(f"\n[bold red]Warning:[/bold red] Could not save HTML — {exc}")
313 | else:
314 | LOGGER.info("Saved redline to %s", html_path)
315 | CONSOLE.print(f"\n[green]HTML written to:[/green] {html_path}")
316 |
317 | # ── ensure Markdown file exists (tool usually writes it already) ────────────────
318 | if not MARKDOWN_PATH.is_file() and "markdown_summary" in result:
319 | MARKDOWN_PATH.write_text(result["markdown_summary"], encoding="utf-8")
320 | if MARKDOWN_PATH.is_file():
321 | CONSOLE.print(f"[green]Markdown summary:[/green] {MARKDOWN_PATH}")
322 |
323 |
324 | # ───────────────────────────── 7. entrypoint ────────────────────────────────────────
325 | async def _amain() -> int:
326 | CONSOLE.rule("[white on blue]📝 Comprehensive Text-Redline Demo 📝")
327 | await generate_redline()
328 | CONSOLE.rule("Complete", style="green")
329 | return 0
330 |
331 |
332 | if __name__ == "__main__":
333 | sys.exit(asyncio.run(_amain()))
334 |
```
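Stripped of the Rich reporting, the core of the demo is a single call to `create_html_redline`. The condensed sketch below uses only the argument names and result keys exercised by the demo (`redline_html`, `stats`, `processing_time`); any other fields the tool may return are omitted here, and the sample HTML strings are placeholders.

```python
import asyncio

from ultimate_mcp_server.tools.text_redline_tools import create_html_redline


async def minimal_redline(original_html: str, modified_html: str) -> str:
    """Run the redline tool and return just the rendered HTML diff."""
    result = await create_html_redline(
        original_html=original_html,
        modified_html=modified_html,
        detect_moves=True,
        include_css=True,
        add_navigation=True,
        output_format="html",
    )
    # Result keys used by the demo above: "redline_html", "stats", "processing_time".
    print(f"Diff computed in {result['processing_time']:.3f}s; stats: {result['stats']}")
    return result["redline_html"]


if __name__ == "__main__":
    html = asyncio.run(minimal_redline("<p>old text</p>", "<p>new text</p>"))
    print(f"{len(html)} characters of redline HTML")
```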
--------------------------------------------------------------------------------
/ultimate_mcp_server/services/knowledge_base/manager.py:
--------------------------------------------------------------------------------
```python
1 | """Knowledge base manager for RAG functionality."""
2 | import time
3 | from typing import Any, Dict, List, Optional
4 |
5 | from ultimate_mcp_server.services.vector import VectorDatabaseService
6 | from ultimate_mcp_server.utils import get_logger
7 |
8 | logger = get_logger(__name__)
9 |
10 |
11 | class KnowledgeBaseManager:
12 | """
13 | Manager for creating and maintaining knowledge bases for RAG applications.
14 |
15 | The KnowledgeBaseManager provides a high-level interface for working with vector
16 | databases as knowledge bases for Retrieval-Augmented Generation (RAG) systems.
17 | It abstracts the complexities of vector database operations, focusing on the
18 | domain-specific needs of knowledge management for AI applications.
19 |
20 | Key Features:
21 | - Knowledge base lifecycle management (create, delete, list, get)
22 | - Document ingestion with metadata support
23 | - Vector embedding management for semantic search
24 | - Document chunking and processing
25 | - Persistence and durability guarantees
26 | - Metadata tracking for knowledge base statistics
27 |
28 | Architecture:
29 | The manager sits between RAG applications and the underlying vector database,
30 | providing domain-specific operations while delegating storage and embedding
31 | to specialized services. It primarily interacts with:
32 | 1. Vector Database Service - for persistent storage of embeddings and documents
33 | 2. Embedding Service - for converting text to vector representations
34 | 3. Text Chunking Service - for breaking documents into optimal retrieval units
35 |
36 | Technical Characteristics:
37 | - Asynchronous API for high throughput in server environments
38 | - Thread-safe operations for concurrent access
39 | - Consistent error handling and logging
40 | - Idempotent operations where possible
41 | - Transactional guarantees for critical operations
42 |
43 | This service is typically accessed through the singleton get_knowledge_base_manager()
44 | function, which ensures a single instance is shared across the application.
45 |
46 | Example Usage:
47 | ```python
48 | # Get the manager
49 | kb_manager = get_knowledge_base_manager()
50 |
51 | # Create a new knowledge base
52 | await kb_manager.create_knowledge_base(
53 | name="company_policies",
54 | description="Corporate policy documents and guidelines"
55 | )
56 |
57 | # Add documents with metadata
58 | await kb_manager.add_documents(
59 | knowledge_base_name="company_policies",
60 | documents=[
61 | "All employees must complete annual security training.",
62 | "Remote work is available for eligible positions with manager approval."
63 | ],
64 | metadatas=[
65 | {"source": "security_policy.pdf", "category": "security", "page": 12},
66 | {"source": "hr_handbook.pdf", "category": "remote_work", "page": 45}
67 | ],
68 | chunk_size=500,
69 | chunk_method="semantic"
70 | )
71 |
72 | # List available knowledge bases
73 | kb_list = await kb_manager.list_knowledge_bases()
74 | print(f"Found {kb_list['count']} knowledge bases")
75 |
76 | # Get details about a specific knowledge base
77 | kb_info = await kb_manager.get_knowledge_base("company_policies")
78 | doc_count = kb_info.get("metadata", {}).get("doc_count", 0)
79 | print(f"Knowledge base contains {doc_count} document chunks")
80 | ```
81 | """
82 |
83 | def __init__(self, vector_service: VectorDatabaseService):
84 | """Initialize the knowledge base manager.
85 |
86 | Args:
87 | vector_service: Vector database service for storing embeddings
88 | """
89 | self.vector_service = vector_service
90 | logger.info("Knowledge base manager initialized", extra={"emoji_key": "success"})
91 |
92 | async def create_knowledge_base(
93 | self,
94 | name: str,
95 | description: Optional[str] = None,
96 | embedding_model: Optional[str] = None,
97 | overwrite: bool = False
98 | ) -> Dict[str, Any]:
99 | """Create a new knowledge base.
100 |
101 | Args:
102 | name: Knowledge base name
103 | description: Optional description
104 | embedding_model: Optional embedding model name
105 | overwrite: Whether to overwrite existing knowledge base
106 |
107 | Returns:
108 | Knowledge base metadata
109 | """
110 | # Check if knowledge base already exists
111 | collections = await self.vector_service.list_collections()
112 |
113 | if name in collections and not overwrite:
114 | logger.warning(
115 | f"Knowledge base '{name}' already exists",
116 | extra={"emoji_key": "warning"}
117 | )
118 | return {"status": "exists", "name": name}
119 |
120 | # Create new collection for knowledge base
121 | metadata = {
122 | "type": "knowledge_base",
123 | "description": description or "",
124 | "created_at": time.time(),
125 | "doc_count": 0
126 | }
127 |
128 | # Only add embedding_model if not None (to avoid ChromaDB errors)
129 | if embedding_model is not None:
130 | metadata["embedding_model"] = embedding_model
131 |
132 | logger.debug(f"Creating knowledge base with metadata: {metadata}")
133 |
134 | # Ensure any existing collection is deleted first
135 | if overwrite:
136 | try:
137 | # Force delete any existing collection
138 | await self.vector_service.delete_collection(name)
139 | logger.debug(f"Force deleted existing collection '{name}' for clean creation")
140 | # Add a small delay to ensure deletion completes
141 | import asyncio
142 | await asyncio.sleep(0.2)
143 | except Exception as e:
144 | logger.debug(f"Error during force deletion: {str(e)}")
145 |
146 | try:
147 | await self.vector_service.create_collection(name, metadata=metadata)
148 |
149 | logger.info(
150 | f"Created knowledge base '{name}'",
151 | extra={"emoji_key": "success"}
152 | )
153 |
154 | return {
155 | "status": "created",
156 | "name": name,
157 | "metadata": metadata
158 | }
159 | except Exception as e:
160 | logger.error(
161 | f"Failed to create knowledge base '{name}': {str(e)}",
162 | extra={"emoji_key": "error"}
163 | )
164 | raise ValueError(f"Failed to create knowledge base: {str(e)}") from e
165 |
166 | async def delete_knowledge_base(self, name: str) -> Dict[str, Any]:
167 | """Delete a knowledge base.
168 |
169 | Args:
170 | name: Knowledge base name
171 |
172 | Returns:
173 | Deletion status
174 | """
175 | # Check if knowledge base exists
176 | collections = await self.vector_service.list_collections()
177 |
178 | if name not in collections:
179 | logger.warning(
180 | f"Knowledge base '{name}' not found",
181 | extra={"emoji_key": "warning"}
182 | )
183 | return {"status": "not_found", "name": name}
184 |
185 | # Delete collection
186 | await self.vector_service.delete_collection(name)
187 |
188 | logger.info(
189 | f"Deleted knowledge base '{name}'",
190 | extra={"emoji_key": "success"}
191 | )
192 |
193 | return {
194 | "status": "deleted",
195 | "name": name
196 | }
197 |
198 | async def list_knowledge_bases(self):
199 | """List all knowledge bases.
200 |
201 | Returns:
202 | List of knowledge bases with metadata
203 | """
204 | collection_names = await self.vector_service.list_collections()
205 | kb_list = []
206 |
207 | for name in collection_names:
208 | try:
209 | metadata = await self.vector_service.get_collection_metadata(name)
210 | # Only include collections that are knowledge bases
211 | if metadata and metadata.get("type") == "knowledge_base":
212 | # Create a simple dict with name and metadata
213 | kb = {
214 | "name": name,
215 | "metadata": metadata
216 | }
217 | kb_list.append(kb)
218 | except Exception as e:
219 | logger.error(
220 | f"Error getting metadata for collection '{name}': {str(e)}",
221 | extra={"emoji_key": "error"}
222 | )
223 |
224 | return {
225 | "count": len(kb_list),
226 | "knowledge_bases": kb_list
227 | }
228 |
229 | async def get_knowledge_base(self, name: str) -> Dict[str, Any]:
230 | """Get knowledge base metadata.
231 |
232 | Args:
233 | name: Knowledge base name
234 |
235 | Returns:
236 | Knowledge base metadata
237 | """
238 | # Check if knowledge base exists
239 | collections = await self.vector_service.list_collections()
240 |
241 | if name not in collections:
242 | logger.warning(
243 | f"Knowledge base '{name}' not found",
244 | extra={"emoji_key": "warning"}
245 | )
246 | return {"status": "not_found", "name": name}
247 |
248 | # Get metadata
249 | metadata = await self.vector_service.get_collection_metadata(name)
250 |
251 | if metadata.get("type") != "knowledge_base":
252 | logger.warning(
253 | f"Collection '{name}' is not a knowledge base",
254 | extra={"emoji_key": "warning"}
255 | )
256 | return {"status": "not_knowledge_base", "name": name}
257 |
258 | return {
259 | "status": "found",
260 | "name": name,
261 | "metadata": metadata
262 | }
263 |
264 | async def add_documents(
265 | self,
266 | knowledge_base_name: str,
267 | documents: List[str],
268 | metadatas: Optional[List[Dict[str, Any]]] = None,
269 | ids: Optional[List[str]] = None,
270 | embedding_model: Optional[str] = None,
271 | chunk_size: int = 1000,
272 | chunk_overlap: int = 200,
273 | chunk_method: str = "semantic"
274 | ) -> Dict[str, Any]:
275 | """Add documents to a knowledge base.
276 |
277 | Args:
278 | knowledge_base_name: Knowledge base name
279 | documents: List of document texts
280 | metadatas: Optional list of document metadata
281 | ids: Optional list of document IDs
282 | embedding_model: Optional embedding model name
283 | chunk_size: Chunk size for document processing
284 | chunk_overlap: Chunk overlap for document processing
285 | chunk_method: Chunking method (token, semantic, etc.)
286 |
287 | Returns:
288 | Document addition status
289 | """
290 | logger.debug(f"DEBUG: Adding documents to knowledge base '{knowledge_base_name}'")
291 | logger.debug(f"DEBUG: Document count: {len(documents)}")
292 | logger.debug(f"DEBUG: First document sample: {documents[0][:100]}...")
293 | logger.debug(f"DEBUG: Metadatas: {metadatas[:2] if metadatas else None}")
294 | logger.debug(f"DEBUG: Chunk settings - size: {chunk_size}, overlap: {chunk_overlap}, method: {chunk_method}")
295 |
296 | # Check if knowledge base exists
297 | kb_info = await self.get_knowledge_base(knowledge_base_name)
298 |
299 | if kb_info["status"] != "found":
300 | logger.warning(
301 | f"Knowledge base '{knowledge_base_name}' not found",
302 | extra={"emoji_key": "warning"}
303 | )
304 | return {"status": "not_found", "name": knowledge_base_name}
305 |
306 | try:
307 | # Add documents to vector store
308 | doc_ids = await self.vector_service.add_texts(
309 | collection_name=knowledge_base_name,
310 | texts=documents,
311 | metadatas=metadatas,
312 | ids=ids,
313 | embedding_model=embedding_model
314 | )
315 |
316 | # Update document count in metadata
317 | current_metadata = await self.vector_service.get_collection_metadata(knowledge_base_name)
318 | doc_count = current_metadata.get("doc_count", 0) + len(documents)
319 |
320 | # Prepare metadata updates
321 | metadata_updates = {"doc_count": doc_count}
322 |
323 | # Store embedding model in metadata if provided (for consistent retrieval)
324 | if embedding_model:
325 | metadata_updates["embedding_model"] = embedding_model
326 |
327 | # Update metadata
328 | await self.vector_service.update_collection_metadata(
329 | name=knowledge_base_name,
330 | metadata=metadata_updates
331 | )
332 |
333 | logger.info(
334 | f"Added {len(documents)} documents to knowledge base '{knowledge_base_name}'",
335 | extra={"emoji_key": "success"}
336 | )
337 |
338 | return {
339 | "status": "success",
340 | "name": knowledge_base_name,
341 | "added_count": len(documents),
342 | "ids": doc_ids
343 | }
344 | except Exception as e:
345 | logger.error(
346 | f"Error adding documents to knowledge base '{knowledge_base_name}': {str(e)}",
347 | extra={"emoji_key": "error"}
348 | )
349 | raise
```
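The class docstring notes that the manager is normally obtained through a module-level `get_knowledge_base_manager()` singleton rather than constructed directly. A minimal sketch of that pattern is shown below; how the real project constructs the `VectorDatabaseService` dependency is an assumption here (the no-argument constructor is used purely for illustration).

```python
from typing import Optional

from ultimate_mcp_server.services.knowledge_base.manager import KnowledgeBaseManager
from ultimate_mcp_server.services.vector import VectorDatabaseService

_kb_manager: Optional[KnowledgeBaseManager] = None


def get_knowledge_base_manager() -> KnowledgeBaseManager:
    """Illustrative singleton accessor: reuse one manager across the application."""
    global _kb_manager
    if _kb_manager is None:
        # Assumption for this sketch: the vector service can be constructed directly;
        # the real project may obtain it from its own service factory instead.
        _kb_manager = KnowledgeBaseManager(vector_service=VectorDatabaseService())
    return _kb_manager
```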
--------------------------------------------------------------------------------
/ultimate_mcp_server/cli/helpers.py:
--------------------------------------------------------------------------------
```python
1 | """Helper functions for the Ultimate MCP Server CLI."""
2 | import json
3 | import sys
4 | from typing import Any, Dict, List, Optional, Union
5 |
6 | from rich.console import Console
7 | from rich.markdown import Markdown
8 | from rich.panel import Panel
9 | from rich.syntax import Syntax
10 | from rich.table import Table
11 |
12 | from ultimate_mcp_server.config import get_env
13 | from ultimate_mcp_server.constants import COST_PER_MILLION_TOKENS, Provider
14 | from ultimate_mcp_server.utils import get_logger
15 |
16 | logger = get_logger(__name__)
17 | console = Console(file=sys.stderr)
18 |
19 |
20 | def print_cost_table() -> None:
21 | """Display pricing information for all supported LLM models.
22 |
23 | This function creates and prints a formatted table showing the cost per million tokens
24 | for various LLM models across all supported providers (OpenAI, Anthropic, DeepSeek, etc.).
25 | The table separates input token costs from output token costs, as these are typically
26 | billed at different rates.
27 |
28 | Models are grouped by provider and sorted alphabetically for easy reference.
29 | This information is useful for cost planning, provider comparison, and
30 | understanding the financial implications of different model choices.
31 | """
32 | # Create table
33 | table = Table(title="Model Cost Per Million Tokens")
34 | table.add_column("Provider", style="cyan")
35 | table.add_column("Model", style="blue")
36 | table.add_column("Input ($/M)", style="green")
37 | table.add_column("Output ($/M)", style="yellow")
38 |
39 | # Group models by provider
40 | models_by_provider = {}
41 | for model, costs in COST_PER_MILLION_TOKENS.items():
42 | # Determine provider
43 | provider = None
44 | if "gpt" in model:
45 | provider = Provider.OPENAI.value
46 | elif "claude" in model:
47 | provider = Provider.ANTHROPIC.value
48 | elif "deepseek" in model:
49 | provider = Provider.DEEPSEEK.value
50 | elif "gemini" in model:
51 | provider = Provider.GEMINI.value
52 | else:
53 | provider = "other"
54 |
55 | if provider not in models_by_provider:
56 | models_by_provider[provider] = []
57 |
58 | models_by_provider[provider].append((model, costs))
59 |
60 | # Add rows for each provider's models
61 | for provider in sorted(models_by_provider.keys()):
62 | models = sorted(models_by_provider[provider], key=lambda x: x[0])
63 |
64 | for model, costs in models:
65 | table.add_row(
66 | provider,
67 | model,
68 | f"${costs['input']:.3f}",
69 | f"${costs['output']:.3f}"
70 | )
71 |
72 | # Print table
73 | console.print(table)
74 |
75 |
76 | def format_tokens(tokens: int) -> str:
77 | """Format token count with thousands separator for better readability.
78 |
79 | Converts raw token counts (e.g., 1234567) into a more human-readable format
80 | with commas as thousand separators (e.g., "1,234,567"). This improves
81 | the readability of token usage statistics in CLI outputs and reports.
82 |
83 | Args:
84 | tokens: Raw token count as an integer
85 |
86 | Returns:
87 | Formatted string with thousand separators (e.g., "1,234,567")
88 | """
89 | return f"{tokens:,}"
90 |
91 |
92 | def format_duration(seconds: float) -> str:
93 | """Format time duration in a human-friendly, adaptive format.
94 |
95 | Converts raw seconds into a more readable format, automatically selecting
96 | the appropriate unit based on the magnitude:
97 | - Milliseconds for durations under 0.1 seconds
98 | - Seconds with decimal precision for durations under 60 seconds
99 | - Minutes and seconds for longer durations
100 |
101 | This provides intuitive time displays in benchmarks and performance reports.
102 |
103 | Args:
104 | seconds: Duration in seconds (can be fractional)
105 |
106 | Returns:
107 | Formatted string like "50ms", "2.45s", or "1m 30.5s" depending on duration
108 | """
109 | if seconds < 0.1:
110 | return f"{seconds * 1000:.0f}ms"
111 | elif seconds < 60:
112 | return f"{seconds:.2f}s"
113 | else:
114 | minutes = int(seconds // 60)
115 | remaining_seconds = seconds % 60
116 | return f"{minutes}m {remaining_seconds:.1f}s"
117 |
118 |
119 | def save_output_to_file(text: str, file_path: str, mode: str = "w") -> bool:
120 | """Write text content to a file with error handling and user feedback.
121 |
122 | This utility function safely writes text to a file, handling encoding
123 | and providing user feedback on success or failure. It's commonly used
124 | to save LLM outputs, generated code, or other text data for later use.
125 |
126 | Args:
127 | text: The string content to write to the file
128 | file_path: Target file path (absolute or relative to current directory)
129 | mode: File open mode - "w" for overwrite or "a" for append to existing content
130 |
131 | Returns:
132 | Boolean indicating success (True) or failure (False)
133 | """
134 | try:
135 | with open(file_path, mode, encoding="utf-8") as f:
136 | f.write(text)
137 |
138 | console.print(f"[green]Output saved to {file_path}[/green]")
139 | return True
140 | except Exception as e:
141 | console.print(f"[red]Error saving output: {str(e)}[/red]")
142 | return False
143 |
144 |
145 | def load_file_content(file_path: str) -> Optional[str]:
146 | """Read and return the entire contents of a text file.
147 |
148 | This utility function safely reads text from a file with proper UTF-8 encoding,
149 | handling any errors that may occur during the process. It's useful for loading
150 | prompts, templates, or other text files needed for LLM operations.
151 |
152 | Args:
153 | file_path: Path to the file to read (absolute or relative to current directory)
154 |
155 | Returns:
156 | The file's contents as a string if successful, or None if an error occurred
157 | """
158 | try:
159 | with open(file_path, "r", encoding="utf-8") as f:
160 | return f.read()
161 | except Exception as e:
162 | console.print(f"[red]Error loading file: {str(e)}[/red]")
163 | return None
164 |
165 |
166 | def print_markdown(markdown_text: str) -> None:
167 | """Display Markdown content with proper formatting and styling.
168 |
169 | Renders Markdown text with appropriate styling (headings, bold, italic,
170 | lists, code blocks, etc.) in the terminal using Rich's Markdown renderer.
171 | This provides a more readable and visually appealing output for
172 | documentation, examples, or LLM responses that use Markdown formatting.
173 |
174 | Args:
175 | markdown_text: Raw Markdown-formatted text to render
176 | """
177 | md = Markdown(markdown_text)
178 | console.print(md)
179 |
180 |
181 | def print_json(json_data: Union[Dict, List]) -> None:
182 | """Display JSON data with syntax highlighting and proper formatting.
183 |
184 | Converts a Python dictionary or list into a properly indented JSON string
185 | and displays it with syntax highlighting for improved readability.
186 | This is useful for displaying API responses, configuration data,
187 | or other structured data in a human-friendly format.
188 |
189 | Args:
190 | json_data: Python dictionary or list to be displayed as formatted JSON
191 | """
192 | json_str = json.dumps(json_data, indent=2)
193 | syntax = Syntax(json_str, "json", theme="monokai", word_wrap=True)
194 | console.print(syntax)
195 |
196 |
197 | def print_code(code: str, language: str = "python") -> None:
198 | """Display source code with syntax highlighting and line numbers.
199 |
200 | Renders code with proper syntax highlighting based on the specified language,
201 | along with line numbers for easier reference. This improves readability
202 | when displaying code examples, LLM-generated code, or code snippets
203 | from files.
204 |
205 | Args:
206 | code: Source code text to display
207 | language: Programming language for syntax highlighting (e.g., "python",
208 | "javascript", "rust", "sql", etc.)
209 | """
210 | syntax = Syntax(code, language, theme="monokai", line_numbers=True)
211 | console.print(syntax)
212 |
213 |
214 | def print_model_comparison(
215 | provider: str,
216 | models: List[str],
217 | metrics: List[Dict[str, Any]]
218 | ) -> None:
219 | """Display a side-by-side comparison of multiple models from the same provider.
220 |
221 | Creates a formatted table comparing performance metrics for different models
222 | from the same LLM provider. This is useful for identifying the optimal model
223 | for specific use cases based on response time, throughput, and cost metrics.
224 |
225 | The comparison includes:
226 | - Response time (formatted appropriately for the magnitude)
227 | - Processing speed (tokens per second)
228 | - Cost per request
229 | - Total token usage
230 |
231 | Args:
232 | provider: Name of the LLM provider (e.g., "openai", "anthropic")
233 | models: List of model identifiers to compare
234 | metrics: List of dictionaries containing performance metrics for each model,
235 | with keys like "time", "tokens_per_second", "cost", "total_tokens"
236 | """
237 | # Create table
238 | table = Table(title=f"{provider.capitalize()} Model Comparison")
239 | table.add_column("Model", style="cyan")
240 | table.add_column("Response Time", style="green")
241 | table.add_column("Tokens/Sec", style="yellow")
242 | table.add_column("Cost", style="magenta")
243 | table.add_column("Total Tokens", style="dim")
244 |
245 | # Add rows for each model
246 | for model, metric in zip(models, metrics, strict=False):
247 | table.add_row(
248 | model,
249 | format_duration(metric.get("time", 0)),
250 | f"{metric.get('tokens_per_second', 0):.1f}",
251 | f"${metric.get('cost', 0):.6f}",
252 | format_tokens(metric.get("total_tokens", 0))
253 | )
254 |
255 | # Print table
256 | console.print(table)
257 |
258 |
259 | def print_environment_info() -> None:
260 | """Display current environment configuration for diagnostics.
261 |
262 | Creates a formatted table showing important environment variables and their
263 | current values, with a focus on API keys, logging configuration, and cache settings.
264 | This is useful for troubleshooting and verifying that the environment is
265 | configured correctly before running the server or other commands.
266 |
267 | The output includes:
268 | - Status of API keys for each supported provider (set or not set)
269 | - Logging level configuration
270 | - Cache settings
271 | - Other relevant environment variables
272 | """
273 | # Create table
274 | table = Table(title="Environment Information")
275 | table.add_column("Setting", style="cyan")
276 | table.add_column("Value", style="green")
277 |
278 | # Add API key info
279 | for provider in [p.value for p in Provider]:
280 | env_var = f"{provider.upper()}_API_KEY"
281 | has_key = bool(get_env(env_var))
282 | table.add_row(env_var, "✅ Set" if has_key else "❌ Not set")
283 |
284 | # Add other environment variables
285 | for var in ["LOG_LEVEL", "CACHE_ENABLED", "CACHE_DIR"]:
286 | value = get_env(var, "Not set")
287 | table.add_row(var, value)
288 |
289 | # Print table
290 | console.print(table)
291 |
292 |
293 | def print_examples() -> None:
294 | """Display common usage examples for the CLI commands.
295 |
296 | Shows a set of syntax-highlighted example commands demonstrating how to use
297 | the most common features of the Ultimate MCP Server CLI. This helps users
298 | quickly learn the command patterns and options available without having to
299 | consult the full documentation.
300 |
301 | Examples cover:
302 | - Starting the server
303 | - Listing and testing providers
304 | - Generating completions (with and without streaming)
305 | - Running benchmarks
306 | - Managing the cache
307 | """
308 | examples = """
309 | # Run the server
310 | ultimate-mcp-server run --host 0.0.0.0 --port 8013
311 |
312 | # List available providers
313 | ultimate-mcp-server providers --check
314 |
315 | # Test a provider
316 | ultimate-mcp-server test openai --model gpt-4.1-mini --prompt "Hello, world!"
317 |
318 | # Generate a completion
319 | ultimate-mcp-server complete --provider anthropic --model claude-3-5-haiku-20241022 --prompt "Explain quantum computing"
320 |
321 | # Stream a completion
322 | ultimate-mcp-server complete --provider openai --stream --prompt "Write a poem about AI"
323 |
324 | # Run benchmarks
325 | ultimate-mcp-server benchmark --providers openai anthropic --runs 3
326 |
327 | # Check cache status
328 | ultimate-mcp-server cache --status
329 |
330 | # Clear cache
331 | ultimate-mcp-server cache --clear
332 | """
333 |
334 | syntax = Syntax(examples, "bash", theme="monokai", word_wrap=True)
335 | console.print(Panel(syntax, title="CLI Examples", border_style="cyan"))
336 |
337 |
338 | def confirm_action(message: str, default: bool = False) -> bool:
339 | """Prompt the user for confirmation before performing a potentially destructive action.
340 |
341 | Displays a yes/no prompt with the specified message and waits for user input.
342 | This is used to confirm potentially destructive operations like clearing the cache
343 | or deleting files to prevent accidental data loss.
344 |
345 | Args:
346 | message: The question or confirmation message to display to the user
347 | default: The default response if the user just presses Enter without typing
348 | anything (True for yes, False for no)
349 |
350 | Returns:
351 | Boolean indicating whether the user confirmed (True) or canceled (False) the action
352 | """
353 | default_str = "Y/n" if default else "y/N"
354 | response = input(f"{message} [{default_str}]: ")
355 |
356 | if not response:
357 | return default
358 |
359 | return response.lower() in ["y", "yes"]
```
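
The helpers above are plain functions and can be exercised directly; a short sketch (with illustrative placeholder numbers) showing the formatting utilities and the model-comparison table:

```python
from ultimate_mcp_server.cli.helpers import (
    format_duration,
    format_tokens,
    print_model_comparison,
)

# Formatting utilities.
print(format_tokens(1234567))  # "1,234,567"
print(format_duration(0.042))  # "42ms"
print(format_duration(95.3))   # "1m 35.3s"

# Side-by-side comparison for two models from one provider
# (metric values here are made-up placeholders, not measurements).
print_model_comparison(
    provider="openai",
    models=["gpt-4.1-mini", "gpt-4.1-nano"],
    metrics=[
        {"time": 2.45, "tokens_per_second": 112.3, "cost": 0.00042, "total_tokens": 812},
        {"time": 1.10, "tokens_per_second": 180.0, "cost": 0.00011, "total_tokens": 640},
    ],
)
```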
--------------------------------------------------------------------------------
/examples/measure_model_speeds.py:
--------------------------------------------------------------------------------
```python
1 | import argparse
2 | import asyncio
3 | import json
4 | import os
5 | import sys
6 | import time
7 | from typing import Any, Dict, List
8 |
9 | from rich.console import Console
10 | from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
11 | from rich.table import Table
12 |
13 | # --- Add project root to sys.path ---
14 | script_dir = os.path.dirname(os.path.abspath(__file__))
15 | project_root = os.path.dirname(script_dir)
16 | sys.path.insert(0, project_root)
17 | # -------------------------------------
18 |
19 | from ultimate_mcp_server.constants import ( # noqa: E402
20 | COST_PER_MILLION_TOKENS,
21 | Provider,
22 | )
23 | from ultimate_mcp_server.exceptions import ( # noqa: E402
24 | ProviderError,
25 | ToolError,
26 | )
27 | from ultimate_mcp_server.tools.completion import generate_completion # noqa: E402
28 | from ultimate_mcp_server.utils import get_logger # noqa: E402
29 | from ultimate_mcp_server.utils.display import CostTracker # noqa: E402
30 |
31 | # Use Rich Console for better output
32 | console = Console()
33 | logger = get_logger("measure_model_speeds")
34 |
35 | # --- Configuration ---
36 | DEFAULT_PROMPT = (
37 | "Explain the concept of Transfer Learning in Machine Learning in about 300 words. "
38 | "Detail its primary benefits, common use cases across different domains (like NLP and Computer Vision), "
39 | "and mention potential challenges or limitations when applying it."
40 | )
41 | DEFAULT_OUTPUT_FILENAME = "empirically_measured_model_speeds.json"
 42 | # Exclude models known not to work well with simple completion or that require specific setup
43 | EXCLUDED_MODELS_BY_DEFAULT = [
44 | "mistralai/mistral-nemo", # Often requires specific setup/endpoint
45 | # Add others if they consistently cause issues in this simple test
46 | ]
47 | DEFAULT_MODELS_TO_TEST = [
48 | m for m in COST_PER_MILLION_TOKENS.keys() if m not in EXCLUDED_MODELS_BY_DEFAULT
49 | ]
50 |
51 | # Re-introduce the provider extraction logic
52 | def extract_provider_model(model_identifier: str) -> tuple[str | None, str]:
 53 |     """Extract the provider and model name, stripping the provider prefix from the model name when a known provider prefix is recognized."""
54 | model_identifier = model_identifier.strip()
55 | provider: str | None = None
56 | model_name_only: str = model_identifier # Start with the original identifier
57 |
58 | # 1. Check for explicit provider prefix (using /)
59 | known_providers = [p.value for p in Provider] # Get list of known providers
60 | if '/' in model_identifier:
61 | parts = model_identifier.split('/', 1)
62 | # Patch: If the model is an OpenRouter model like 'mistralai/mistral-nemo', treat as openrouter
63 | if model_identifier.startswith('mistralai/') or model_identifier == 'mistralai/mistral-nemo':
64 | provider = Provider.OPENROUTER.value
65 | model_name_only = model_identifier
66 | elif len(parts) == 2 and parts[0] in known_providers and parts[1]:
67 | provider = parts[0]
68 | model_name_only = parts[1]
69 | # Handle potential nested OpenRouter names like openrouter/mistralai/mistral-7b
70 | # The current split('/', 1) already achieves this.
71 | else:
72 | # It has a slash, but doesn't match known provider format
73 | logger.warning(f"Invalid or unknown provider prefix in '{model_identifier}'. Cannot extract provider reliably.")
74 | return None, model_identifier # Return original identifier if prefix is invalid
75 |
76 | # 2. Infer provider from model name pattern if no prefix was found
77 | if provider is None:
78 | if model_identifier.startswith('claude-'):
79 | provider = Provider.ANTHROPIC.value
80 | elif model_identifier.startswith('gemini-'):
81 | provider = Provider.GEMINI.value
82 | elif model_identifier.startswith('deepseek-'):
83 | provider = Provider.DEEPSEEK.value
84 | elif model_identifier.startswith('grok-'): # Added Grok
85 | provider = Provider.GROK.value
86 | # Add other inferences if necessary
87 |
88 | # Assume OpenAI if it looks like an OpenAI model (common short names or gpt- prefix)
89 | openai_short_names = [
90 | 'gpt-4o', 'gpt-4o-mini', 'gpt-4.1', 'gpt-4.1-mini', 'gpt-4.1-nano',
91 | 'o1-preview', 'o3-mini', 'gpt-3.5-turbo'
92 | ]
93 | if provider is None and (model_identifier in openai_short_names or model_identifier.startswith('gpt-')):
94 | provider = Provider.OPENAI.value
95 |
96 | # If provider was inferred, model_name_only is already correct (the original identifier)
97 |
98 | # 3. Return provider and model_name_only (which has prefix removed if found)
99 | if provider:
100 | # Log the extracted provider and model name for clarity during debugging
101 | logger.debug(f"Extracted Provider: {provider}, Model Name: {model_name_only} from Input: {model_identifier}")
102 | return provider, model_name_only
103 | else:
104 | # If provider couldn't be determined even after inference
105 | logger.error(f"Could not determine provider for '{model_identifier}'. Skipping measurement.")
106 | return None, model_identifier # Return original identifier as model_name if provider is unknown
107 |
108 | async def measure_speed(model_identifier: str, prompt: str, tracker: CostTracker) -> Dict[str, Any]:
109 | """Measures the completion speed for a single model by calling the tool directly."""
110 | result_data: Dict[str, Any] = {}
111 |
112 | # Extract provider and model name using the helper
113 | provider, model_name = extract_provider_model(model_identifier)
114 |
115 | if provider is None:
116 | # Skip if provider could not be determined
117 | return {"error": f"Could not determine provider for '{model_identifier}'", "error_code": "INVALID_PARAMETER"}
118 |
119 | # logger.info(f"Testing model {provider}/{model_name}...", emoji_key="timer") # Progress bar shows this
120 |
121 | try:
122 | start_time = time.monotonic()
123 | # Call generate_completion with explicit provider and model name
124 | result = await generate_completion(
125 | provider=provider, # Pass the determined provider
126 | model=model_name, # Pass the model name (without prefix)
127 | prompt=prompt,
128 | # Optional: max_tokens=500
129 | )
130 | end_time = time.monotonic()
131 |
132 | if result and isinstance(result, dict) and result.get("success"):
133 | # Track cost for successful calls
134 | tracker.add_call(result)
135 |
136 | processing_time = result.get("processing_time")
137 | if processing_time is None:
138 | processing_time = end_time - start_time
139 |
140 | output_tokens = result.get("tokens", {}).get("output", 0)
141 |
142 | if processing_time > 0 and output_tokens > 0:
143 | tokens_per_second = output_tokens / processing_time
144 | result_data = {
145 | "total_time_s": round(processing_time, 3),
146 | "output_tokens": output_tokens,
147 | "output_tokens_per_second": round(tokens_per_second, 2),
148 | }
149 | elif output_tokens == 0:
150 | logger.warning(f"Warning: {model_identifier} - Completed but generated 0 output tokens.", emoji_key="warning")
151 | result_data = {"error": "Completed with 0 output tokens", "total_time_s": round(processing_time, 3)}
152 | else:
153 | logger.warning(f"Warning: {model_identifier} - Processing time reported as {processing_time:.4f}s. Cannot calculate tokens/s reliably.", emoji_key="warning")
154 | result_data = {"error": "Processing time too low to calculate speed", "total_time_s": round(processing_time, 3)}
155 | else:
156 | manual_time = end_time - start_time
157 | error_message = result.get("error", "Unknown error or unexpected result format")
158 | error_code = result.get("error_code", "UNKNOWN_ERROR")
159 | logger.error(f"Error: {model_identifier} - Tool call failed. Manual Time: {manual_time:.2f}s. Error: {error_message} ({error_code})", emoji_key="error")
160 | result_data = {"error": error_message, "error_code": error_code, "manual_time_s": round(manual_time, 3)}
161 |
162 | except ProviderError as e:
163 | logger.error(f"Error: {model_identifier} ({provider}) - Provider Error: {e}", emoji_key="error", exc_info=False)
164 | result_data = {"error": str(e), "error_code": getattr(e, 'error_code', 'PROVIDER_ERROR')}
165 | except ToolError as e:
166 | logger.error(f"Error: {model_identifier} ({provider}) - Tool Error: {e}", emoji_key="error", exc_info=False)
167 | result_data = {"error": str(e), "error_code": getattr(e, 'error_code', 'TOOL_ERROR')}
168 | except Exception as e:
169 | logger.error(f"Error: {model_identifier} ({provider}) - Unexpected error: {e}", emoji_key="error", exc_info=True)
170 | result_data = {"error": f"Unexpected error: {str(e)}"}
171 |
172 | return result_data
173 |
174 | async def main(models_to_test: List[str], output_file: str, prompt: str):
175 | """Main function to run speed tests and save results."""
176 | logger.info("Starting LLM speed measurement script...", emoji_key="rocket")
177 | tracker = CostTracker() # Instantiate tracker
178 | results: Dict[str, Dict[str, Any]] = {}
179 |
180 | # Use Rich Progress bar
181 | with Progress(
182 | SpinnerColumn(),
183 | "[progress.description]{task.description}",
184 | BarColumn(),
185 | "[progress.percentage]{task.percentage:>3.0f}%",
186 | TimeElapsedColumn(),
187 | TextColumn("[bold green]{task.completed} done"),
188 | console=console,
189 | transient=False, # Keep progress bar after completion
190 | ) as progress:
191 | task = progress.add_task("[cyan]Measuring speeds...", total=len(models_to_test))
192 |
193 | for model_id in models_to_test:
194 | progress.update(task, description=f"[cyan]Measuring speeds... [bold yellow]({model_id})[/]")
195 | if not model_id or not isinstance(model_id, str):
196 | logger.warning(f"Skipping invalid model entry: {model_id}")
197 | progress.update(task, advance=1)
198 | continue
199 |
200 | results[model_id] = await measure_speed(model_id, prompt, tracker)
201 | progress.update(task, advance=1)
202 | # await asyncio.sleep(0.1) # Reduce sleep time if desired
203 |
204 | # --- Display Results Table ---
205 | table = Table(title="LLM Speed Measurement Results", show_header=True, header_style="bold magenta")
206 | table.add_column("Model", style="dim cyan", width=40)
207 | table.add_column("Time (s)", justify="right", style="green")
208 | table.add_column("Output Tokens", justify="right", style="blue")
209 | table.add_column("Tokens/s", justify="right", style="bold yellow")
210 | table.add_column("Status/Error", style="red")
211 |
212 | for model_id, data in sorted(results.items()):
213 | if "error" in data:
214 | status = f"Error: {data['error']}"
215 | if 'error_code' in data:
216 | status += f" ({data['error_code']})"
217 | time_s = data.get("total_time_s") or data.get("manual_time_s")
218 | time_str = f"{time_s:.2f}" if time_s is not None else "-"
219 | table.add_row(model_id, time_str, "-", "-", status)
220 | else:
221 | table.add_row(
222 | model_id,
223 | f"{data.get('total_time_s', 0):.2f}",
224 | str(data.get('output_tokens', '-')),
225 | f"{data.get('output_tokens_per_second', 0):.2f}",
226 | "Success"
227 | )
228 | console.print(table)
229 |
230 | # Display cost summary
231 | tracker.display_summary(console)
232 |
233 | # --- Save Results --- (Saving logic remains the same)
234 | script_dir = os.path.dirname(os.path.abspath(__file__))
235 | project_root = os.path.dirname(script_dir)
236 | output_path = os.path.join(project_root, output_file)
237 |
238 | logger.info(f"Saving results to: {output_path}", emoji_key="save")
239 | try:
240 | with open(output_path, 'w') as f:
241 | json.dump(results, f, indent=4)
242 | logger.info("Results saved successfully.", emoji_key="success")
243 | except IOError as e:
244 | logger.error(f"Failed to write results to {output_path}: {e}", emoji_key="error", exc_info=True)
245 | console.print(f"[bold red]Error:[/bold red] Could not write results to {output_path}. Check permissions. Details: {e}")
246 |
247 | logger.info("Speed measurement script finished.", emoji_key="checkered_flag")
248 |
249 |
250 | if __name__ == "__main__":
251 | parser = argparse.ArgumentParser(description="Measure LLM completion speeds.")
252 | parser.add_argument(
253 | "--models",
254 | nargs='+',
255 | default=DEFAULT_MODELS_TO_TEST,
256 | help="Space-separated list of models to test (e.g., openai/gpt-4o-mini anthropic/claude-3-5-haiku-20241022). Defaults to available models."
257 | )
258 | parser.add_argument(
259 | "--output",
260 | default=DEFAULT_OUTPUT_FILENAME,
261 | help=f"Output JSON filename. Defaults to {DEFAULT_OUTPUT_FILENAME} in the project root."
262 | )
263 | parser.add_argument(
264 | "--prompt",
265 | default=DEFAULT_PROMPT,
266 | help="The prompt to use for testing."
267 | )
268 |
269 | args = parser.parse_args()
270 |
271 | if not args.models or not all(isinstance(m, str) and m for m in args.models):
272 | console.print("[bold red]Error:[/bold red] Invalid --models argument. Please provide a list of non-empty model names.")
273 |         sys.exit(1)
274 |
275 | models_unique = sorted(list(set(args.models)))
276 | # Use Rich print for startup info
277 | console.print("[bold blue]--- LLM Speed Measurement ---[/bold blue]")
278 | console.print(f"Models to test ({len(models_unique)}): [cyan]{', '.join(models_unique)}[/cyan]")
279 | console.print(f"Output file: [green]{args.output}[/green]")
280 | console.print(f"Prompt length: {len(args.prompt)} characters")
281 | console.print("[bold blue]-----------------------------[/bold blue]")
282 |
283 | asyncio.run(main(models_unique, args.output, args.prompt))
```
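
The script is normally invoked from the command line, but `main()` can also be driven programmatically; a sketch using the example model IDs from the `--models` help text (assumes the matching provider API keys are set in the environment):

```python
import asyncio

from examples.measure_model_speeds import DEFAULT_PROMPT, main

# Measure two models and write results to the project root under the
# script's default output filename.
asyncio.run(
    main(
        models_to_test=["openai/gpt-4o-mini", "anthropic/claude-3-5-haiku-20241022"],
        output_file="empirically_measured_model_speeds.json",
        prompt=DEFAULT_PROMPT,
    )
)
```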
--------------------------------------------------------------------------------
/ultimate_mcp_server/core/providers/deepseek.py:
--------------------------------------------------------------------------------
```python
1 | """DeepSeek provider implementation."""
2 | import time
3 | from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
4 |
5 | from openai import AsyncOpenAI
6 |
7 | from ultimate_mcp_server.constants import Provider
8 | from ultimate_mcp_server.core.providers.base import BaseProvider, ModelResponse
9 | from ultimate_mcp_server.utils import get_logger
10 |
11 | # Use the same naming scheme everywhere: logger at module level
12 | logger = get_logger("ultimate_mcp_server.providers.deepseek")
13 |
14 |
15 | class DeepSeekProvider(BaseProvider):
16 | """Provider implementation for DeepSeek API (using OpenAI-compatible interface)."""
17 |
18 | provider_name = Provider.DEEPSEEK.value
19 |
20 | def __init__(self, api_key: Optional[str] = None, **kwargs):
21 | """Initialize the DeepSeek provider.
22 |
23 | Args:
24 | api_key: DeepSeek API key
25 | **kwargs: Additional options
26 | """
27 | super().__init__(api_key=api_key, **kwargs)
28 | self.base_url = kwargs.get("base_url", "https://api.deepseek.com")
29 | self.models_cache = None
30 |
31 | async def initialize(self) -> bool:
32 | """Initialize the DeepSeek client.
33 |
34 | Returns:
35 | bool: True if initialization was successful
36 | """
37 | try:
38 | # DeepSeek uses OpenAI-compatible API
39 | self.client = AsyncOpenAI(
40 | api_key=self.api_key,
41 | base_url=self.base_url,
42 | )
43 |
44 | self.logger.success(
45 | "DeepSeek provider initialized successfully",
46 | emoji_key="provider"
47 | )
48 | return True
49 |
50 | except Exception as e:
51 | self.logger.error(
52 | f"Failed to initialize DeepSeek provider: {str(e)}",
53 | emoji_key="error"
54 | )
55 | return False
56 |
57 | async def generate_completion(
58 | self,
59 | prompt: Optional[str] = None,
60 | messages: Optional[List[Dict[str, Any]]] = None,
61 | model: Optional[str] = None,
62 | max_tokens: Optional[int] = None,
63 | temperature: float = 0.7,
64 | json_mode: bool = False,
65 | **kwargs
66 | ) -> ModelResponse:
67 | """Generate a completion using DeepSeek's API.
68 |
69 | Args:
70 | prompt: Text prompt to send to the model (optional if messages provided)
71 | messages: List of message dictionaries (optional if prompt provided)
72 | model: Model name to use
73 | max_tokens: Maximum tokens to generate
74 | temperature: Temperature parameter (0.0-1.0)
75 | json_mode: If True, attempt to generate JSON output
76 | **kwargs: Additional parameters
77 |
78 | Returns:
79 | ModelResponse with the completion result
80 | """
81 | if not self.client:
82 | await self.initialize()
83 |
84 | # Verify we have either prompt or messages
85 | if prompt is None and not messages:
86 | raise ValueError("Either prompt or messages must be provided")
87 |
88 | # Use default model if not specified
89 | model = model or self.get_default_model()
90 |
91 | # Prepare API parameters
92 | if messages:
93 | # Using chat completion with messages
94 | params = {
95 | "model": model,
96 | "messages": messages,
97 | "temperature": temperature
98 | }
99 | else:
100 | # Using completion with prompt
101 | # Convert prompt to messages format for DeepSeek
102 | params = {
103 | "model": model,
104 | "messages": [{"role": "user", "content": prompt}],
105 | "temperature": temperature
106 | }
107 |
108 | # Add max_tokens if provided
109 | if max_tokens is not None:
110 | params["max_tokens"] = max_tokens
111 |
112 | # Handle JSON mode via response_format for compatible models
113 | if json_mode:
114 | params["response_format"] = {"type": "json_object"}
115 | self.logger.debug("Setting response_format to JSON mode for DeepSeek")
116 |
117 | # Add any remaining parameters
118 | for key, value in kwargs.items():
119 | if key not in params:
120 | params[key] = value
121 |
122 | # Log request parameters
123 | prompt_length = len(prompt) if prompt else sum(len(m.get("content", "")) for m in messages)
124 | self.logger.info(
125 | f"Generating completion with DeepSeek model {model}",
126 | emoji_key=self.provider_name,
127 | prompt_length=prompt_length,
128 | json_mode=json_mode
129 | )
130 |
131 | try:
132 | # Start timer
133 | start_time = time.time()
134 |
135 | # Make API call
136 | response = await self.client.chat.completions.create(**params)
137 |
138 | # Calculate processing time
139 | processing_time = time.time() - start_time
140 |
141 | # Extract text from response
142 | completion_text = response.choices[0].message.content
143 |
144 | # Create ModelResponse
145 | result = ModelResponse(
146 | text=completion_text,
147 | model=f"{self.provider_name}/{model}",
148 | provider=self.provider_name,
149 | input_tokens=response.usage.prompt_tokens,
150 | output_tokens=response.usage.completion_tokens,
151 | total_tokens=response.usage.total_tokens,
152 | processing_time=processing_time,
153 | raw_response=response
154 | )
155 |
156 | # Add message for compatibility with chat_completion
157 | result.message = {"role": "assistant", "content": completion_text}
158 |
159 | # Log success
160 | self.logger.success(
161 | "DeepSeek completion successful",
162 | emoji_key="success",
163 | model=model,
164 | tokens={"input": result.input_tokens, "output": result.output_tokens},
165 | cost=result.cost,
166 | time=processing_time
167 | )
168 |
169 | return result
170 |
171 | except Exception as e:
172 | # Log error
173 | self.logger.error(
174 | f"DeepSeek completion failed: {str(e)}",
175 | emoji_key="error",
176 | model=model
177 | )
178 | raise
179 |
180 | async def generate_completion_stream(
181 | self,
182 | prompt: Optional[str] = None,
183 | messages: Optional[List[Dict[str, Any]]] = None,
184 | model: Optional[str] = None,
185 | max_tokens: Optional[int] = None,
186 | temperature: float = 0.7,
187 | json_mode: bool = False,
188 | **kwargs
189 | ) -> AsyncGenerator[Tuple[str, Dict[str, Any]], None]:
190 | """Generate a streaming completion using DeepSeek.
191 |
192 | Args:
193 | prompt: Text prompt to send to the model (optional if messages provided)
194 | messages: List of message dictionaries (optional if prompt provided)
195 | model: Model name to use
196 | max_tokens: Maximum tokens to generate
197 | temperature: Temperature parameter (0.0-1.0)
198 | json_mode: If True, attempt to generate JSON output
199 | **kwargs: Additional parameters
200 |
201 | Yields:
202 | Tuples of (text_chunk, metadata)
203 | """
204 | if not self.client:
205 | await self.initialize()
206 |
207 | # Verify we have either prompt or messages
208 | if prompt is None and not messages:
209 | raise ValueError("Either prompt or messages must be provided")
210 |
211 | # Use default model if not specified
212 | model = model or self.get_default_model()
213 |
214 | # Prepare API parameters
215 | if messages:
216 | # Using chat completion with messages
217 | params = {
218 | "model": model,
219 | "messages": messages,
220 | "temperature": temperature,
221 | "stream": True
222 | }
223 | else:
224 | # Using completion with prompt
225 | # Convert prompt to messages format for DeepSeek
226 | params = {
227 | "model": model,
228 | "messages": [{"role": "user", "content": prompt}],
229 | "temperature": temperature,
230 | "stream": True
231 | }
232 |
233 | # Add max_tokens if provided
234 | if max_tokens is not None:
235 | params["max_tokens"] = max_tokens
236 |
237 | # Handle JSON mode via response_format for compatible models
238 | if json_mode:
239 | params["response_format"] = {"type": "json_object"}
240 | self.logger.debug("Setting response_format to JSON mode for DeepSeek streaming")
241 |
242 | # Add any remaining parameters
243 | for key, value in kwargs.items():
244 | if key not in params and key != "stream": # Don't allow overriding stream
245 | params[key] = value
246 |
247 | # Log request parameters
248 | prompt_length = len(prompt) if prompt else sum(len(m.get("content", "")) for m in messages)
249 | self.logger.info(
250 | f"Generating streaming completion with DeepSeek model {model}",
251 | emoji_key=self.provider_name,
252 | prompt_length=prompt_length,
253 | json_mode=json_mode
254 | )
255 |
256 | start_time = time.time()
257 | total_chunks = 0
258 |
259 | try:
260 | # Make streaming API call
261 | stream = await self.client.chat.completions.create(**params)
262 |
263 | # Process the stream
264 | async for chunk in stream:
265 | total_chunks += 1
266 |
267 | # Extract content from the chunk
268 | delta = chunk.choices[0].delta
269 | content = delta.content or ""
270 |
271 | # Metadata for this chunk
272 | metadata = {
273 | "model": f"{self.provider_name}/{model}",
274 | "provider": self.provider_name,
275 | "chunk_index": total_chunks,
276 | "finish_reason": chunk.choices[0].finish_reason,
277 | }
278 |
279 | yield content, metadata
280 |
281 | # Log success
282 | processing_time = time.time() - start_time
283 | self.logger.success(
284 | "DeepSeek streaming completion successful",
285 | emoji_key="success",
286 | model=model,
287 | chunks=total_chunks,
288 | time=processing_time
289 | )
290 |
291 | # Yield final metadata chunk
292 | final_metadata = {
293 | "model": f"{self.provider_name}/{model}",
294 | "provider": self.provider_name,
295 | "chunk_index": total_chunks + 1,
296 | "processing_time": processing_time,
297 | "finish_reason": "stop"
298 | }
299 | yield "", final_metadata
300 |
301 | except Exception as e:
302 | processing_time = time.time() - start_time
303 | self.logger.error(
304 | f"DeepSeek streaming completion failed: {str(e)}",
305 | emoji_key="error",
306 | model=model
307 | )
308 |
309 | # Yield error metadata
310 | error_metadata = {
311 | "model": f"{self.provider_name}/{model}",
312 | "provider": self.provider_name,
313 | "chunk_index": total_chunks + 1,
314 | "error": f"{type(e).__name__}: {str(e)}",
315 | "processing_time": processing_time,
316 | "finish_reason": "error"
317 | }
318 | yield "", error_metadata
319 |
320 | async def list_models(self) -> List[Dict[str, Any]]:
321 | """List available DeepSeek models.
322 |
323 | Returns:
324 | List of model information dictionaries
325 | """
326 | # DeepSeek doesn't have a comprehensive models endpoint, so we return a static list
327 | if self.models_cache:
328 | return self.models_cache
329 |
330 | models = [
331 | {
332 | "id": "deepseek-chat",
333 | "provider": self.provider_name,
334 | "description": "General-purpose chat model",
335 | },
336 | {
337 | "id": "deepseek-reasoner",
338 | "provider": self.provider_name,
339 | "description": "Enhanced reasoning capabilities",
340 | },
341 | ]
342 |
343 | # Cache results
344 | self.models_cache = models
345 |
346 | return models
347 |
348 | def get_default_model(self) -> str:
349 | """Get the default DeepSeek model.
350 |
351 | Returns:
352 | Default model name
353 | """
354 | from ultimate_mcp_server.config import get_config
355 |
356 | # Safely get from config if available
357 | try:
358 | config = get_config()
359 | provider_config = getattr(config, 'providers', {}).get(self.provider_name, None)
360 | if provider_config and provider_config.default_model:
361 | return provider_config.default_model
362 | except (AttributeError, TypeError):
363 | # Handle case when providers attribute doesn't exist or isn't a dict
364 | pass
365 |
366 | # Otherwise return hard-coded default
367 | return "deepseek-chat"
368 |
369 | async def check_api_key(self) -> bool:
370 | """Check if the DeepSeek API key is valid.
371 |
372 | Returns:
373 | bool: True if API key is valid
374 | """
375 | try:
376 | # Try a simple completion to validate the API key
377 | await self.client.chat.completions.create(
378 | model=self.get_default_model(),
379 | messages=[{"role": "user", "content": "Hello"}],
380 | max_tokens=1,
381 | )
382 | return True
383 | except Exception:
384 | return False
```
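
A minimal sketch of calling the provider directly, assuming a valid DeepSeek API key; within the server the provider is normally obtained through the provider registry rather than constructed by hand:

```python
import asyncio
import os

from ultimate_mcp_server.core.providers.deepseek import DeepSeekProvider


async def demo() -> None:
    # Assumes DEEPSEEK_API_KEY is set; the base class may also resolve keys from config.
    provider = DeepSeekProvider(api_key=os.environ["DEEPSEEK_API_KEY"])
    await provider.initialize()

    # Non-streaming completion against the default model ("deepseek-chat").
    result = await provider.generate_completion(
        prompt="Summarize what a vector database does in one sentence.",
        temperature=0.2,
        max_tokens=64,
    )
    print(result.text)

    # Streaming variant: yields (text_chunk, metadata) tuples and finishes with
    # an empty chunk whose metadata carries processing_time and finish_reason.
    async for chunk, _meta in provider.generate_completion_stream(
        prompt="List three uses of transfer learning.",
        max_tokens=128,
    ):
        print(chunk, end="", flush=True)
    print()


asyncio.run(demo())
```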
--------------------------------------------------------------------------------
/ultimate_mcp_server/core/state_store.py:
--------------------------------------------------------------------------------
```python
1 | import asyncio
2 | import os
3 | import pickle
4 | from typing import Any, Dict, Optional
5 |
6 | import aiofiles
7 |
8 |
9 | class StateStore:
10 | """
11 | Thread-safe, async-compatible state management system with optional persistence.
12 |
13 | The StateStore provides a robust solution for managing application state in asynchronous
14 | environments. It organizes data into namespaces, each containing key-value pairs, and
15 | provides thread-safe access through asyncio.Lock-based concurrency control.
16 |
17 | Key features:
18 | - Namespace-based organization to separate different types of state data
19 | - Thread-safe async methods for all operations (get, set, delete)
20 | - Optional persistence to disk with automatic load/save
21 | - Granular locking per namespace to maximize concurrency
22 | - Graceful handling of corrupted or missing persistent data
23 |
24 | Usage example:
25 | ```python
26 | # Initialize with persistence
27 | store = StateStore(persistence_dir="./state")
28 |
29 | # Store values
30 | await store.set("user_preferences", "theme", "dark")
31 | await store.set("session_data", "user_id", 12345)
32 |
33 | # Retrieve values (with default if missing)
34 | theme = await store.get("user_preferences", "theme", default="light")
35 | user_id = await store.get("session_data", "user_id", default=None)
36 |
37 | # Delete values
38 | await store.delete("session_data", "temp_token")
39 | ```
40 |
41 | The StateStore is used internally by the Ultimate MCP Server to maintain state
42 | across multiple tools and components, and is exposed to tools via the
43 | with_state_management decorator.
44 | """
45 |
46 | def __init__(self, persistence_dir: Optional[str] = None):
47 | """
48 | Initialize a new StateStore instance.
49 |
50 | The StateStore provides a thread-safe, async-compatible key-value store organized
51 | by namespaces. It supports both in-memory operation and optional persistence to disk.
52 | The store is designed for use in multi-threaded or async applications where state
53 | needs to be shared safely between components.
54 |
55 | Each namespace acts as a separate dictionary with its own concurrency protection.
56 | Operations within a namespace are serialized using asyncio.Lock, while operations
57 | across different namespaces can proceed concurrently.
58 |
59 | Args:
60 | persistence_dir: Optional directory path where state data will be persisted.
61 | If provided, each namespace will be stored as a separate pickle
62 | file in this directory. If None, the store operates in memory-only
63 | mode and state is lost when the application stops.
64 |
65 | Notes:
66 | - The directory will be created if it doesn't exist
67 | - Each namespace is persisted as a separate file named "{namespace}.pickle"
68 | - Data is serialized using Python's pickle module, so stored values should be
69 | pickle-compatible
70 | - No automatic cleanup of old or unused namespaces is performed
71 | """
72 | self._in_memory_store: Dict[str, Dict[str, Any]] = {}
73 | self._locks: Dict[str, asyncio.Lock] = {}
74 | self._persistence_dir = persistence_dir
75 | if persistence_dir and not os.path.exists(persistence_dir):
76 | os.makedirs(persistence_dir)
77 |
78 | def _get_lock(self, namespace: str) -> asyncio.Lock:
79 | """
80 | Get or create an asyncio.Lock for a specific namespace.
81 |
82 | This private method manages the locks used for concurrency control. It maintains
83 | a dictionary of locks keyed by namespace name, creating new locks as needed.
84 | This ensures that operations on the same namespace are properly serialized to
85 | prevent race conditions, while allowing operations on different namespaces to
86 | proceed concurrently.
87 |
88 | Args:
89 | namespace: Name of the namespace for which to get or create a lock
90 |
91 | Returns:
92 | An asyncio.Lock instance specific to the requested namespace
93 |
94 | Notes:
95 | - Each namespace gets its own independent lock
96 | - Locks are created on-demand when a namespace is first accessed
97 | - Locks persist for the lifetime of the StateStore instance
98 | - This method is called by all public methods (get, set, delete) to
99 | ensure thread-safe access to namespaces
100 | """
101 | if namespace not in self._locks:
102 | self._locks[namespace] = asyncio.Lock()
103 | return self._locks[namespace]
104 |
105 | async def get(self, namespace: str, key: str, default: Any = None) -> Any:
106 | """
107 | Retrieve a value from the state store with thread-safe access control.
108 |
109 | This method provides a concurrency-safe way to retrieve state data from the specified
110 | namespace. If the namespace doesn't exist in memory, it attempts to load it from disk
111 | (if persistence is enabled) before returning the requested value or default.
112 |
113 | Retrieval behavior:
114 | - The method first acquires a lock for the specified namespace to ensure thread safety
115 | - If the namespace is not in memory, it attempts to load it from disk if persistence is enabled
116 | - If the namespace can't be loaded or doesn't exist, an empty namespace is created
117 | - Returns the value for the specified key, or the default value if the key is not found
118 |
119 | Args:
120 | namespace: Logical grouping for related state data (e.g., "tools", "user_settings")
121 | key: Unique identifier within the namespace for the data to retrieve
122 | default: Value to return if the key is not found in the namespace
123 |
124 | Returns:
125 | The stored value if found, otherwise the default value
126 |
127 | Notes:
128 | - Acquiring the namespace lock is an async operation and may block if another
129 | operation is currently accessing the same namespace
130 | - If persistence is enabled, this method may perform disk I/O when a namespace
131 | needs to be loaded from disk
132 | """
133 | async with self._get_lock(namespace):
134 | if namespace not in self._in_memory_store:
135 | # Try to load from disk if persistence is enabled
136 | if self._persistence_dir:
137 | await self._load_namespace(namespace)
138 | else:
139 | self._in_memory_store[namespace] = {}
140 |
141 | return self._in_memory_store[namespace].get(key, default)
142 |
143 | async def set(self, namespace: str, key: str, value: Any) -> None:
144 | """
145 | Store a value in the state store with thread-safe access control.
146 |
147 | This method provides a concurrency-safe way to store state data in the specified namespace.
148 | The implementation uses asyncio.Lock to ensure that concurrent access to the same namespace
149 | doesn't lead to race conditions or data corruption.
150 |
151 | Storage behavior:
152 | - Values are first stored in an in-memory dictionary
153 | - If persistence_dir is configured, values are also immediately persisted to disk
154 | - Each namespace is stored as a separate pickle file
155 | - Values can be any pickle-serializable Python object
156 |
157 | Args:
158 | namespace: Logical grouping for related state data (e.g., "tools", "user_settings")
159 | key: Unique identifier within the namespace for this piece of data
160 | value: Any pickle-serializable value to store
161 |
162 | Notes:
163 | - Acquiring the namespace lock is an async operation and may block if another
164 | operation is currently accessing the same namespace
165 | - If persistence is enabled, this method performs disk I/O which could take time
166 | depending on the value size and disk performance
167 | """
168 | async with self._get_lock(namespace):
169 | if namespace not in self._in_memory_store:
170 | self._in_memory_store[namespace] = {}
171 |
172 | self._in_memory_store[namespace][key] = value
173 |
174 | # Persist immediately if enabled
175 | if self._persistence_dir:
176 | await self._persist_namespace(namespace)
177 |
178 | async def delete(self, namespace: str, key: str) -> None:
179 | """
180 | Delete a value from the state store with thread-safe access control.
181 |
182 | This method safely removes a key-value pair from the specified namespace,
183 | and optionally persists the change to disk if persistence is enabled. The
184 | operation is concurrency-safe through the use of namespace-specific locks.
185 |
186 | Deletion behavior:
187 | - The method first acquires a lock for the specified namespace to ensure thread safety
188 | - If the namespace doesn't exist or the key is not found, the operation is a no-op
189 | - If persistence is enabled, the updated namespace state is written to disk
190 | immediately after deletion
191 |
192 | Args:
193 | namespace: Logical grouping for related state data (e.g., "tools", "user_settings")
194 | key: Unique identifier within the namespace for the data to delete
195 |
196 | Notes:
197 | - Acquiring the namespace lock is an async operation and may block if another
198 | operation is currently accessing the same namespace
199 | - If persistence is enabled, this method performs disk I/O when persisting
200 | the updated namespace after deletion
201 | - This method does not raise an exception if the key doesn't exist in the namespace
202 | """
203 | async with self._get_lock(namespace):
204 | if namespace in self._in_memory_store and key in self._in_memory_store[namespace]:
205 | del self._in_memory_store[namespace][key]
206 |
207 | # Persist the change if enabled
208 | if self._persistence_dir:
209 | await self._persist_namespace(namespace)
210 |
211 | async def _persist_namespace(self, namespace: str) -> None:
212 | """
213 | Persist a namespace's data to disk as a pickle file.
214 |
215 | This private method handles the actual disk I/O for saving state data. It serializes
216 | the entire namespace dictionary to a pickle file named after the namespace in the
217 | configured persistence directory.
218 |
219 | Args:
220 | namespace: Name of the namespace whose data should be persisted
221 |
222 | Notes:
223 | - This method is a no-op if persistence_dir is not configured
224 | - Uses aiofiles for non-blocking async file I/O
225 | - The file is named "{namespace}.pickle" and stored in the persistence_dir
226 | - The entire namespace is serialized in a single operation, which may be
227 | inefficient for very large namespaces
228 | - This method is called internally by set() and delete() methods
229 | after modifying namespace data
230 | """
231 | if not self._persistence_dir:
232 | return
233 |
234 | file_path = os.path.join(self._persistence_dir, f"{namespace}.pickle")
235 | async with aiofiles.open(file_path, 'wb') as f:
236 | await f.write(pickle.dumps(self._in_memory_store[namespace]))
237 |
238 | async def _load_namespace(self, namespace: str) -> None:
239 | """
240 | Load a namespace's data from disk into memory.
241 |
242 | This private method handles loading serialized state data from disk into the in-memory store.
243 | It is called automatically by the get() method when a namespace is requested but not yet
244 | loaded in memory. The method implements the lazy-loading pattern, only reading from disk
245 | when necessary.
246 |
247 | The loading process follows these steps:
248 | 1. Check if persistence is enabled; if not, initialize an empty namespace dictionary
249 | 2. Locate the pickle file for the namespace (named "{namespace}.pickle")
250 | 3. If the file doesn't exist, initialize an empty namespace dictionary
251 | 4. If the file exists, read and deserialize it using pickle
252 | 5. Handle potential serialization errors gracefully (corrupted files, version mismatches)
253 |
254 | Args:
255 | namespace: Name of the namespace whose data should be loaded. This corresponds
256 | directly to a "{namespace}.pickle" file in the persistence directory.
257 |
258 | Returns:
259 | None: The method modifies the internal self._in_memory_store dictionary directly.
260 |
261 | Notes:
262 | - Uses aiofiles for non-blocking async file I/O
263 | - In case of corrupt data (pickle errors), the namespace is initialized as empty
264 | rather than raising exceptions to the caller
265 | - Example of file path: /path/to/persistence_dir/user_settings.pickle for the
266 | "user_settings" namespace
267 | - This method is idempotent - calling it multiple times for the same namespace
268 | has no additional effect after the first call
269 |
270 | Examples:
271 | ```python
272 | # This method is called internally by get(), not typically called directly
273 | store = StateStore(persistence_dir="./state")
274 |
275 | # When this executes, _load_namespace("user_settings") will be called internally
276 | # if the namespace is not already in memory
277 | value = await store.get("user_settings", "theme")
278 | ```
279 | """
280 | if not self._persistence_dir:
281 | self._in_memory_store[namespace] = {}
282 | return
283 |
284 | file_path = os.path.join(self._persistence_dir, f"{namespace}.pickle")
285 | if not os.path.exists(file_path):
286 | self._in_memory_store[namespace] = {}
287 | return
288 |
289 | try:
290 | async with aiofiles.open(file_path, 'rb') as f:
291 | data = await f.read()
292 | self._in_memory_store[namespace] = pickle.loads(data)
293 | except (pickle.PickleError, EOFError):
294 | # Handle corrupt data
295 | self._in_memory_store[namespace] = {}
```
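
A short round-trip sketch of the persistence behaviour described above: values written by one StateStore instance are lazily reloaded by a fresh instance pointed at the same directory (assumes the process can write to ./state):

```python
import asyncio

from ultimate_mcp_server.core.state_store import StateStore


async def demo() -> None:
    # First instance writes and persists immediately (one pickle file per namespace).
    store_a = StateStore(persistence_dir="./state")
    await store_a.set("user_preferences", "theme", "dark")

    # A new instance starts empty in memory and lazily loads
    # ./state/user_preferences.pickle on first access to that namespace.
    store_b = StateStore(persistence_dir="./state")
    assert await store_b.get("user_preferences", "theme", default="light") == "dark"

    # delete() is a no-op for missing keys and persists the updated namespace.
    await store_b.delete("user_preferences", "theme")
    assert await store_b.get("user_preferences", "theme", default="light") == "light"


asyncio.run(demo())
```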
--------------------------------------------------------------------------------
/resource_annotations.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Resource annotations for Model Control Protocol (MCP) systems.
3 |
4 | This module implements the resource annotation system specified in the MCP protocol,
5 | which enables AI systems to make intelligent decisions about how to process, prioritize,
6 | and present different types of resources in multi-modal and multi-resource contexts.
7 |
8 | Resource annotations serve multiple critical functions in AI/LLM systems:
9 |
10 | 1. PRIORITIZATION: Help AI systems allocate attention optimally among multiple resources
11 | when token constraints prevent processing everything (e.g., which document to focus on)
12 |
13 | 2. VISIBILITY CONTROL: Determine which resources should be visible to different actors
14 | in the system (e.g., assistant-only resources vs. user-facing resources)
15 |
16 | 3. FORMAT PRESERVATION: Indicate when resources have structured formats that should be
17 | maintained (e.g., code, tables, JSON) rather than freely interpreted
18 |
19 | 4. CHUNKING GUIDANCE: Provide hints about how to divide large resources efficiently
20 | for processing within context window constraints
21 |
22 | The module provides:
23 | - The ResourceAnnotations class for creating annotation metadata
24 | - Pre-defined annotation templates for common resource types
25 | - Utilities for working with annotated resources (e.g., chunking)
26 |
27 | Usage example:
28 | ```python
29 | # Create custom annotations for a research paper
30 | paper_annotations = ResourceAnnotations(
31 | priority=0.8,
32 | audience=["assistant"],
33 | chunking_recommended=True,
34 | description="Research paper on quantum computing effects"
35 | )
36 |
37 | # Annotate and chunk a large document
38 | paper_content = open("quantum_paper.txt").read()
39 | chunks = format_chunked_content(paper_content, chunk_size=3000)
40 |
41 | # Use a predefined annotation template for code
42 | code_resource = {
43 | "content": "def calculate_entropy(data):\\n ...",
44 | "annotations": CODE_RESOURCE.to_dict()
45 | }
46 | ```
47 |
48 | These annotations integrate with the MCP protocol to help LLMs process resources
49 | more intelligently and efficiently in complex, multi-resource scenarios.
50 | """
51 | from typing import List, Optional
52 |
53 |
54 | class ResourceAnnotations:
55 | """
56 | Annotations that guide LLMs in handling and prioritizing resources within the MCP protocol.
57 |
58 | ResourceAnnotations provide crucial metadata that helps LLMs make intelligent decisions about:
59 | - IMPORTANCE: How critical a resource is to the current task (via priority)
60 | - AUDIENCE: Who should see or interact with the resource
61 | - FORMATTING: How the resource should be rendered or processed
62 | - CHUNKING: Whether and how to divide large resources into manageable pieces
63 |
64 | These annotations serve multiple purposes in the MCP ecosystem:
65 | 1. Help LLMs prioritize which resources to analyze first when multiple are available
66 | 2. Control visibility of resources between assistants and users
67 | 3. Preserve structural integrity of formatted content (code, tables, etc.)
68 | 4. Provide chunking guidance for efficient processing of large resources
69 |
70 | When resources are annotated appropriately, LLMs can make better decisions about:
71 | - Which resources deserve the most attention in token-constrained contexts
72 | - When to preserve formatting vs. when content structure is less important
73 | - How to efficiently process large documents while maintaining context
74 | - Whether certain resources are meant for the assistant's understanding only
75 |
76 | Usage example:
77 | ```python
78 | # For a source code file that should preserve formatting
79 | code_annotations = ResourceAnnotations(
80 | priority=0.8, # High importance
81 | audience=["assistant"], # Only the assistant needs to see this
82 | structured_format=True, # Preserve code formatting
83 | chunking_recommended=True, # Chunk if large
84 | max_recommended_chunk_size=2000,
85 | description="Python source code implementing the core algorithm"
86 | )
87 |
88 | # Apply annotations to a resource
89 | resource = {
90 | "id": "algorithm.py",
91 | "content": "def calculate(x, y):\n return x + y",
92 | "annotations": code_annotations.to_dict()
93 | }
94 | ```
95 | """
96 |
97 | def __init__(
98 | self,
99 | priority: float = 0.5,
100 | audience: List[str] = None,
101 | structured_format: bool = False,
102 | chunking_recommended: bool = False,
103 | max_recommended_chunk_size: Optional[int] = None,
104 | description: Optional[str] = None
105 | ):
106 | """
107 | Initialize resource annotations.
108 |
109 | Args:
110 | priority: How important this resource is (0.0-1.0, higher is more important).
111 | 0.0 = entirely optional, 1.0 = effectively required.
112 | Affects how much attention an LLM should give this resource when multiple
113 | resources are available but context limits prevent using all of them.
114 | Default: 0.5 (medium importance)
115 |
116 | audience: Who should see this resource, as a list of roles:
117 | - "assistant": The AI assistant should process this resource
118 | - "user": The human user should see this resource
119 | Both can be specified for resources relevant to both parties.
120 | Default: ["assistant"] (assistant-only)
121 |
122 | structured_format: Whether this resource has a structured format that
123 | should be preserved (e.g., code, JSON, tables). When True, the LLM should
124 | maintain the exact formatting, indentation, and structure of the content.
125 | Default: False
126 |
127 | chunking_recommended: Whether this resource should be chunked if large.
128 | Setting this to True signals that the content is suitable for being
129 | divided into smaller pieces for processing (e.g., long documents).
130 | Default: False
131 |
132 | max_recommended_chunk_size: Maximum recommended chunk size in characters.
133 | Provides guidance on how large each chunk should be if chunking is applied.
134 | Default: None (no specific recommendation)
135 |
136 | description: Optional description of the resource that provides context
137 | about its purpose, content, or importance.
138 | """
139 | self.priority = max(0.0, min(1.0, priority)) # Clamp between 0 and 1
140 | self.audience = audience or ["assistant"]
141 | self.structured_format = structured_format
142 | self.chunking_recommended = chunking_recommended
143 | self.max_recommended_chunk_size = max_recommended_chunk_size
144 | self.description = description
145 |
146 | def to_dict(self) -> dict:
147 | """Convert annotations to dictionary for MCP protocol."""
148 | result = {
149 | "priority": self.priority,
150 | "audience": self.audience
151 | }
152 |
153 | # Add extended properties
154 | if self.description:
155 | result["description"] = self.description
156 |
157 | # Add chunking metadata if recommended
158 | if self.chunking_recommended:
159 | result["chunking"] = {
160 | "recommended": True
161 | }
162 | if self.max_recommended_chunk_size:
163 | result["chunking"]["maxSize"] = self.max_recommended_chunk_size
164 |
165 | # Add format information
166 | if self.structured_format:
167 | result["format"] = {
168 | "structured": True
169 | }
170 |
171 | return result
172 |
173 |
174 | # Pre-defined annotation templates for common resource types
175 |
176 | # For critical resources that need immediate attention
177 | # Use for resources essential to the current task's success
178 | # Examples: Primary task instructions, critical context documents
179 | HIGH_PRIORITY_RESOURCE = ResourceAnnotations(
180 | priority=0.9,
181 | audience=["assistant", "user"],
182 | description="Critical resource that should be prioritized"
183 | )
184 |
185 | # For source code and programming-related content
186 | # Preserves indentation, formatting, and structure
187 | # Recommends chunking for large codebases
188 | # Examples: Source files, configuration files, scripts
189 | CODE_RESOURCE = ResourceAnnotations(
190 | priority=0.8,
191 | audience=["assistant"],
192 | structured_format=True,
193 | chunking_recommended=True,
194 | max_recommended_chunk_size=2000,
195 | description="Source code that should preserve formatting"
196 | )
197 |
198 | # For lengthy text resources that should be divided into smaller parts
199 | # Good for processing long documents without overwhelming context windows
200 | # Examples: Articles, documentation, books, long explanations
201 | LARGE_TEXT_RESOURCE = ResourceAnnotations(
202 | priority=0.6,
203 | audience=["assistant"],
204 | chunking_recommended=True,
205 | max_recommended_chunk_size=4000,
206 | description="Large text that should be chunked for processing"
207 | )
208 |
209 | # For data formats where structure is important
210 | # Preserves formatting but doesn't automatically suggest chunking
211 | # Examples: JSON data, database records, tabular data, XML
212 | STRUCTURED_DATA_RESOURCE = ResourceAnnotations(
213 | priority=0.7,
214 | audience=["assistant"],
215 | structured_format=True,
216 | description="Structured data like JSON or tables"
217 | )
218 |
219 | # For supplementary information that provides additional context
220 | # Low priority indicates it can be skipped if context is limited
221 | # Examples: Background information, history, tangential details
222 | OPTIONAL_RESOURCE = ResourceAnnotations(
223 | priority=0.2,
224 | audience=["assistant"],
225 | description="Supplementary information that isn't critical"
226 | )
227 |
228 | # For content meant to be shown to the user directly
229 | # Not intended for assistant's processing (assistant not in audience)
230 | # Examples: Final results, generated content, presentations
231 | USER_FACING_RESOURCE = ResourceAnnotations(
232 | priority=0.7,
233 | audience=["user"],
234 | description="Resource meant for user consumption"
235 | )
236 |
237 |
238 | def format_chunked_content(content: str, chunk_size: int = 4000, overlap: int = 200) -> List[dict]:
239 | """
240 | Format content into overlapping chunks with rich metadata for efficient LLM processing.
241 |
242 | This utility function implements a sliding window approach to divide large content
243 | into manageable, context-aware chunks. Each chunk is annotated with detailed positioning
244 | metadata, allowing LLMs to understand the chunk's relationship to the overall content
245 | and maintain coherence across chunk boundaries.
246 |
247 | Key features:
248 | - Consistent overlap between chunks preserves context and prevents information loss
249 | - Automatic metadata generation provides LLMs with crucial positioning information
250 | - Standard annotation format compatible with the MCP resource protocol
251 | - Configurable chunk size to adapt to different model context window limitations
252 |
253 | The overlap between chunks is particularly important as it helps LLMs maintain
254 | coherence when processing information that spans chunk boundaries. Without overlap,
255 | context might be lost at chunk transitions, leading to degraded performance on tasks
256 | that require understanding the full content.
257 |
258 | Args:
259 | content: The source text content to be chunked. This can be any string content
260 | like a document, article, code file, or other text-based resource.
261 | chunk_size: Maximum size of each chunk in characters (default: 4000).
262 | This should be set based on the target LLM's context window limitations,
263 | typically 25-50% less than the model's maximum to allow room for prompts.
264 | overlap: Number of characters to overlap between consecutive chunks (default: 200).
265 | Larger overlap values provide more context continuity between chunks but
266 | increase redundancy and total token usage.
267 |
268 | Returns:
269 | List of dictionaries, each representing a content chunk with metadata:
270 | - "text": The actual chunk content (substring of the original content)
271 | - "annotations": Metadata dictionary containing:
272 | - priority: Importance hint for the LLM (default: 0.7)
273 | - audience: Who should see this chunk (default: ["assistant"])
274 | - chunk_info: Detailed positioning metadata including:
275 | - index: Zero-based index of this chunk in the sequence
276 | - total_chunks: Total number of chunks in the complete content
277 | - start_position: Character offset where this chunk begins in the original content
278 | - end_position: Character offset where this chunk ends in the original content
279 | - has_more: Boolean indicating if more chunks follow this one
280 |
281 | Usage examples:
282 | # Basic usage with default parameters
283 | chunks = format_chunked_content("Long document text...")
284 |
285 | # Using smaller chunks for models with limited context windows
286 | small_chunks = format_chunked_content(
287 | content="Large article text...",
288 | chunk_size=1000,
289 | overlap=100
290 | )
291 |
292 | # Process chunks sequentially while maintaining context
293 | for chunk in chunks:
294 | response = await generate_completion(
295 | prompt=f"Analyze this text: {chunk['text']}",
296 | # Include chunk metadata so the LLM understands context
297 | additional_context=f"This is chunk {chunk['annotations']['chunk_info']['index']+1} "
298 | f"of {chunk['annotations']['chunk_info']['total_chunks']}"
299 | )
300 | """
301 | chunks = []
302 |
303 | # Create chunks with overlap
304 | for i in range(0, len(content), chunk_size - overlap):
305 | chunk_text = content[i:i + chunk_size]
306 | if chunk_text:
307 | # Create chunk with annotations
308 | chunk = {
309 | "text": chunk_text,
310 | "annotations": {
311 | "priority": 0.7,
312 | "audience": ["assistant"],
313 | "chunk_info": {
314 | "index": len(chunks),
315 |                         "total_chunks": (len(content) + (chunk_size - overlap) - 1) // (chunk_size - overlap),  # ceil(len / stride)
316 | "start_position": i,
317 | "end_position": min(i + chunk_size, len(content)),
318 | "has_more": i + chunk_size < len(content)
319 | }
320 | }
321 | }
322 | chunks.append(chunk)
323 |
324 | return chunks
```
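
A quick sketch of the structures the module above produces, useful when wiring annotations into MCP resources. The import path and sample content here are illustrative; the printed values follow directly from `to_dict()` and `format_chunked_content()` as defined above:

```python
# Sketch only: assumes the module above is importable as `resource_annotations`.
from resource_annotations import CODE_RESOURCE, format_chunked_content

# The predefined CODE_RESOURCE template serializes to the MCP annotation dict:
print(CODE_RESOURCE.to_dict())
# {'priority': 0.8, 'audience': ['assistant'],
#  'description': 'Source code that should preserve formatting',
#  'chunking': {'recommended': True, 'maxSize': 2000},
#  'format': {'structured': True}}

# Chunk starts advance by (chunk_size - overlap) characters, so a 9000-character
# document with the parameters below yields three overlapping chunks.
chunks = format_chunked_content("x" * 9000, chunk_size=4000, overlap=200)
for c in chunks:
    info = c["annotations"]["chunk_info"]
    print(info["index"], info["start_position"], info["end_position"], info["has_more"])
# 0 0 4000 True
# 1 3800 7800 True
# 2 7600 9000 False
```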
--------------------------------------------------------------------------------
/ultimate_mcp_server/tools/__init__.py:
--------------------------------------------------------------------------------
```python
1 | """MCP Tools for Ultimate MCP Server."""
2 |
3 | import inspect
4 | import sys
5 | from typing import Any, Dict
6 |
7 | from ultimate_mcp_server.tools.base import (
8 | BaseTool, # Keep BaseTool in case other modules use it
9 | register_tool,
10 | with_error_handling,
11 | with_retry,
12 | with_tool_metrics,
13 | )
14 | from ultimate_mcp_server.utils import get_logger
15 |
16 | # from .audio_transcription import (
17 | # chat_with_transcript,
18 | # extract_audio_transcript_key_points,
19 | # transcribe_audio,
20 | # )
21 | # Import base decorators/classes that might be used by other tool modules
22 | from .completion import chat_completion, generate_completion, multi_completion, stream_completion
23 | from .document_conversion_and_processing import (
24 | analyze_pdf_structure,
25 | batch_format_texts,
26 | canonicalise_entities,
27 | chunk_document,
28 | clean_and_format_text_as_markdown,
29 | convert_document,
30 | detect_content_type,
31 | enhance_ocr_text,
32 | extract_entities,
33 | extract_metrics,
34 | extract_tables,
35 | flag_risks,
36 | generate_qa_pairs,
37 | identify_sections,
38 | ocr_image,
39 | optimize_markdown_formatting,
40 | process_document_batch,
41 | summarize_document,
42 | )
43 |
44 | # from .docstring_refiner import refine_tool_documentation
45 | # from .entity_relation_graph import extract_entity_graph
46 | # from .extraction import (
47 | # extract_code_from_response,
48 | # extract_json,
49 | # extract_key_value_pairs,
50 | # extract_semantic_schema,
51 | # extract_table,
52 | # )
53 | from .filesystem import (
54 | create_directory,
55 | directory_tree,
56 | edit_file,
57 | get_file_info,
58 | get_unique_filepath,
59 | list_allowed_directories,
60 | list_directory,
61 | move_file,
62 | read_file,
63 | read_multiple_files,
64 | search_files,
65 | write_file,
66 | )
67 | from .local_text_tools import (
68 | get_workspace_dir,
69 | run_awk,
70 | run_awk_stream,
71 | run_jq,
72 | run_jq_stream,
73 | run_ripgrep,
74 | run_ripgrep_stream,
75 | run_sed,
76 | run_sed_stream,
77 | )
78 |
79 | # from .marqo_fused_search import marqo_fused_search
80 | # from .meta_api_tool import register_api_meta_tools
81 | from .optimization import (
82 | compare_models,
83 | estimate_cost,
84 | execute_optimized_workflow,
85 | recommend_model,
86 | )
87 | from .provider import get_provider_status, list_models
88 | from .python_sandbox import (
89 | execute_python,
90 | repl_python,
91 | )
92 |
93 | # from .rag import (
94 | # add_documents,
95 | # create_knowledge_base,
96 | # delete_knowledge_base,
97 | # generate_with_rag,
98 | # list_knowledge_bases,
99 | # retrieve_context,
100 | # )
101 | from .sentiment_analysis import analyze_business_sentiment, analyze_business_text_batch
102 |
103 | # from .single_shot_synthesis import single_shot_synthesis
104 | from .smart_browser import (
105 | autopilot,
106 | browse,
107 | click,
108 | collect_documentation,
109 | download,
110 | download_site_pdfs,
111 | parallel,
112 | run_macro,
113 | search,
114 | type_text,
115 | )
116 |
117 | # from .sql_databases import access_audit_log, execute_sql, explore_database, manage_database
118 | # from .text_classification import text_classification
119 | # from .text_redline_tools import (
120 | # compare_documents_redline,
121 | # create_html_redline,
122 | # )
123 | # from .tournament import (
124 | # cancel_tournament,
125 | # create_tournament,
126 | # get_tournament_results,
127 | # get_tournament_status,
128 | # list_tournaments,
129 | # )
130 | from .unified_memory_system import (
131 | add_tag_to_memory,
132 | consolidate_memories,
133 | create_embedding,
134 | create_goal,
135 | create_memory_link,
136 | create_workflow,
137 | decay_link_strengths,
138 | diagnose_file_access_issues,
139 | focus_memory,
140 | generate_reflection,
141 | generate_workflow_report,
142 | get_artifact_by_id,
143 | get_artifacts,
144 | get_contradictions,
145 | get_embedding,
146 | get_goal_details,
147 | get_linked_memories,
148 | get_memory_by_id,
149 | get_memory_metadata,
150 | get_memory_tags,
151 | get_recent_actions,
152 | get_rich_context_package,
153 | get_similar_memories,
154 | get_subgraph,
155 | get_thought_chain,
156 | get_workflow_details,
157 | get_workflow_metadata,
158 | get_working_memory,
159 | hybrid_search_memories,
160 | load_cognitive_state,
161 | optimize_working_memory,
162 | promote_memory_level,
163 | query_goals,
164 | query_memories,
165 | record_action_completion,
166 | record_action_start,
167 | record_artifact,
168 | save_cognitive_state,
169 | store_memory,
170 | update_goal_status,
171 | update_memory,
172 | update_memory_link_metadata,
173 | update_memory_metadata,
174 | vector_similarity,
175 | )
176 |
177 | __all__ = [
178 | # Base decorators/classes
179 | "BaseTool",
180 | "with_tool_metrics",
181 | "with_retry",
182 | "with_error_handling",
183 | "register_tool",
184 |
185 | # LLM Completion tools
186 | "generate_completion",
187 | "stream_completion",
188 | "chat_completion",
189 | "multi_completion",
190 | "get_provider_status",
191 | "list_models",
192 |
193 | # Extraction tools
194 | # "extract_json",
195 | # "extract_table",
196 | # "extract_key_value_pairs",
197 | # "extract_semantic_schema",
198 | # "extract_entity_graph",
199 | # "extract_code_from_response",
200 |
201 | # Knowledge base tools
202 | # "create_knowledge_base",
203 | # "list_knowledge_bases",
204 | # "delete_knowledge_base",
205 | # "add_documents",
206 | # "retrieve_context",
207 | # "generate_with_rag",
208 | # "text_classification",
209 |
210 | # Cost optimization tools
211 | "estimate_cost",
212 | "compare_models",
213 | "recommend_model",
214 | "execute_optimized_workflow",
215 |     # "refine_tool_documentation",
216 |
217 | # Filesystem tools
218 | "read_file",
219 | "read_multiple_files",
220 | "write_file",
221 | "edit_file",
222 | "create_directory",
223 | "list_directory",
224 | "directory_tree",
225 | "move_file",
226 | "search_files",
227 | "get_file_info",
228 | "list_allowed_directories",
229 | "get_unique_filepath",
230 |
231 | # Local Text Tools
232 | "run_ripgrep",
233 | "run_awk",
234 | "run_sed",
235 | "run_jq",
236 | "run_ripgrep_stream",
237 | "run_awk_stream",
238 | "run_sed_stream",
239 | "run_jq_stream",
240 | "get_workspace_dir",
241 |
242 | # SQL databases tools
243 | # "manage_database",
244 | # "execute_sql",
245 | # "explore_database",
246 | # "access_audit_log",
247 |
248 | # Python sandbox tools
249 | "execute_python",
250 | "repl_python",
251 |
252 | # Smart Browser Standalone Functions
253 | "click",
254 | "browse",
255 | "type_text",
256 | "search",
257 | "download",
258 | "download_site_pdfs",
259 | "collect_documentation",
260 | "parallel",
261 | "run_macro",
262 | "autopilot",
263 |
264 | # Document conversion and processing tools
265 | "convert_document",
266 | "chunk_document",
267 | "clean_and_format_text_as_markdown",
268 | "detect_content_type",
269 | "batch_format_texts",
270 | "optimize_markdown_formatting",
271 | "identify_sections",
272 | "generate_qa_pairs",
273 | "summarize_document",
274 | "extract_metrics",
275 | "flag_risks",
276 | "canonicalise_entities",
277 | "ocr_image",
278 | "enhance_ocr_text",
279 | "analyze_pdf_structure",
280 | "process_document_batch",
281 | "extract_entities",
282 | "extract_tables",
283 |
284 | # Text Redline tools
285 | # "compare_documents_redline",
286 | # "create_html_redline",
287 |
288 | # Meta API tools
289 | # "register_api_meta_tools",
290 |
291 | # Marqo tool
292 | # "marqo_fused_search",
293 |
294 | # Tournament tools
295 | # "create_tournament",
296 | # "get_tournament_status",
297 | # "list_tournaments",
298 | # "get_tournament_results",
299 | # "cancel_tournament",
300 |
301 | # Audio tools
302 | # "transcribe_audio",
303 | # "extract_audio_transcript_key_points",
304 | # "chat_with_transcript",
305 |
306 | # Sentiment analysis tool
307 | "analyze_business_sentiment",
308 | "analyze_business_text_batch",
309 |
310 | # Unified Memory System tools
311 | "create_workflow",
312 | "get_workflow_details",
313 | "record_action_start",
314 | "record_action_completion",
315 | "get_recent_actions",
316 | "get_thought_chain",
317 | "store_memory",
318 | "get_memory_by_id",
319 | "get_memory_metadata",
320 | "get_memory_tags",
321 | "update_memory_metadata",
322 | "update_memory_link_metadata",
323 | "create_memory_link",
324 | "get_workflow_metadata",
325 | "get_contradictions",
326 | "query_memories",
327 | "update_memory",
328 | "get_linked_memories",
329 | "add_tag_to_memory",
330 | "create_embedding",
331 | "get_embedding",
332 | "get_working_memory",
333 | "focus_memory",
334 | "optimize_working_memory",
335 | "promote_memory_level",
336 | "save_cognitive_state",
337 | "load_cognitive_state",
338 | "decay_link_strengths",
339 | "generate_reflection",
340 | "get_rich_context_package",
341 | "get_goal_details",
342 | "create_goal",
343 | "update_goal_status",
344 | "vector_similarity",
345 | "record_artifact",
346 | "get_artifacts",
347 | "get_artifact_by_id",
348 | "get_similar_memories",
349 | "query_goals",
350 | "consolidate_memories",
351 | "diagnose_file_access_issues",
352 | "generate_workflow_report",
353 | "hybrid_search_memories",
354 | "get_subgraph",
355 | ]
356 |
357 | logger = get_logger("ultimate_mcp_server.tools")
358 |
359 |
360 | # --- Tool Registration ---
361 |
362 | # Generate STANDALONE_TOOL_FUNCTIONS by filtering __all__ for actual function objects
363 | # This eliminates the redundancy between __all__ and STANDALONE_TOOL_FUNCTIONS
364 | def _get_standalone_tool_functions():
365 | """Dynamically generates list of standalone tool functions from __all__."""
366 | current_module = sys.modules[__name__]
367 | standalone_functions = []
368 |
369 | for item_name in __all__:
370 | if item_name in ["BaseTool", "with_tool_metrics", "with_retry",
371 | "with_error_handling", "register_tool"]:
372 | # Skip base classes and decorators
373 | continue
374 |
375 | # Get the actual item from the module
376 | item = getattr(current_module, item_name, None)
377 |
378 | # Only include callable async functions (not classes or other exports)
379 | if callable(item) and inspect.iscoroutinefunction(item):
380 | standalone_functions.append(item)
381 |
382 | return standalone_functions
383 |
384 | # Get the list of standalone functions to register
385 | STANDALONE_TOOL_FUNCTIONS = _get_standalone_tool_functions()
386 |
387 |
388 | def register_all_tools(mcp_server) -> Dict[str, Any]:
389 | """Registers all tools (standalone and class-based) with the MCP server.
390 |
391 | Args:
392 | mcp_server: The MCP server instance.
393 |
394 | Returns:
395 | Dictionary containing information about registered tools.
396 | """
397 | from ultimate_mcp_server.config import get_config
398 | cfg = get_config()
399 | filter_enabled = cfg.tool_registration.filter_enabled
400 | included_tools = cfg.tool_registration.included_tools
401 | excluded_tools = cfg.tool_registration.excluded_tools
402 |
403 | logger.info("Registering tools based on configuration...")
404 | if filter_enabled:
405 | if included_tools:
406 | logger.info(f"Tool filtering enabled: including only {len(included_tools)} specified tools")
407 | if excluded_tools:
408 | logger.info(f"Tool filtering enabled: excluding {len(excluded_tools)} specified tools")
409 |
410 | registered_tools: Dict[str, Any] = {}
411 |
412 | # --- Register Standalone Functions ---
413 | standalone_count = 0
414 | for tool_func in STANDALONE_TOOL_FUNCTIONS:
415 | if not callable(tool_func) or not inspect.iscoroutinefunction(tool_func):
416 | logger.warning(f"Item {getattr(tool_func, '__name__', repr(tool_func))} in STANDALONE_TOOL_FUNCTIONS is not a callable async function.")
417 | continue
418 |
419 | tool_name = tool_func.__name__
420 |
421 | # Apply tool filtering logic
422 | if filter_enabled:
423 | # Skip if not in included_tools when included_tools is specified
424 | if included_tools and tool_name not in included_tools:
425 | logger.debug(f"Skipping tool {tool_name} (not in included_tools)")
426 | continue
427 |
428 | # Skip if in excluded_tools
429 | if tool_name in excluded_tools:
430 | logger.debug(f"Skipping tool {tool_name} (in excluded_tools)")
431 | continue
432 |
433 | # Register the tool
434 | mcp_server.tool(name=tool_name)(tool_func)
435 | registered_tools[tool_name] = {
436 | "description": inspect.getdoc(tool_func) or "",
437 | "type": "standalone_function"
438 | }
439 | logger.info(f"Registered tool function: {tool_name}", emoji_key="⚙️")
440 | standalone_count += 1
441 |
442 |
443 | # --- Register Class-Based Tools ---
444 |
445 | # Register Meta API Tool
446 | if (not filter_enabled or
447 | "meta_api_tool" in included_tools or
448 | (not included_tools and "meta_api_tool" not in excluded_tools)):
449 | try:
450 | from ultimate_mcp_server.tools.meta_api_tool import register_api_meta_tools
451 | register_api_meta_tools(mcp_server)
452 | logger.info("Registered API Meta-Tool functions", emoji_key="⚙️")
453 | standalone_count += 1
454 | except ImportError:
455 | logger.warning("Meta API tools not found (ultimate_mcp_server.tools.meta_api_tool)")
456 | except Exception as e:
457 | logger.error(f"Failed to register Meta API tools: {e}", exc_info=True)
458 |
459 | # Register Excel Spreadsheet Automation Tool
460 | if (not filter_enabled or
461 | "excel_spreadsheet_automation" in included_tools or
462 | (not included_tools and "excel_spreadsheet_automation" not in excluded_tools)):
463 | try:
464 | from ultimate_mcp_server.tools.excel_spreadsheet_automation import (
465 | WINDOWS_EXCEL_AVAILABLE,
466 | register_excel_spreadsheet_tools,
467 | )
468 | if WINDOWS_EXCEL_AVAILABLE:
469 | register_excel_spreadsheet_tools(mcp_server)
470 | logger.info("Registered Excel spreadsheet tools", emoji_key="⚙️")
471 | standalone_count += 1
472 | else:
473 | # Automatically exclude Excel tools if not available
474 | logger.warning("Excel automation tools are only available on Windows with Excel installed. These tools will not be registered.")
475 | # If not already explicitly excluded, add to excluded_tools
476 | if "excel_spreadsheet_automation" not in excluded_tools:
477 | if not cfg.tool_registration.filter_enabled:
478 | cfg.tool_registration.filter_enabled = True
479 | if not hasattr(cfg.tool_registration, "excluded_tools"):
480 | cfg.tool_registration.excluded_tools = []
481 | cfg.tool_registration.excluded_tools.append("excel_spreadsheet_automation")
482 | except ImportError:
483 | logger.warning("Excel spreadsheet tools not found (ultimate_mcp_server.tools.excel_spreadsheet_automation)")
484 | except Exception as e:
485 | logger.error(f"Failed to register Excel spreadsheet tools: {e}", exc_info=True)
486 |
487 | logger.info(
488 | f"Completed tool registration. Registered {standalone_count} tools.",
489 | emoji_key="⚙️"
490 | )
491 |
492 | # Return info about registered tools
493 | return registered_tools
494 |
```
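
As a usage note, `register_all_tools` only needs a server object exposing the `tool(name=...)` decorator used above. A minimal sketch, assuming a FastMCP server instance and an installed, configured `ultimate_mcp_server` package (configuration is read via `get_config()` inside the function):

```python
# Sketch only: assumes the ultimate_mcp_server package is installed and configured.
from fastmcp import FastMCP

from ultimate_mcp_server.tools import STANDALONE_TOOL_FUNCTIONS, register_all_tools

server = FastMCP("demo-server")
registered = register_all_tools(server)

# The return value maps tool names to {"description": ..., "type": ...} metadata.
print(f"{len(STANDALONE_TOOL_FUNCTIONS)} standalone functions discovered, "
      f"{len(registered)} tools registered")
for name, meta in list(registered.items())[:5]:
    print(f"  {name}: {meta['type']}")
```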
--------------------------------------------------------------------------------
/examples/advanced_vector_search_demo.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | """Demo of advanced vector search capabilities using real Ultimate MCP Server tools."""
3 | import asyncio
4 | import sys
5 | import time
6 | from pathlib import Path
7 |
8 | # Add project root to path for imports when running as script
9 | sys.path.insert(0, str(Path(__file__).parent.parent))
10 |
11 | from rich.markup import escape
12 | from rich.rule import Rule
13 |
14 | from ultimate_mcp_server.core.server import Gateway
15 | from ultimate_mcp_server.services.vector import get_vector_db_service
16 | from ultimate_mcp_server.services.vector.embeddings import cosine_similarity, get_embedding_service
17 |
18 | # --- Add Marqo Tool Import ---
19 | from ultimate_mcp_server.tools.marqo_fused_search import marqo_fused_search
20 | from ultimate_mcp_server.utils import get_logger
21 | from ultimate_mcp_server.utils.display import (
22 | display_embedding_generation_results,
23 | display_text_content_result,
24 | display_vector_similarity_results,
25 | parse_and_display_result,
26 | )
27 |
28 | # ---------------------------
29 | # --- Add Rich Imports ---
30 | from ultimate_mcp_server.utils.logging.console import console
31 |
32 | # ----------------------
33 |
34 | # Initialize logger
35 | logger = get_logger("example.advanced_vector_search")
36 |
37 | # Initialize global gateway
38 | gateway = None
39 | vector_service = None
40 | embedding_service = None
41 |
42 | async def setup_services():
43 | """Set up the gateway and vector service for demonstration."""
44 | global gateway, vector_service, embedding_service
45 |
46 | logger.info("Initializing gateway and services...", emoji_key="start")
47 | gateway = Gateway("vector-demo", register_tools=False)
48 | await gateway._initialize_providers()
49 |
50 | embedding_service = get_embedding_service() # Gateway will provide API keys through provider system
51 | vector_service = get_vector_db_service()
52 |
53 | logger.success("Services initialized.", emoji_key="success")
54 |
55 |
56 | async def embedding_generation_demo():
57 | """Demonstrate embedding generation with real providers using Rich."""
58 | console.print(Rule("[bold blue]Embedding Generation Demo[/bold blue]"))
59 | logger.info("Starting embedding generation demo", emoji_key="start")
60 |
61 | text_samples = [
62 | "Quantum computing leverages quantum mechanics to perform computations",
63 | "Artificial intelligence systems can learn from data and improve over time",
64 | "Cloud infrastructure enables scalable and flexible computing resources"
65 | ]
66 | console.print("Input Text Samples:")
67 | for i, sample in enumerate(text_samples):
68 | console.print(f" {i+1}. {escape(sample)}")
69 |
70 | # Define models to test (ensure they are supported by your embedding_service config)
71 | models_to_test = [
72 | "text-embedding-3-small",
73 | "text-embedding-3-large",
74 | "text-embedding-ada-002"
75 | ]
76 |
77 | # Collect results for display
78 | results_data = {"models": []}
79 |
80 | for model_name in models_to_test:
81 | try:
82 | logger.info(f"Generating embeddings with {model_name}...", emoji_key="processing")
83 | start_time = time.time()
84 | embeddings = await embedding_service.create_embeddings(
85 | texts=text_samples
86 | )
87 | processing_time = time.time() - start_time
88 |
89 | model_result = {
90 | "name": model_name,
91 | "success": embeddings and len(embeddings) > 0,
92 | "time": processing_time,
93 | "cost": embedding_service.last_request_cost if hasattr(embedding_service, 'last_request_cost') else 0.0,
94 | }
95 |
96 | if embeddings and len(embeddings) > 0:
97 | dims = len(embeddings[0])
98 | model_result["dimensions"] = dims
99 | model_result["embedding_sample"] = embeddings[0][:3]
100 | logger.success(f"Generated {len(embeddings)} embeddings ({dims} dims) for {model_name}", emoji_key="success")
101 | else:
102 | logger.warning(f"No embeddings returned for {model_name}", emoji_key="warning")
103 |
104 | results_data["models"].append(model_result)
105 |
106 | except Exception as e:
107 | logger.error(f"Error generating embeddings with {model_name}: {e}", emoji_key="error", exc_info=True)
108 | results_data["models"].append({
109 | "name": model_name,
110 | "success": False,
111 | "error": str(e)
112 | })
113 |
114 | # Use the shared display utility to show results
115 | display_embedding_generation_results(results_data)
116 |
117 |
118 | async def vector_search_demo():
119 | """Demonstrate vector search capabilities using Rich."""
120 | console.print(Rule("[bold blue]Vector Search Demo[/bold blue]"))
121 | logger.info("Starting vector search demo", emoji_key="start")
122 |
123 | documents = [
124 | "Quantum computing uses quantum bits or qubits to perform calculations.",
125 | "Machine learning algorithms learn patterns from data without explicit programming.",
126 | "Blockchain technology creates a distributed and immutable ledger of transactions.",
127 | "Cloud computing delivers computing services over the internet on demand.",
128 | "Natural language processing helps computers understand and interpret human language.",
129 | "Artificial intelligence systems can simulate human intelligence in machines.",
130 | "Edge computing processes data closer to where it is generated rather than in a centralized location.",
131 | "Cybersecurity involves protecting systems from digital attacks and unauthorized access.",
132 | "Internet of Things (IoT) connects everyday devices to the internet for data sharing.",
133 | "Virtual reality creates an immersive computer-generated environment."
134 | ]
135 | document_metadata = [
136 | {"id": "doc1", "category": "quantum", "level": "advanced"},
137 | {"id": "doc2", "category": "ai", "level": "intermediate"},
138 | {"id": "doc3", "category": "blockchain", "level": "beginner"},
139 | {"id": "doc4", "category": "cloud", "level": "intermediate"},
140 | {"id": "doc5", "category": "ai", "level": "advanced"},
141 | {"id": "doc6", "category": "ai", "level": "beginner"},
142 | {"id": "doc7", "category": "cloud", "level": "advanced"},
143 | {"id": "doc8", "category": "security", "level": "intermediate"},
144 | {"id": "doc9", "category": "iot", "level": "beginner"},
145 | {"id": "doc10", "category": "vr", "level": "intermediate"}
146 | ]
147 |
148 | collection_name = "demo_vector_store_rich"
149 | embedding_dimension = 1536 # Default for text-embedding-ada-002 / 3-small
150 |
151 | try:
152 | logger.info(f"Creating/resetting collection: {collection_name}", emoji_key="db")
153 | await vector_service.create_collection(
154 | name=collection_name,
155 | dimension=embedding_dimension,
156 | overwrite=True,
157 | metadata={"description": "Demo collection for Rich vector search"}
158 | )
159 |
160 | logger.info("Adding documents to vector store...", emoji_key="processing")
161 | ids = await vector_service.add_texts(
162 | collection_name=collection_name,
163 | texts=documents,
164 | metadatas=document_metadata,
165 | batch_size=5
166 | )
167 | logger.success(f"Added {len(ids)} documents.", emoji_key="success")
168 |
169 | # --- Perform Searches ---
170 | search_queries = [
171 | "How does quantum computing work?",
172 | "Machine learning for image recognition",
173 | "Secure blockchain implementation"
174 | ]
175 |
176 | console.print(Rule("[green]Vector Search Results[/green]"))
177 | for query in search_queries:
178 | logger.info(f'Searching for: "{escape(query)}"...', emoji_key="search")
179 | search_start_time = time.time()
180 | results = await vector_service.search_by_text(
181 | collection_name=collection_name,
182 | query_text=query,
183 | top_k=3,
184 | include_vectors=False,
185 | # Example filter: metadata_filter={"category": "ai"}
186 | )
187 | search_time = time.time() - search_start_time
188 |
189 | # Format the results for the display utility
190 | search_result = {
191 | "processing_time": search_time,
192 | "results": results,
193 | "query": query
194 | }
195 |
196 | # Use the shared display utility
197 | parse_and_display_result(
198 | title=f"Search: {query}",
199 | input_data={"query": query},
200 | result=search_result
201 | )
202 |
203 | except Exception as e:
204 | logger.error(f"Error during vector search demo: {e}", emoji_key="error", exc_info=True)
205 | console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
206 | finally:
207 | # Clean up the collection
208 | try:
209 | logger.info(f"Deleting collection: {collection_name}", emoji_key="db")
210 | await vector_service.delete_collection(collection_name)
211 | except Exception as delete_err:
212 | logger.warning(f"Could not delete collection {collection_name}: {delete_err}", emoji_key="warning")
213 | console.print()
214 |
215 |
216 | async def hybrid_search_demo():
217 | """Demonstrate hybrid search using the marqo_fused_search tool."""
218 | console.print(Rule("[bold blue]Hybrid Search Demo (using Marqo Fused Search Tool)[/bold blue]"))
219 | logger.info("Starting hybrid search demo (conceptual)", emoji_key="start")
220 |
221 | # This demo uses the marqo_fused_search tool, which performs hybrid search.
222 | # It requires a running Marqo instance and a configured index
223 | # as defined in marqo_index_config.json.
224 |
225 | # Note: For this demo to work correctly, the configured Marqo index
226 | # should contain documents related to the query, potentially including
227 | # metadata fields like 'tags' if filtering is intended.
228 | # The setup below is removed as the data needs to be pre-indexed in Marqo.
229 | # collection_name = "demo_hybrid_store_rich"
230 | # try:
231 | # logger.info(f"Creating/resetting collection: {collection_name}", emoji_key="db")
232 | # # ... [Code to create collection and add documents would go here if using local DB] ...
233 | # except Exception as setup_e:
234 | # logger.error(f"Failed to setup data for hybrid demo: {setup_e}", emoji_key="error")
235 | # console.print(f"[bold red]Error setting up demo data: {escape(str(setup_e))}[/bold red]")
236 | # return
237 |
238 | try:
239 |         # --- Perform Hybrid Search via the marqo_fused_search tool ---
240 | query = "cloud semantic search techniques"
241 | # keywords = ["cloud", "semantic"] # Keywords can be included in query or filters
242 | semantic_weight_param = 0.6 # Weight for semantic search (alpha)
243 |
244 | logger.info(f'Hybrid search for: "{escape(query)}" with semantic weight {semantic_weight_param}', emoji_key="search")
245 |
246 | # Call the marqo_fused_search tool directly
247 | hybrid_result = await marqo_fused_search(
248 | query=query,
249 | limit=3, # Request top 3 results
250 | semantic_weight=semantic_weight_param
251 | # Add filters={}, date_range=None etc. if needed based on schema
252 | )
253 |
254 | display_text_content_result(
255 | f"Hybrid Search Results (Weight={semantic_weight_param})",
256 | hybrid_result # Pass the result dict directly
257 | )
258 |
259 | except Exception as e:
260 | logger.error(f"Error during hybrid search demo: {e}", emoji_key="error", exc_info=True)
261 | console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
262 | # Removed cleanup as we assume Marqo index exists independently
263 | console.print()
264 |
265 | async def semantic_similarity_demo():
266 | """Demonstrate calculating semantic similarity using Rich."""
267 | console.print(Rule("[bold blue]Semantic Similarity Demo[/bold blue]"))
268 | logger.info("Starting semantic similarity demo", emoji_key="start")
269 |
270 | text_pairs = [
271 | ("The cat sat on the mat", "A feline was resting upon the rug"),
272 | ("AI is transforming industries", "Artificial intelligence drives innovation"),
273 | ("Cloud computing offers scalability", "The weather today is sunny")
274 | ]
275 |
276 | model_name = "text-embedding-ada-002" # Use a consistent model
277 | logger.info(f"Calculating similarity using model: {model_name}", emoji_key="model")
278 |
279 | # Prepare data structure for the shared display utility
280 | similarity_data = {
281 | "pairs": [],
282 | "model": model_name
283 | }
284 |
285 | try:
286 | all_texts = [text for pair in text_pairs for text in pair]
287 | embeddings = await embedding_service.create_embeddings(
288 | texts=all_texts
289 | )
290 |
291 | if len(embeddings) == len(all_texts):
292 | for i, pair in enumerate(text_pairs):
293 | idx1 = i * 2
294 | idx2 = i * 2 + 1
295 | score = cosine_similarity(embeddings[idx1], embeddings[idx2])
296 |
297 | similarity_data["pairs"].append({
298 | "text1": pair[0],
299 | "text2": pair[1],
300 | "score": score
301 | })
302 |
303 | # Use the specialized display function for similarity results
304 | display_vector_similarity_results(similarity_data)
305 | else:
306 | logger.error("Mismatch between number of texts and embeddings received.", emoji_key="error")
307 | console.print("[red]Error calculating similarities: Embedding count mismatch.[/red]")
308 |
309 | except Exception as e:
310 | logger.error(f"Error calculating semantic similarity: {e}", emoji_key="error", exc_info=True)
311 | console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
312 |
313 | console.print()
314 |
315 | async def main():
316 | """Run all advanced vector search demonstrations."""
317 | await setup_services()
318 | console.print(Rule("[bold magenta]Advanced Vector Search Demos Starting[/bold magenta]"))
319 |
320 | try:
321 | await embedding_generation_demo()
322 | await vector_search_demo()
323 | await hybrid_search_demo()
324 | await semantic_similarity_demo()
325 |
326 | except Exception as e:
327 | logger.critical(f"Vector search demo failed: {str(e)}", emoji_key="critical", exc_info=True)
328 | console.print(f"[bold red]Critical Demo Error:[/bold red] {escape(str(e))}")
329 | return 1
330 |
331 | logger.success("Advanced Vector Search Demos Finished Successfully!", emoji_key="complete")
332 | console.print(Rule("[bold magenta]Advanced Vector Search Demos Complete[/bold magenta]"))
333 | return 0
334 |
335 |
336 | if __name__ == "__main__":
337 | exit_code = asyncio.run(main())
338 | sys.exit(exit_code)
```
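
For reference, the metric used in `semantic_similarity_demo` is plain cosine similarity over the embedding vectors. The demo imports its implementation from the embeddings service; the snippet below is a standalone sketch of the same calculation, not the project's own code:

```python
# Standalone sketch of the cosine-similarity metric used in the demo above.
import numpy as np

def cosine_similarity(a, b) -> float:
    """Normalized dot product of two embedding vectors, in [-1.0, 1.0]."""
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

print(cosine_similarity([1.0, 0.0], [1.0, 0.0]))  # 1.0 (identical direction)
print(cosine_similarity([1.0, 0.0], [0.0, 1.0]))  # 0.0 (orthogonal)
```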
--------------------------------------------------------------------------------
/test_client.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | """
3 | Simple test client for Ultimate MCP Server
4 | Tests basic functionality over streamable-HTTP transport
5 | """
6 |
7 | import asyncio
8 | import json
9 |
10 | from fastmcp import Client
11 |
12 |
13 | async def test_server_connection():
14 | """Test basic server connection and functionality."""
15 | server_url = "http://127.0.0.1:8013/mcp"
16 |
17 | print(f"🔗 Connecting to Ultimate MCP Server at {server_url}")
18 |
19 | try:
20 | async with Client(server_url) as client:
21 | print("✅ Successfully connected to server")
22 |
23 | # Test 1: List available tools
24 | print("\n📋 Listing available tools...")
25 | tools = await client.list_tools()
26 | print(f"Found {len(tools)} tools:")
27 | for i, tool in enumerate(tools[:10]): # Show first 10
28 | print(f" {i + 1:2d}. {tool.name}")
29 | if len(tools) > 10:
30 | print(f" ... and {len(tools) - 10} more tools")
31 |
32 | # Test 2: List available resources
33 | print("\n📚 Listing available resources...")
34 | resources = await client.list_resources()
35 | print(f"Found {len(resources)} resources:")
36 | for resource in resources:
37 | print(f" - {resource.uri}")
38 |
39 | # Test 3: Test echo tool (should be available)
40 | print("\n🔊 Testing echo tool...")
41 | try:
42 | echo_result = await client.call_tool("echo", {"message": "Hello from test client!"})
43 | if echo_result:
44 | print(f"✅ Echo response: {echo_result[0].text}")
45 | else:
46 | print("❌ Echo tool returned no response")
47 | except Exception as e:
48 | print(f"❌ Echo tool failed: {e}")
49 |
50 | # Test 4: Test provider status tool
51 | print("\n🔌 Testing provider status...")
52 | try:
53 | provider_result = await client.call_tool("get_provider_status", {})
54 | if provider_result:
55 | provider_data = json.loads(provider_result[0].text)
56 | providers = provider_data.get("providers", {})
57 | print(f"✅ Found {len(providers)} providers:")
58 | for name, status in providers.items():
59 | available = "✅" if status.get("available") else "❌"
60 | model_count = len(status.get("models", []))
61 | enabled = status.get("enabled", False)
62 | api_key_configured = status.get("api_key_configured", False)
63 |
64 | status_str = f"{available} {name}: {model_count} models"
65 | if not enabled:
66 | status_str += " (disabled)"
67 | elif not api_key_configured:
68 | status_str += " (no API key)"
69 | elif status.get("error"):
70 | status_str += f" (error: {status['error'][:50]}...)"
71 |
72 | print(f" {status_str}")
73 | except Exception as e:
74 | print(f"❌ Provider status failed: {e}")
75 |
76 | # Test 5: Read a resource
77 | print("\n📖 Testing resource reading...")
78 | if resources:
79 | try:
80 | resource_uri = resources[0].uri
81 | resource_content = await client.read_resource(resource_uri)
82 | if resource_content:
83 | # FastMCP uses 'text' attribute, not 'content'
84 | content = resource_content[0].text if hasattr(resource_content[0], 'text') else str(resource_content[0])
85 | preview = content[:200] + "..." if len(content) > 200 else content
86 | print(f"✅ Resource {resource_uri} content preview:")
87 | print(f" {preview}")
88 | except Exception as e:
89 | print(f"❌ Resource reading failed: {e}")
90 |
91 | # Test 6: Test a completion tool
92 | print("\n🤖 Testing completion tool...")
93 | try:
94 | completion_result = await client.call_tool(
95 | "generate_completion",
96 | {
97 | "prompt": "Say hello in a creative way",
98 | "provider": "ollama", # Using local Ollama since it's available
99 | "model": "mix_77/gemma3-qat-tools:27b",
100 | "max_tokens": 100, # Increased for better response
101 | },
102 | )
103 | if completion_result:
104 | result_data = json.loads(completion_result[0].text)
105 | response_text = result_data.get('text', '').strip()
106 | if response_text:
107 | print(f"✅ Completion response: {response_text}")
108 | else:
109 | print("⚠️ Completion succeeded but returned empty text")
110 | print(f" Model: {result_data.get('model', 'unknown')}")
111 | print(f" Processing time: {result_data.get('processing_time', 0):.2f}s")
112 | print(f" Tokens: {result_data.get('tokens', {})}")
113 | except Exception as e:
114 | print(f"❌ Completion tool failed: {e}")
115 | # Try with a different provider
116 | try:
117 | completion_result = await client.call_tool(
118 | "generate_completion",
119 | {
120 | "prompt": "Say hello in a creative way",
121 | "provider": "anthropic",
122 | "model": "claude-3-haiku-20240307",
123 | "max_tokens": 100,
124 | },
125 | )
126 | if completion_result:
127 | result_data = json.loads(completion_result[0].text)
128 | response_text = result_data.get('text', '').strip()
129 | if response_text:
130 | print(f"✅ Completion response (anthropic): {response_text}")
131 | else:
132 | print("⚠️ Anthropic completion succeeded but returned empty text")
133 | except Exception as e2:
134 | print(f"❌ Completion with anthropic also failed: {e2}")
135 |
136 | print("\n🎉 Basic functionality test completed!")
137 |
138 | except Exception as e:
139 | print(f"❌ Failed to connect to server: {e}")
140 | print("Make sure the server is running at the correct address.")
141 |
142 |
143 | async def test_specific_tools():
144 | """Test some specific tools that should be available."""
145 | server_url = "http://127.0.0.1:8013/mcp"
146 |
147 | print("\n🔧 Testing specific tools...")
148 |
149 | try:
150 | async with Client(server_url) as client:
151 | # Test filesystem tools
152 | print("\n📁 Testing filesystem tools...")
153 | try:
154 | # List allowed directories
155 | dirs_result = await client.call_tool("list_allowed_directories", {})
156 | if dirs_result:
157 | print(f"✅ Allowed directories: {dirs_result[0].text}")
158 |
159 | # Try to list current directory
160 | ls_result = await client.call_tool("list_directory", {"path": "."})
161 | if ls_result:
162 | ls_data = json.loads(ls_result[0].text)
163 | files = ls_data.get("files", [])
164 | print(f"✅ Current directory has {len(files)} items")
165 | except Exception as e:
166 | print(f"❌ Filesystem tools failed: {e}")
167 |
168 | # Test Python execution
169 | print("\n🐍 Testing Python execution...")
170 | try:
171 | python_result = await client.call_tool(
172 | "execute_python",
173 | {
174 | "code": "print('Hello from Python!'); result = 2 + 2; print(f'2 + 2 = {result}')"
175 | },
176 | )
177 | if python_result:
178 | print("✅ Python execution result:")
179 | result_data = json.loads(python_result[0].text)
180 | # The field is called 'stdout', not 'output'
181 | print(f" Output: {result_data.get('stdout', 'No output')}")
182 | print(f" Success: {result_data.get('success', False)}")
183 | if result_data.get('result') is not None:
184 | print(f" Result: {result_data.get('result')}")
185 | except Exception as e:
186 | print(f"❌ Python execution failed: {e}")
187 |
188 | # Test text processing tools
189 | print("\n📝 Testing text processing tools...")
190 | try:
191 | # Test ripgrep if available - tool expects args_str parameter
192 | ripgrep_result = await client.call_tool(
193 | "run_ripgrep", {
194 | "args_str": "'FastMCP' . -t py",
195 | "input_dir": True # Since we're searching in a directory
196 | }
197 | )
198 | if ripgrep_result:
199 | result_data = json.loads(ripgrep_result[0].text)
200 | if result_data.get('success'):
201 | print("✅ Ripgrep executed successfully")
202 | stdout = result_data.get('stdout', '')
203 | if stdout.strip():
204 | print(f" Found matches: {len(stdout.strip().splitlines())} lines")
205 | else:
206 | print(" No matches found")
207 | else:
208 | print(f"❌ Ripgrep failed: {result_data.get('error', 'Unknown error')}")
209 | except Exception as e:
210 | print(f"❌ Text processing tools failed: {e}")
211 |
212 | except Exception as e:
213 | print(f"❌ Failed during specific tool testing: {e}")
214 |
215 |
216 | async def interactive_mode():
217 | """Interactive mode for testing tools manually."""
218 | server_url = "http://127.0.0.1:8013/mcp"
219 |
220 | print("\n🎮 Entering interactive mode...")
221 | print("Type 'list' to see available tools, 'quit' to exit")
222 |
223 | try:
224 | async with Client(server_url) as client:
225 | tools = await client.list_tools()
226 | tool_names = [tool.name for tool in tools]
227 |
228 | while True:
229 | try:
230 | command = input("\n> ").strip()
231 |
232 | if command.lower() in ["quit", "exit", "q"]:
233 | break
234 | elif command.lower() == "list":
235 | print("Available tools:")
236 | for i, name in enumerate(tool_names[:20]): # Show first 20
237 | print(f" {i + 1:2d}. {name}")
238 | if len(tool_names) > 20:
239 | print(f" ... and {len(tool_names) - 20} more")
240 | elif command.lower() == "resources":
241 | resources = await client.list_resources()
242 | print("Available resources:")
243 | for resource in resources:
244 | print(f" - {resource.uri}")
245 | elif command.startswith("call "):
246 | # Parse tool call: call tool_name {"param": "value"}
247 | parts = command[5:].split(" ", 1)
248 | tool_name = parts[0]
249 | params = {}
250 | if len(parts) > 1:
251 | try:
252 | params = json.loads(parts[1])
253 | except json.JSONDecodeError:
254 | print("❌ Invalid JSON for parameters")
255 | continue
256 |
257 | if tool_name in tool_names:
258 | try:
259 | result = await client.call_tool(tool_name, params)
260 | if result:
261 | print(f"✅ Result: {result[0].text}")
262 | else:
263 | print("❌ No result returned")
264 | except Exception as e:
265 | print(f"❌ Tool call failed: {e}")
266 | else:
267 | print(f"❌ Tool '{tool_name}' not found")
268 | elif command.startswith("read "):
269 | # Read resource: read resource_uri
270 | resource_uri = command[5:].strip()
271 | try:
272 | result = await client.read_resource(resource_uri)
273 | if result:
274 | # FastMCP uses 'text' attribute, not 'content'
275 | content = result[0].text if hasattr(result[0], 'text') else str(result[0])
276 | preview = content[:500] + "..." if len(content) > 500 else content
277 | print(f"✅ Resource content: {preview}")
278 | else:
279 | print("❌ No content returned")
280 | except Exception as e:
281 | print(f"❌ Resource read failed: {e}")
282 | else:
283 | print("Commands:")
284 | print(" list - List available tools")
285 | print(" resources - List available resources")
286 | print(" call <tool> <json_params> - Call a tool")
287 | print(" read <resource_uri> - Read a resource")
288 | print(" quit - Exit interactive mode")
289 |
290 | except KeyboardInterrupt:
291 | break
292 | except EOFError:
293 | break
294 |
295 | except Exception as e:
296 | print(f"❌ Interactive mode failed: {e}")
297 |
298 |
299 | async def main():
300 | """Main test function."""
301 | print("🚀 Ultimate MCP Server Test Client")
302 | print("=" * 50)
303 |
304 | # Run basic connection test
305 | await test_server_connection()
306 |
307 | # Run specific tool tests
308 | await test_specific_tools()
309 |
310 | # Ask if user wants interactive mode
311 | try:
312 | response = input("\nWould you like to enter interactive mode? (y/n): ").strip().lower()
313 | if response in ["y", "yes"]:
314 | await interactive_mode()
315 | except (KeyboardInterrupt, EOFError):
316 | pass
317 |
318 | print("\n👋 Test client finished!")
319 |
320 |
321 | if __name__ == "__main__":
322 | asyncio.run(main())
323 |
```