This is page 10 of 45. Use http://codebase.md/dicklesworthstone/llm_gateway_mcp_server?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .cursorignore
├── .env.example
├── .envrc
├── .gitignore
├── additional_features.md
├── check_api_keys.py
├── completion_support.py
├── comprehensive_test.py
├── docker-compose.yml
├── Dockerfile
├── empirically_measured_model_speeds.json
├── error_handling.py
├── example_structured_tool.py
├── examples
│ ├── __init__.py
│ ├── advanced_agent_flows_using_unified_memory_system_demo.py
│ ├── advanced_extraction_demo.py
│ ├── advanced_unified_memory_system_demo.py
│ ├── advanced_vector_search_demo.py
│ ├── analytics_reporting_demo.py
│ ├── audio_transcription_demo.py
│ ├── basic_completion_demo.py
│ ├── cache_demo.py
│ ├── claude_integration_demo.py
│ ├── compare_synthesize_demo.py
│ ├── cost_optimization.py
│ ├── data
│ │ ├── sample_event.txt
│ │ ├── Steve_Jobs_Introducing_The_iPhone_compressed.md
│ │ └── Steve_Jobs_Introducing_The_iPhone_compressed.mp3
│ ├── docstring_refiner_demo.py
│ ├── document_conversion_and_processing_demo.py
│ ├── entity_relation_graph_demo.py
│ ├── filesystem_operations_demo.py
│ ├── grok_integration_demo.py
│ ├── local_text_tools_demo.py
│ ├── marqo_fused_search_demo.py
│ ├── measure_model_speeds.py
│ ├── meta_api_demo.py
│ ├── multi_provider_demo.py
│ ├── ollama_integration_demo.py
│ ├── prompt_templates_demo.py
│ ├── python_sandbox_demo.py
│ ├── rag_example.py
│ ├── research_workflow_demo.py
│ ├── sample
│ │ ├── article.txt
│ │ ├── backprop_paper.pdf
│ │ ├── buffett.pdf
│ │ ├── contract_link.txt
│ │ ├── legal_contract.txt
│ │ ├── medical_case.txt
│ │ ├── northwind.db
│ │ ├── research_paper.txt
│ │ ├── sample_data.json
│ │ └── text_classification_samples
│ │ ├── email_classification.txt
│ │ ├── news_samples.txt
│ │ ├── product_reviews.txt
│ │ └── support_tickets.txt
│ ├── sample_docs
│ │ └── downloaded
│ │ └── attention_is_all_you_need.pdf
│ ├── sentiment_analysis_demo.py
│ ├── simple_completion_demo.py
│ ├── single_shot_synthesis_demo.py
│ ├── smart_browser_demo.py
│ ├── sql_database_demo.py
│ ├── sse_client_demo.py
│ ├── test_code_extraction.py
│ ├── test_content_detection.py
│ ├── test_ollama.py
│ ├── text_classification_demo.py
│ ├── text_redline_demo.py
│ ├── tool_composition_examples.py
│ ├── tournament_code_demo.py
│ ├── tournament_text_demo.py
│ ├── unified_memory_system_demo.py
│ ├── vector_search_demo.py
│ ├── web_automation_instruction_packs.py
│ └── workflow_delegation_demo.py
├── LICENSE
├── list_models.py
├── marqo_index_config.json.example
├── mcp_protocol_schema_2025-03-25_version.json
├── mcp_python_lib_docs.md
├── mcp_tool_context_estimator.py
├── model_preferences.py
├── pyproject.toml
├── quick_test.py
├── README.md
├── resource_annotations.py
├── run_all_demo_scripts_and_check_for_errors.py
├── storage
│ └── smart_browser_internal
│ ├── locator_cache.db
│ ├── readability.js
│ └── storage_state.enc
├── test_client.py
├── test_connection.py
├── TEST_README.md
├── test_sse_client.py
├── test_stdio_client.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── integration
│ │ ├── __init__.py
│ │ └── test_server.py
│ ├── manual
│ │ ├── test_extraction_advanced.py
│ │ └── test_extraction.py
│ └── unit
│ ├── __init__.py
│ ├── test_cache.py
│ ├── test_providers.py
│ └── test_tools.py
├── TODO.md
├── tool_annotations.py
├── tools_list.json
├── ultimate_mcp_banner.webp
├── ultimate_mcp_logo.webp
├── ultimate_mcp_server
│ ├── __init__.py
│ ├── __main__.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── commands.py
│ │ ├── helpers.py
│ │ └── typer_cli.py
│ ├── clients
│ │ ├── __init__.py
│ │ ├── completion_client.py
│ │ └── rag_client.py
│ ├── config
│ │ └── examples
│ │ └── filesystem_config.yaml
│ ├── config.py
│ ├── constants.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── evaluation
│ │ │ ├── base.py
│ │ │ └── evaluators.py
│ │ ├── providers
│ │ │ ├── __init__.py
│ │ │ ├── anthropic.py
│ │ │ ├── base.py
│ │ │ ├── deepseek.py
│ │ │ ├── gemini.py
│ │ │ ├── grok.py
│ │ │ ├── ollama.py
│ │ │ ├── openai.py
│ │ │ └── openrouter.py
│ │ ├── server.py
│ │ ├── state_store.py
│ │ ├── tournaments
│ │ │ ├── manager.py
│ │ │ ├── tasks.py
│ │ │ └── utils.py
│ │ └── ums_api
│ │ ├── __init__.py
│ │ ├── ums_database.py
│ │ ├── ums_endpoints.py
│ │ ├── ums_models.py
│ │ └── ums_services.py
│ ├── exceptions.py
│ ├── graceful_shutdown.py
│ ├── services
│ │ ├── __init__.py
│ │ ├── analytics
│ │ │ ├── __init__.py
│ │ │ ├── metrics.py
│ │ │ └── reporting.py
│ │ ├── cache
│ │ │ ├── __init__.py
│ │ │ ├── cache_service.py
│ │ │ ├── persistence.py
│ │ │ ├── strategies.py
│ │ │ └── utils.py
│ │ ├── cache.py
│ │ ├── document.py
│ │ ├── knowledge_base
│ │ │ ├── __init__.py
│ │ │ ├── feedback.py
│ │ │ ├── manager.py
│ │ │ ├── rag_engine.py
│ │ │ ├── retriever.py
│ │ │ └── utils.py
│ │ ├── prompts
│ │ │ ├── __init__.py
│ │ │ ├── repository.py
│ │ │ └── templates.py
│ │ ├── prompts.py
│ │ └── vector
│ │ ├── __init__.py
│ │ ├── embeddings.py
│ │ └── vector_service.py
│ ├── tool_token_counter.py
│ ├── tools
│ │ ├── __init__.py
│ │ ├── audio_transcription.py
│ │ ├── base.py
│ │ ├── completion.py
│ │ ├── docstring_refiner.py
│ │ ├── document_conversion_and_processing.py
│ │ ├── enhanced-ums-lookbook.html
│ │ ├── entity_relation_graph.py
│ │ ├── excel_spreadsheet_automation.py
│ │ ├── extraction.py
│ │ ├── filesystem.py
│ │ ├── html_to_markdown.py
│ │ ├── local_text_tools.py
│ │ ├── marqo_fused_search.py
│ │ ├── meta_api_tool.py
│ │ ├── ocr_tools.py
│ │ ├── optimization.py
│ │ ├── provider.py
│ │ ├── pyodide_boot_template.html
│ │ ├── python_sandbox.py
│ │ ├── rag.py
│ │ ├── redline-compiled.css
│ │ ├── sentiment_analysis.py
│ │ ├── single_shot_synthesis.py
│ │ ├── smart_browser.py
│ │ ├── sql_databases.py
│ │ ├── text_classification.py
│ │ ├── text_redline_tools.py
│ │ ├── tournament.py
│ │ ├── ums_explorer.html
│ │ └── unified_memory_system.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── async_utils.py
│ │ ├── display.py
│ │ ├── logging
│ │ │ ├── __init__.py
│ │ │ ├── console.py
│ │ │ ├── emojis.py
│ │ │ ├── formatter.py
│ │ │ ├── logger.py
│ │ │ ├── panels.py
│ │ │ ├── progress.py
│ │ │ └── themes.py
│ │ ├── parse_yaml.py
│ │ ├── parsing.py
│ │ ├── security.py
│ │ └── text.py
│ └── working_memory_api.py
├── unified_memory_system_technical_analysis.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/examples/data/Steve_Jobs_Introducing_The_iPhone_compressed.md:
--------------------------------------------------------------------------------
```markdown
1 | # Transcript: Steve_Jobs_Introducing_The_iPhone_compressed.mp3
2 |
3 | ## Metadata
4 | - **Duration:** 14:00.46
5 | - **Language:** en (confidence: 1.00)
6 | - **Transcription Model:** large-v3
7 | - **Device:** cuda
8 | - **Processing Time:** 99.07 seconds
9 |
10 | ## Full Transcript
11 |
12 | This is a day I've been looking forward to for two and a half years. Every once in a while, a revolutionary product comes along that changes everything. And Apple has been, well, first of all, one's very fortunate if you get to work on just one of these in your career. Apple's been very fortunate. It's been able to introduce a few of these into the world. In 1984, we introduced the Macintosh. It didn't just change Apple. It changed the whole computer industry. In 2001, we introduced the first iPod. And it didn't just change the way we all listen to music. It changed the entire music industry. Well, today. Today. We're introducing three revolutionary products of this class. The first one is a widescreen iPod with touch controls. The second is a revolutionary mobile phone. And the third is a breakthrough internet communications device. So, three things. A widescreen iPod with touch controls. A revolutionary mobile phone. And a breakthrough internet communications device. An iPod. A phone. And an internet communicator. An iPod. A phone. These are not three separate devices. This is one device. We are calling it iPhone. Today. Today. Today, Apple is going to reinvent the phone. And here it is. Actually, here it is, but we're going to leave it there for now. So, before we get into it, let me talk about a category of things. The most advanced phones are called smart phones. So they say. And they typically combine a phone plus some email capability. Plus, they say it's the internet, sort of the baby internet. Into one device. And they all have these plastic little keyboards on them. And the problem is that they're not so smart. And they're not so easy to use. So, if you kind of make a, you know, business school 101 graph of the smart axis and the easy to use axis. Phones, regular cell phones are kind of right there. They're not so smart. And they're, you know, not so easy to use. But smart phones are definitely a little smarter. But they actually are harder to use. 
They're really complicated. Just for the basic stuff, people have a hard time figuring out how to use them. Well, we don't want to do either one of these things. What we want to do is make a leapfrog product that is way smarter than any mobile device has ever been. And super easy to use. This is what iPhone is. Okay. So, we're going to reinvent the phone. Now, we're going to start. We're going to start with a revolutionary user interface. Is the result of years of research and development. And, of course, it's an interplay of hardware and software. Now, why do we need a revolutionary user interface? I mean, here's four smart phones, right? Motorola Q, the Blackberry, Palm Treo, Nokia E62, the usual suspects. And what's wrong with their user interfaces? Well, the problem with them. Is really sort of in the bottom 40 there. It's this stuff right here. They all have these keyboards that are there whether you need them or not to be there. And they all have these control buttons that are fixed in plastic. And are the same for every application. Well, every application wants a slightly different user interface. A slightly optimized set of buttons just for it. And what happens if you think of a great idea six months from now? You can't run around and add a button to these things. They're already shipped. So, what do you do? It doesn't work because the buttons and the controls can't change. They can't change for each application. And they can't change down the road if you think of another great idea you want to add to this product. Well, how do you solve this? Hmm. It turns out we have solved it. We solved it in computers 20 years ago. We solved it with a bitmap screen that could display anything we want. Put any user interface. And a pointing device. We solved it with the mouse. Right? We solved this problem. So, how are we going to take this to a mobile device? Well, what we're going to do is get rid of all these buttons and just make a giant screen. A giant screen. 
Now, how are we going to communicate this? We don't want to carry around a mouse. Right? So, what are we going to do? Oh, a stylus. Right? We're going to use a stylus. No. Who wants a stylus? You have to get them and put them away and you lose them. Yuck. Nobody wants a stylus. So, let's not use a stylus. We're going to use the best pointing device in the world. We're going to use a pointing device that we're all born with. We're born with 10 of them. We're going to use our fingers. We're going to touch this with our fingers. And we have invented a new technology called multi-touch, which is phenomenal. It works like magic. You don't need a stylus. It's far more accurate than any touch display that's ever been shipped. It ignores unintended touches. It's super smart. You can do multi-finger gestures on it. And boy, have we patented it. We've been very lucky to have brought a few revolutionary user interfaces to the market in our time. First was the mouse. The second was the click wheel. And now we're going to bring multi-touch to the market. And each of these revolutionary user interfaces has made possible a revolutionary product. The Mac, the iPod, and now the iPhone. So, a revolutionary user interface. We're going to build on top of that with software. Now, software on mobile phones is like software. It's like baby software. It's not so powerful. And today, we're going to show you a software breakthrough. Software that's at least five years ahead of what's on any other phone. Now, how do we do this? Well, we start with a strong foundation. iPhone runs OS X. Now, why would we want to run such a sophisticated operating system on a mobile device? Well, because it's got everything we need. It's got multi-tasking. It's got the best networking. It already knows how to power manage. We've been doing this on mobile computers for years. It's got awesome security. And to write apps. It's got everything from Cocoa and the graphics. And it's got core animation built in. 
And it's got the audio and video that OS X is famous for. It's got all the stuff we want. And it's built right in to iPhone. And that has let us create desktop class applications and networking. Right? Not the crippled stuff that you find on most phones. This is real desktop class applications. Now, you know, one of the pioneers of our industry, Alan Kay, has had a lot of great quotes throughout the years. And I ran across one of them recently that explains how we look at this. Explains why we go about doing things the way we do. Because we love software. And here's the quote. People who are really serious about software should make their own hardware. You know? Alan said this 30 years ago. And this is how we feel about it. And so we're bringing breakthrough software to a mobile device for the first time. It's five years ahead of anything on any other phone. The second thing we're doing is we're learning from the iPod. Syncing with iTunes. You know, we're going to ship our hundred millionth iPod this year. And that's tens of millions of people that know how to sync these devices with their PCs or Mac and sync all of their media right on to their iPod. Right? So you just drop your iPod in and it automatically syncs. You're going to do the same thing with iPhone. It automatically syncs to your PC or Mac right through iTunes. And iTunes is going to sync all your media onto your iPhone. Your music, your audiobooks, podcasts, movies, TV shows, music videos. But it also syncs a ton of data. Your contacts, your calendars, and your photos, which you can get on your iPod today, your notes, your bookmarks from your web browser, your email accounts, your whole email setup, all that stuff can be moved over to iPhone completely automatically. It's really nice. And we do it through iTunes. Again, you go to iTunes and you set it up, just like you'd set up an iPod or an Apple TV. And you set up what you want synced to your iPhone. And it's just like an iPod. Charge and sync. 
So sync with iTunes. Third thing I want to talk about a little is design. We've designed something wonderful for your hand. Just wonderful. And this is what it looks like. It's got a three and a half inch screen. It's really big. And it's the highest resolution screen we've ever shipped. It's 160 pixels per inch. Highest we've ever shipped. It's gorgeous. And on the front, there's only one button down there. We call it the home button. It takes you home from wherever you are. And that's it. Let's take a look at the side. It's really thin. It's thinner than any smartphone out there at 11.6 millimeters. Thinner than the Q, thinner than the Blackjack, thinner than all of them. It's really nice. And we've got some controls on the side. We've got a little switch for ring and silent. We've got a volume up and down control. Let's look at the back. On the back, the biggest thing of note is we've got a two megapixel camera built right in. The other side, and we're back on the front. So let's take a look at the top now. We've got a headset jack. Three and a half millimeter. All your iPod headphones fit right in. We've got a place, a little tray for your SIM card. And we've got one switch for sleep and wake. Just push it to go to sleep, push it to wake up. Let's take a look at the bottom. We've got a speaker. We've got a microphone. And we've got our 30-pin iPod connector. So that's the bottom. Now, we've also got some stuff you can't see. We've got three really advanced sensors built into this phone. The first one is a proximity sensor. It senses when physical objects get close. So when you bring iPhone up to your ear, take a phone call, it turns off the display, and it turns off the touch sensor instantly. Why do you want to do that? Well, one, to save battery, but two, so you don't get spurious inputs from your face into the touchscreen. It just automatically turns them off. Take it away, boom, it's back on. So it's got a proximity sensor built in. 
It's got an ambient light sensor as well. We sense the ambient lighting conditions and adjust the brightness of the display to match the ambient lighting conditions. Again, better user experience saves power. And the third thing we've got is an accelerometer so that we can tell when you switch from portrait to landscape.
13 |
14 | ## Segments
15 |
16 | **[00:04.11 → 00:08.89]** This is a day I've been looking forward to for two and a half years.
17 |
18 | **[00:15.64 → 00:21.86]** Every once in a while, a revolutionary product comes along that changes everything.
19 |
20 | **[00:25.66 → 00:33.30]** And Apple has been, well, first of all, one's very fortunate if you get to work on just one of these in your career.
21 |
22 | **[00:35.67 → 00:37.01]** Apple's been very fortunate.
23 |
24 | **[00:37.81 → 00:41.91]** It's been able to introduce a few of these into the world.
25 |
26 | **[00:41.91 → 00:46.87]** In 1984, we introduced the Macintosh.
27 |
28 | **[00:47.39 → 00:49.05]** It didn't just change Apple.
29 |
30 | **[00:49.53 → 00:51.49]** It changed the whole computer industry.
31 |
32 | **[01:03.15 → 01:07.53]** In 2001, we introduced the first iPod.
33 |
34 | **[01:09.55 → 01:15.37]** And it didn't just change the way we all listen to music.
35 |
36 | **[01:15.65 → 01:17.85]** It changed the entire music industry.
37 |
38 | **[01:20.00 → 01:22.68]** Well, today.
39 |
40 | **[01:22.78 → 01:23.18]** Today.
41 |
42 | **[01:24.68 → 01:29.66]** We're introducing three revolutionary products of this class.
43 |
44 | **[01:32.32 → 01:39.27]** The first one is a widescreen iPod with touch controls.
45 |
46 | **[01:39.61 → 01:54.70]** The second is a revolutionary mobile phone.
47 |
48 | **[02:04.40 → 02:10.24]** And the third is a breakthrough internet communications device.
49 |
50 | **[02:14.88 → 02:17.46]** So, three things.
51 |
52 | **[02:17.74 → 02:20.56]** A widescreen iPod with touch controls.
53 |
54 | **[02:20.56 → 02:22.90]** A revolutionary mobile phone.
55 |
56 | **[02:23.04 → 02:26.26]** And a breakthrough internet communications device.
57 |
58 | **[02:26.60 → 02:29.55]** An iPod.
59 |
60 | **[02:30.33 → 02:31.65]** A phone.
61 |
62 | **[02:33.16 → 02:35.24]** And an internet communicator.
63 |
64 | **[02:36.10 → 02:37.84]** An iPod.
65 |
66 | **[02:38.38 → 02:39.56]** A phone.
67 |
68 | **[02:39.88 → 02:49.96]** These are not three separate devices.
69 |
70 | **[02:50.54 → 02:52.34]** This is one device.
71 |
72 | **[02:52.70 → 03:00.14]** We are calling it iPhone.
73 |
74 | **[03:02.07 → 03:02.95]** Today.
75 |
76 | **[03:05.06 → 03:05.50]** Today.
77 |
78 | **[03:05.50 → 03:08.64]** Today, Apple is going to reinvent the phone.
79 |
80 | **[03:10.35 → 03:11.29]** And here it is.
81 |
82 | **[03:20.06 → 03:22.86]** Actually, here it is, but we're going to leave it there for now.
83 |
84 | **[03:24.36 → 03:33.58]** So, before we get into it, let me talk about a category of things.
85 |
86 | **[03:33.68 → 03:36.10]** The most advanced phones are called smart phones.
87 |
88 | **[03:37.28 → 03:38.16]** So they say.
89 |
90 | **[03:39.24 → 03:43.64]** And they typically combine a phone plus some email capability.
91 |
92 | **[03:43.96 → 03:46.68]** Plus, they say it's the internet, sort of the baby internet.
93 |
94 | **[03:46.68 → 03:47.82]** Into one device.
95 |
96 | **[03:47.96 → 03:50.78]** And they all have these plastic little keyboards on them.
97 |
98 | **[03:51.48 → 03:55.46]** And the problem is that they're not so smart.
99 |
100 | **[03:55.82 → 03:57.68]** And they're not so easy to use.
101 |
102 | **[03:57.74 → 04:04.70]** So, if you kind of make a, you know, business school 101 graph of the smart axis and the easy to use axis.
103 |
104 | **[04:05.14 → 04:07.24]** Phones, regular cell phones are kind of right there.
105 |
106 | **[04:07.32 → 04:08.38]** They're not so smart.
107 |
108 | **[04:08.44 → 04:10.84]** And they're, you know, not so easy to use.
109 |
110 | **[04:12.44 → 04:14.72]** But smart phones are definitely a little smarter.
111 |
112 | **[04:14.72 → 04:16.80]** But they actually are harder to use.
113 |
114 | **[04:17.12 → 04:18.60]** They're really complicated.
115 |
116 | **[04:18.86 → 04:22.44]** Just for the basic stuff, people have a hard time figuring out how to use them.
117 |
118 | **[04:23.44 → 04:25.82]** Well, we don't want to do either one of these things.
119 |
120 | **[04:26.04 → 04:33.46]** What we want to do is make a leapfrog product that is way smarter than any mobile device has ever been.
121 |
122 | **[04:33.68 → 04:35.50]** And super easy to use.
123 |
124 | **[04:35.64 → 04:37.48]** This is what iPhone is.
125 |
126 | **[04:37.82 → 04:38.36]** Okay.
127 |
128 | **[04:40.49 → 04:43.09]** So, we're going to reinvent the phone.
129 |
130 | **[04:44.45 → 04:46.09]** Now, we're going to start.
131 |
132 | **[04:46.35 → 04:52.85]** We're going to start with a revolutionary user interface.
133 |
134 | **[04:54.23 → 04:58.55]** Is the result of years of research and development.
135 |
136 | **[05:00.19 → 05:03.61]** And, of course, it's an interplay of hardware and software.
137 |
138 | **[05:04.67 → 05:07.49]** Now, why do we need a revolutionary user interface?
139 |
140 | **[05:07.77 → 05:10.15]** I mean, here's four smart phones, right?
141 |
142 | **[05:10.25 → 05:15.09]** Motorola Q, the Blackberry, Palm Treo, Nokia E62, the usual suspects.
143 |
144 | **[05:15.41 → 05:18.15]** And what's wrong with their user interfaces?
145 |
146 | **[05:18.19 → 05:19.95]** Well, the problem with them.
147 |
148 | **[05:20.45 → 05:22.57]** Is really sort of in the bottom 40 there.
149 |
150 | **[05:23.05 → 05:24.67]** It's this stuff right here.
151 |
152 | **[05:25.69 → 05:30.13]** They all have these keyboards that are there whether you need them or not to be there.
153 |
154 | **[05:30.49 → 05:34.41]** And they all have these control buttons that are fixed in plastic.
155 |
156 | **[05:34.71 → 05:36.93]** And are the same for every application.
157 |
158 | **[05:37.21 → 05:39.93]** Well, every application wants a slightly different user interface.
159 |
160 | **[05:40.17 → 05:43.35]** A slightly optimized set of buttons just for it.
161 |
162 | **[05:43.81 → 05:46.77]** And what happens if you think of a great idea six months from now?
163 |
164 | **[05:46.93 → 05:49.21]** You can't run around and add a button to these things.
165 |
166 | **[05:49.21 → 05:49.89]** They're already shipped.
167 |
168 | **[05:50.19 → 05:51.27]** So, what do you do?
169 |
170 | **[05:51.96 → 05:56.41]** It doesn't work because the buttons and the controls can't change.
171 |
172 | **[05:56.67 → 05:58.27]** They can't change for each application.
173 |
174 | **[05:58.55 → 06:03.87]** And they can't change down the road if you think of another great idea you want to add to this product.
175 |
176 | **[06:04.55 → 06:05.99]** Well, how do you solve this?
177 |
178 | **[06:06.43 → 06:06.81]** Hmm.
179 |
180 | **[06:07.09 → 06:09.37]** It turns out we have solved it.
181 |
182 | **[06:09.43 → 06:12.19]** We solved it in computers 20 years ago.
183 |
184 | **[06:12.81 → 06:17.55]** We solved it with a bitmap screen that could display anything we want.
185 |
186 | **[06:17.71 → 06:19.11]** Put any user interface.
187 |
188 | **[06:19.11 → 06:21.79]** And a pointing device.
189 |
190 | **[06:22.23 → 06:23.57]** We solved it with the mouse.
191 |
192 | **[06:24.07 → 06:24.45]** Right?
193 |
194 | **[06:25.01 → 06:26.13]** We solved this problem.
195 |
196 | **[06:26.25 → 06:28.61]** So, how are we going to take this to a mobile device?
197 |
198 | **[06:29.35 → 06:34.51]** Well, what we're going to do is get rid of all these buttons and just make a giant screen.
199 |
200 | **[06:35.47 → 06:36.57]** A giant screen.
201 |
202 | **[06:38.39 → 06:40.39]** Now, how are we going to communicate this?
203 |
204 | **[06:40.45 → 06:41.81]** We don't want to carry around a mouse.
205 |
206 | **[06:41.95 → 06:42.19]** Right?
207 |
208 | **[06:42.31 → 06:43.31]** So, what are we going to do?
209 |
210 | **[06:43.91 → 06:44.95]** Oh, a stylus.
211 |
212 | **[06:45.03 → 06:45.25]** Right?
213 |
214 | **[06:45.65 → 06:46.81]** We're going to use a stylus.
215 |
216 | **[06:47.83 → 06:48.23]** No.
217 |
218 | **[06:50.35 → 06:51.69]** Who wants a stylus?
219 |
220 | **[06:52.69 → 06:55.15]** You have to get them and put them away and you lose them.
221 |
222 | **[06:55.27 → 06:55.67]** Yuck.
223 |
224 | **[06:56.27 → 06:57.57]** Nobody wants a stylus.
225 |
226 | **[06:57.71 → 06:59.11]** So, let's not use a stylus.
227 |
228 | **[07:00.19 → 07:02.73]** We're going to use the best pointing device in the world.
229 |
230 | **[07:02.79 → 07:05.47]** We're going to use a pointing device that we're all born with.
231 |
232 | **[07:05.57 → 07:06.67]** We're born with 10 of them.
233 |
234 | **[07:06.71 → 07:07.53]** We're going to use our fingers.
235 |
236 | **[07:08.49 → 07:09.95]** We're going to touch this with our fingers.
237 |
238 | **[07:10.13 → 07:15.47]** And we have invented a new technology called multi-touch, which is phenomenal.
239 |
240 | **[07:16.27 → 07:17.83]** It works like magic.
241 |
242 | **[07:19.45 → 07:21.31]** You don't need a stylus.
243 |
244 | **[07:22.05 → 07:26.09]** It's far more accurate than any touch display that's ever been shipped.
245 |
246 | **[07:26.65 → 07:29.03]** It ignores unintended touches.
247 |
248 | **[07:29.15 → 07:30.07]** It's super smart.
249 |
250 | **[07:31.13 → 07:33.79]** You can do multi-finger gestures on it.
251 |
252 | **[07:34.39 → 07:36.41]** And boy, have we patented it.
253 |
254 | **[07:46.21 → 07:53.01]** We've been very lucky to have brought a few revolutionary user interfaces to the market in our time.
255 |
256 | **[07:53.73 → 07:54.73]** First was the mouse.
257 |
258 | **[07:55.73 → 07:58.55]** The second was the click wheel.
259 |
260 | **[07:58.75 → 08:02.11]** And now we're going to bring multi-touch to the market.
261 |
262 | **[08:02.61 → 08:09.01]** And each of these revolutionary user interfaces has made possible a revolutionary product.
263 |
264 | **[08:09.23 → 08:12.79]** The Mac, the iPod, and now the iPhone.
265 |
266 | **[08:13.25 → 08:15.85]** So, a revolutionary user interface.
267 |
268 | **[08:16.09 → 08:20.09]** We're going to build on top of that with software.
269 |
270 | **[08:20.29 → 08:23.93]** Now, software on mobile phones is like software.
271 |
272 | **[08:23.93 → 08:25.55]** It's like baby software.
273 |
274 | **[08:26.47 → 08:28.17]** It's not so powerful.
275 |
276 | **[08:28.65 → 08:32.01]** And today, we're going to show you a software breakthrough.
277 |
278 | **[08:32.27 → 08:37.15]** Software that's at least five years ahead of what's on any other phone.
279 |
280 | **[08:37.41 → 08:38.45]** Now, how do we do this?
281 |
282 | **[08:38.51 → 08:41.03]** Well, we start with a strong foundation.
283 |
284 | **[08:41.79 → 08:43.87]** iPhone runs OS X.
285 |
286 | **[08:47.68 → 08:57.76]** Now, why would we want to run such a sophisticated operating system
287 |
288 | **[08:57.76 → 08:59.26]** on a mobile device?
289 |
290 | **[08:59.46 → 09:01.38]** Well, because it's got everything we need.
291 |
292 | **[09:01.94 → 09:03.50]** It's got multi-tasking.
293 |
294 | **[09:03.72 → 09:05.18]** It's got the best networking.
295 |
296 | **[09:05.52 → 09:07.70]** It already knows how to power manage.
297 |
298 | **[09:07.78 → 09:09.96]** We've been doing this on mobile computers for years.
299 |
300 | **[09:10.30 → 09:11.74]** It's got awesome security.
301 |
302 | **[09:12.06 → 09:13.28]** And to write apps.
303 |
304 | **[09:13.72 → 09:17.30]** It's got everything from Cocoa and the graphics.
305 |
306 | **[09:17.38 → 09:19.50]** And it's got core animation built in.
307 |
308 | **[09:19.66 → 09:23.58]** And it's got the audio and video that OS X is famous for.
309 |
310 | **[09:23.66 → 09:25.10]** It's got all the stuff we want.
311 |
312 | **[09:25.10 → 09:27.52]** And it's built right in to iPhone.
313 |
314 | **[09:27.76 → 09:32.82]** And that has let us create desktop class applications and networking.
315 |
316 | **[09:33.92 → 09:34.46]** Right?
317 |
318 | **[09:35.66 → 09:38.78]** Not the crippled stuff that you find on most phones.
319 |
320 | **[09:38.94 → 09:41.58]** This is real desktop class applications.
321 |
322 | **[09:42.28 → 09:46.74]** Now, you know, one of the pioneers of our industry, Alan Kay,
323 |
324 | **[09:46.98 → 09:49.44]** has had a lot of great quotes throughout the years.
325 |
326 | **[09:49.60 → 09:55.00]** And I ran across one of them recently that explains how we look at this.
327 |
328 | **[09:56.06 → 09:59.18]** Explains why we go about doing things the way we do.
329 |
330 | **[09:59.30 → 10:00.72]** Because we love software.
331 |
332 | **[10:01.90 → 10:02.96]** And here's the quote.
333 |
334 | **[10:03.44 → 10:07.50]** People who are really serious about software should make their own hardware.
335 |
336 | **[10:08.32 → 10:08.98]** You know?
337 |
338 | **[10:09.46 → 10:11.40]** Alan said this 30 years ago.
339 |
340 | **[10:11.96 → 10:13.76]** And this is how we feel about it.
341 |
342 | **[10:13.82 → 10:18.64]** And so we're bringing breakthrough software to a mobile device for the first time.
343 |
344 | **[10:18.76 → 10:22.14]** It's five years ahead of anything on any other phone.
345 |
346 | **[10:22.84 → 10:24.14]** The second thing we're doing
347 |
348 | **[10:24.14 → 10:25.84]** is we're learning from the iPod.
349 |
350 | **[10:26.24 → 10:27.30]** Syncing with iTunes.
351 |
352 | **[10:27.48 → 10:30.76]** You know, we're going to ship our hundred millionth iPod this year.
353 |
354 | **[10:31.20 → 10:34.78]** And that's tens of millions of people
355 |
356 | **[10:34.78 → 10:38.10]** that know how to sync these devices with their PCs or Mac
357 |
358 | **[10:38.10 → 10:41.72]** and sync all of their media right on to their iPod.
359 |
360 | **[10:41.82 → 10:42.34]** Right?
361 |
362 | **[10:42.46 → 10:44.72]** So you just drop your iPod in
363 |
364 | **[10:44.72 → 10:46.38]** and it automatically syncs.
365 |
366 | **[10:46.50 → 10:48.68]** You're going to do the same thing with iPhone.
367 |
368 | **[10:48.96 → 10:51.20]** It automatically syncs to your PC or Mac
369 |
370 | **[10:51.96 → 10:52.74]** right through iTunes.
371 |
372 | **[10:52.74 → 10:56.34]** And iTunes is going to sync all your media onto your iPhone.
373 |
374 | **[10:56.50 → 11:01.64]** Your music, your audiobooks, podcasts, movies, TV shows, music videos.
375 |
376 | **[11:01.92 → 11:03.96]** But it also syncs a ton of data.
377 |
378 | **[11:04.64 → 11:06.84]** Your contacts, your calendars, and your photos,
379 |
380 | **[11:06.94 → 11:08.28]** which you can get on your iPod today,
381 |
382 | **[11:08.44 → 11:11.86]** your notes, your bookmarks from your web browser,
383 |
384 | **[11:12.34 → 11:14.68]** your email accounts, your whole email setup,
385 |
386 | **[11:14.74 → 11:17.62]** all that stuff can be moved over to iPhone completely automatically.
387 |
388 | **[11:18.44 → 11:19.46]** It's really nice.
389 |
390 | **[11:19.64 → 11:22.44]** And we do it through iTunes.
391 |
392 | **[11:23.76 → 11:26.16]** Again, you go to iTunes and you set it up,
393 |
394 | **[11:26.22 → 11:28.46]** just like you'd set up an iPod or an Apple TV.
395 |
396 | **[11:28.96 → 11:31.46]** And you set up what you want synced to your iPhone.
397 |
398 | **[11:32.30 → 11:34.26]** And it's just like an iPod.
399 |
400 | **[11:35.14 → 11:36.30]** Charge and sync.
401 |
402 | **[11:36.78 → 11:37.92]** So sync with iTunes.
403 |
404 | **[11:40.02 → 11:42.24]** Third thing I want to talk about a little is design.
405 |
406 | **[11:43.08 → 11:46.38]** We've designed something wonderful for your hand.
407 |
408 | **[11:47.02 → 11:47.86]** Just wonderful.
409 |
410 | **[11:48.60 → 11:49.90]** And this is what it looks like.
411 |
412 | **[11:52.64 → 11:54.52]** It's got a three and a half inch screen.
413 |
414 | **[11:54.52 → 11:55.90]** It's really big.
415 |
416 | **[11:56.98 → 12:00.24]** And it's the highest resolution screen we've ever shipped.
417 |
418 | **[12:00.38 → 12:02.18]** It's 160 pixels per inch.
419 |
420 | **[12:02.82 → 12:04.14]** Highest we've ever shipped.
421 |
422 | **[12:04.26 → 12:04.82]** It's gorgeous.
423 |
424 | **[12:05.22 → 12:07.78]** And on the front, there's only one button down there.
425 |
426 | **[12:07.88 → 12:08.80]** We call it the home button.
427 |
428 | **[12:08.90 → 12:10.54]** It takes you home from wherever you are.
429 |
430 | **[12:10.70 → 12:11.56]** And that's it.
431 |
432 | **[12:12.50 → 12:13.66]** Let's take a look at the side.
433 |
434 | **[12:14.02 → 12:15.40]** It's really thin.
435 |
436 | **[12:15.60 → 12:20.26]** It's thinner than any smartphone out there at 11.6 millimeters.
437 |
438 | **[12:20.42 → 12:22.94]** Thinner than the Q, thinner than the Blackjack,
439 |
440 | **[12:23.02 → 12:24.18]** thinner than all of them.
441 |
442 | **[12:25.06 → 12:25.92]** It's really nice.
443 |
444 | **[12:26.54 → 12:28.28]** And we've got some controls on the side.
445 |
446 | **[12:28.40 → 12:29.88]** We've got a little switch for ring and silent.
447 |
448 | **[12:29.98 → 12:31.74]** We've got a volume up and down control.
449 |
450 | **[12:33.14 → 12:33.96]** Let's look at the back.
451 |
452 | **[12:34.76 → 12:36.44]** On the back, the biggest thing of note
453 |
454 | **[12:36.44 → 12:38.42]** is we've got a two megapixel camera built right in.
455 |
456 | **[12:42.40 → 12:44.12]** The other side, and we're back on the front.
457 |
458 | **[12:44.24 → 12:45.46]** So let's take a look at the top now.
459 |
460 | **[12:46.34 → 12:48.58]** We've got a headset jack.
461 |
462 | **[12:49.18 → 12:50.18]** Three and a half millimeter.
463 |
464 | **[12:50.38 → 12:52.02]** All your iPod headphones fit right in.
465 |
466 | **[12:53.32 → 12:55.62]** We've got a place, a little tray for your SIM card.
467 |
468 | **[12:56.20 → 12:57.44]** And we've got one switch
469 |
470 | **[12:57.78 → 12:58.36]** for sleep and wake.
471 |
472 | **[12:58.52 → 13:00.64]** Just push it to go to sleep, push it to wake up.
473 |
474 | **[13:01.84 → 13:02.86]** Let's take a look at the bottom.
475 |
476 | **[13:04.74 → 13:05.82]** We've got a speaker.
477 |
478 | **[13:07.12 → 13:08.20]** We've got a microphone.
479 |
480 | **[13:08.82 → 13:11.32]** And we've got our 30-pin iPod connector.
481 |
482 | **[13:12.40 → 13:13.46]** So that's the bottom.
483 |
484 | **[13:14.40 → 13:16.62]** Now, we've also got some stuff you can't see.
485 |
486 | **[13:17.32 → 13:20.88]** We've got three really advanced sensors built into this phone.
487 |
488 | **[13:21.14 → 13:22.84]** The first one is a proximity sensor.
489 |
490 | **[13:23.22 → 13:25.48]** It senses when physical objects get close.
491 |
492 | **[13:25.58 → 13:27.62]** So when you bring iPhone up to your ear,
493 |
494 | **[13:28.08 → 13:30.82]** take a phone call, it turns off the display,
495 |
496 | **[13:31.04 → 13:33.02]** and it turns off the touch sensor instantly.
497 |
498 | **[13:33.36 → 13:34.38]** Why do you want to do that?
499 |
500 | **[13:34.48 → 13:35.90]** Well, one, to save battery, but two,
501 |
502 | **[13:36.04 → 13:38.98]** so you don't get spurious inputs from your face into the touchscreen.
503 |
504 | **[13:39.22 → 13:40.40]** It just automatically turns them off.
505 |
506 | **[13:40.42 → 13:41.50]** Take it away, boom, it's back on.
507 |
508 | **[13:41.82 → 13:43.76]** So it's got a proximity sensor built in.
509 |
510 | **[13:43.78 → 13:45.28]** It's got an ambient light sensor as well.
511 |
512 | **[13:45.58 → 13:47.42]** We sense the ambient lighting conditions
513 |
514 | **[13:47.42 → 13:49.50]** and adjust the brightness of the display
515 |
516 | **[13:49.50 → 13:51.04]** to match the ambient lighting conditions.
517 |
518 | **[13:51.10 → 13:53.52]** Again, better user experience saves power.
519 |
520 | **[13:53.96 → 13:56.58]** And the third thing we've got is an accelerometer
521 |
522 | **[13:56.58 → 13:59.88]** so that we can tell when you switch from portrait to landscape.
523 |
```
--------------------------------------------------------------------------------
/completion_support.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Completion support for MCP servers.
3 |
4 | This module implements the argument completion system for the MCP (Model Control Protocol)
5 | servers, enabling interactive, context-aware autocompletion for tool arguments. The completion
6 | system helps users and LLMs efficiently use tools by suggesting valid values for arguments
7 | based on the current context.
8 |
9 | Key Components:
10 | - CompletionProvider (abstract): Base class for all completion providers
11 | - StaticCompletionProvider: Provides completions from predefined, static lists
12 | - DynamicCompletionProvider: Generates completions on-demand through callback functions
13 | - CompletionRegistry: Central registry managing providers for different tools
14 | - Utility functions: Helper functions for common completion scenarios (file paths, etc.)
15 |
16 | The module supports a flexible, extensible architecture where:
17 | 1. Each tool can register its own providers for different arguments
18 | 2. Static lists can be used for enumerated options (e.g., formats, modes)
19 | 3. Dynamic functions can be used for context-dependent values (files, users, etc.)
20 | 4. A fallback provider can handle common arguments across tools
21 |
22 | Usage Example:
23 | ```python
24 | # Create completion registry
25 | registry = CompletionRegistry()
26 |
27 | # Register dynamic provider for a tool
28 | registry.register_provider("database_tool", DynamicCompletionProvider({
29 | "table_name": async_db_tables_function,
30 | "column_name": async_table_columns_function
31 | }))
32 |
33 | # Set default provider for common arguments across all tools
34 | registry.set_default_provider(COMMON_COMPLETIONS)
35 |
36 | # Later, when handling MCP completion requests:
37 | completions = await registry.get_completions(
38 | tool_name="document_tool",
39 | argument_name="format",
40 | current_value="pd" # User has typed "pd" so far
41 | )
42 | # Returns: {"values": ["pdf"], "hasMore": False, "total": 1}
43 | ```
44 |
45 | This system integrates with the MCP server to provide real-time completion
46 | suggestions as users type, significantly improving usability and reducing errors.
47 | """
import logging
from typing import Any, Callable, Dict, List, Optional
49 |
50 |
class CompletionProvider:
    """Interface for objects that supply argument completion suggestions.

    Concrete providers (e.g. StaticCompletionProvider for fixed option lists,
    DynamicCompletionProvider for callback-driven values) implement the two
    methods below so the CompletionRegistry can treat them interchangeably.
    Custom implementations might query external APIs, read from databases,
    rank or filter candidates, or cache results.

    Implementations should swallow internal failures and return an empty
    list instead of raising, so one misbehaving provider cannot break the
    completion flow for the whole server.
    """

    async def get_completions(self, argument_name: str, current_value: str, **context) -> List[str]:
        """Return completion suggestions for one argument.

        Args:
            argument_name: Name of the argument being completed
                (e.g. "file_path", "format").
            current_value: Partial value typed so far; may be empty.
            **context: Extra hints such as the requesting tool_name, sibling
                argument values, user preferences, or environment info.

        Returns:
            Suggestions that are valid completions of current_value; an
            empty list when none apply.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError("Subclasses must implement get_completions")

    def supports_argument(self, argument_name: str) -> bool:
        """Report whether this provider can complete the given argument.

        Lets the CompletionRegistry cheaply skip providers without calling
        get_completions() and risking exceptions or empty results.

        Args:
            argument_name: Name of the argument to check.

        Returns:
            True when get_completions() can produce suggestions for it,
            False otherwise.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError("Subclasses must implement supports_argument")
122 |
123 |
class StaticCompletionProvider(CompletionProvider):
    """Completion provider backed by fixed, predefined suggestion lists.

    Best suited for arguments with a closed, known set of valid values:
    enumerated options (file formats, provider names), common settings or
    modes, or frequently used values that rarely change. Candidates are
    filtered with case-insensitive prefix matching against the user's
    partial input — e.g. with ["openai", "anthropic"] registered, input
    "open" yields only ["openai"].

    Example:
        provider = StaticCompletionProvider({
            "format": ["json", "csv", "xml", "yaml"],
            "priority": ["low", "medium", "high"],
        })
        await provider.get_completions("format", "")   # all formats
        await provider.get_completions("format", "j")  # ["json"]

    For values that depend on runtime state (file paths, database tables),
    use DynamicCompletionProvider instead.
    """

    def __init__(self, completions: Dict[str, List[str]]):
        """Store the static completion values.

        Args:
            completions: Dictionary mapping argument names to suggestion lists
        """
        self.completions = completions

    async def get_completions(self, argument_name: str, current_value: str, **context) -> List[str]:
        """Return the static suggestions, prefix-filtered by current_value."""
        candidates = self.completions.get(argument_name)
        if candidates is None:
            return []
        if not current_value:
            return candidates
        # Case-insensitive prefix match on the partial input.
        prefix = current_value.lower()
        return [candidate for candidate in candidates if candidate.lower().startswith(prefix)]

    def supports_argument(self, argument_name: str) -> bool:
        """True when a static suggestion list was registered for this argument."""
        return argument_name in self.completions
185 |
186 |
class DynamicCompletionProvider(CompletionProvider):
    """
    Completion provider that generates suggestions dynamically using callback functions.

    Unlike StaticCompletionProvider's fixed lists, this provider invokes
    specialized async functions to produce suggestions on demand. Use it when
    the valid values:

    - Depend on current system state (e.g., existing files, running processes)
    - Vary with user context or previous selections
    - Are too numerous to predefine (e.g., all possible file paths)
    - Require external API calls or database queries to determine

    Each callback is keyed by argument name and must accept at least:
    - current_value: The current partial input string
    - **context: Additional context useful for generating completions

    Usage example:
    ```python
    async def complete_files(current_value, **context):
        # Custom logic to find matching files
        return ["file1.txt", "file2.txt", "folder/"]

    async def complete_users(current_value, **context):
        # Query database for matching users (parameterized to avoid injection)
        db = context.get("database")
        rows = await db.query(
            "SELECT username FROM users WHERE username LIKE ? || '%'",
            (current_value,),
        )
        return [row.username for row in rows]

    provider = DynamicCompletionProvider({
        "file_path": complete_files,
        "username": complete_users,
    })
    ```

    Callbacks should handle errors gracefully and return an empty list rather
    than raising; any exception that does escape is logged here and converted
    to an empty suggestion list, so it never reaches the MCP response.
    """

    def __init__(self, completion_functions: Dict[str, Callable]):
        """
        Initialize with dynamic completion functions.

        Args:
            completion_functions: Dictionary mapping argument names to completion functions.
                                 Each function should be an async function that accepts at least
                                 (current_value: str, **context) and returns a List[str] of suggestions.
        """
        self.completion_functions = completion_functions

    async def get_completions(self, argument_name: str, current_value: str, **context) -> List[str]:
        """Get completion suggestions by calling the registered callback."""
        func = self.completion_functions.get(argument_name)
        if func is None:
            return []

        try:
            return await func(current_value, **context)
        except Exception as e:
            # Completion is best-effort: log the failure (instead of printing
            # to stdout) and degrade to "no suggestions".
            logging.getLogger(__name__).warning(
                "Error getting completions for %s: %s", argument_name, e
            )
            return []

    def supports_argument(self, argument_name: str) -> bool:
        """Check if a completion function exists for this argument."""
        return argument_name in self.completion_functions
262 |
263 |
class CompletionRegistry:
    """
    Central registry that routes MCP completion requests to providers.

    This is the single entry point for all completion requests in an MCP
    server. Each tool can have exactly one registered CompletionProvider; a
    single default provider acts as a fallback for tools without their own,
    typically covering common arguments (provider names, models, formats).

    Responsibilities:
    1. Tool-specific provider registration and lookup
    2. A fallback mechanism through a default provider
    3. Standardized response formatting per the MCP specification
    4. Error handling and graceful degradation

    Responses are always shaped as {"values": [...], "hasMore": bool,
    "total": int}, even when errors occur or no completions are available,
    so clients receive a predictable format.

    Typical workflow:
    1. Create a registry during server initialization
    2. register_provider() for specialized tools
    3. set_default_provider() for common arguments
    4. Serve MCP completion requests through get_completions()
    """

    def __init__(self):
        """Initialize an empty registry with no default provider."""
        self.tool_providers = {}  # Map of tool_name -> provider
        self.default_provider = None

    def register_provider(self, tool_name: str, provider: "CompletionProvider"):
        """
        Register a completion provider for a specific tool.

        Each tool can have exactly one provider; registering again for the
        same tool replaces the previous provider, allowing dynamic
        reconfiguration. The provider may be static, dynamic, or any custom
        CompletionProvider implementation.

        Args:
            tool_name: Identifier of the tool, matching the name used in MCP
                requests (e.g., "search_documents", "analyze_image").
            provider: Provider instance handling suggestions for this tool's
                arguments. If the tool only needs completions for some
                arguments, its supports_argument() should return False for
                the rest.

        Example:
            ```python
            registry.register_provider(
                "search_documents",
                StaticCompletionProvider({
                    "source": ["web", "database", "local_files"],
                    "sort_by": ["relevance", "date", "title"]
                })
            )
            ```
        """
        self.tool_providers[tool_name] = provider

    def set_default_provider(self, provider: "CompletionProvider"):
        """
        Set a default provider for tools without specific providers.

        The default is consulted when no tool-specific provider is
        registered for a requested tool. It is typically configured for
        arguments that appear across many tools: provider names, model
        identifiers, common formats, universal settings.

        Only one default provider can be active at a time; setting a new one
        replaces any previous default. With no default and no tool provider,
        completion requests simply yield an empty result rather than raising.

        Args:
            provider: The completion provider instance to use as the
                fallback for all tools without specific providers.

        Example:
            ```python
            registry.set_default_provider(
                StaticCompletionProvider({
                    "provider": ["openai", "anthropic", "cohere"],
                    "format": ["json", "text", "markdown"]
                })
            )
            ```
        """
        self.default_provider = provider

    def get_provider(self, tool_name: str) -> Optional["CompletionProvider"]:
        """
        Retrieve the appropriate completion provider for a specific tool.

        Resolution order:
        1. The provider specifically registered for the requested tool
        2. The default provider, if no tool-specific provider exists
        3. None, if neither exists

        Args:
            tool_name: Identifier used when the provider was registered.

        Returns:
            The applicable CompletionProvider, or None when no provider is
            available. Primarily used internally by get_completions(), but
            may be called directly to check provider availability.
        """
        return self.tool_providers.get(tool_name, self.default_provider)

    async def get_completions(
        self,
        tool_name: str,
        argument_name: str,
        current_value: str,
        **context
    ) -> Dict[str, Any]:
        """
        Get completion suggestions with a standardized MCP response format.

        Resolves the provider via get_provider(), verifies it supports the
        argument, collects its suggestions, and truncates them per the MCP
        spec. Missing providers, unsupported arguments, and provider errors
        all produce an empty result (errors are logged) rather than an
        exception, so clients always receive a valid response shape.

        Args:
            tool_name: Name of the tool requesting completion suggestions
            argument_name: Argument within the tool to provide completions for
            current_value: Current value or partial input entered by the user
            **context: Additional data forwarded to the provider, such as
                other argument values, user preferences, or environment info

        Returns:
            Dictionary conforming to the MCP completion protocol:
            - values: suggested completion strings, limited to 100 items
            - hasMore: True when more than 100 suggestions were available
            - total: number of suggestions actually included in 'values'

            Example: {"values": ["option1", "option2"], "hasMore": False,
            "total": 2}
        """
        empty_result = {"values": [], "hasMore": False, "total": 0}

        provider = self.get_provider(tool_name)
        if not provider or not provider.supports_argument(argument_name):
            return empty_result

        try:
            # Get suggestions from provider
            suggestions = await provider.get_completions(
                argument_name=argument_name,
                current_value=current_value,
                tool_name=tool_name,
                **context
            )
        except Exception as e:
            # Completion is best-effort: log (instead of printing to stdout)
            # and degrade gracefully to an empty, well-formed response.
            logging.getLogger(__name__).warning("Error getting completions: %s", e)
            return empty_result

        # Limit to 100 items as per MCP spec.
        has_more = len(suggestions) > 100
        suggestions = suggestions[:100]

        return {
            "values": suggestions,
            "hasMore": has_more,
            "total": len(suggestions),
        }
504 |
505 |
506 | # Example usage for file path completion
async def complete_file_paths(current_value: str, **context) -> List[str]:
    """
    Generate filesystem path completion suggestions based on the current input.

    Behavior:
    - Empty input returns common starting points: ["./", "../", "/"]
    - Otherwise the input is user-expanded ("~/x" becomes "/home/user/x") and
      glob-matched, so every file or directory whose name starts with the
      last path component is suggested
    - Directories are suffixed with a trailing "/" to distinguish them from files
    - Results are sorted for deterministic ordering

    Case sensitivity follows the underlying filesystem (typically
    case-insensitive on Windows, case-sensitive on Unix-like systems).

    Args:
        current_value: The current path string provided by the user (can be
            empty, partial, or complete; relative, absolute, or ~-prefixed)
        **context: Optional completion hints:
            - file_extensions: list of extensions (e.g. [".py", ".txt"]);
              when given, only files with those extensions are suggested
              (directories are always kept)
            - include_hidden: when True, dot-prefixed entries are also
              matched (glob's "*" skips them by default)

    Returns:
        Sorted list of matching paths. Regular files appear as-is
        (e.g. "./src/main.py"); directories carry a trailing slash
        (e.g. "./src/utils/"). An empty list is returned on filesystem
        errors (e.g. unreadable directories).

    Examples:
        - Input: "" -> ["./", "../", "/"]
        - Input: "doc" -> ["docker-compose.yml", "documents/"]
        - Input: "~/Down" -> ["/home/user/Downloads/"]
    """
    import glob
    import os

    # No input yet: offer conventional starting points rather than globbing CWD.
    if not current_value:
        return ["./", "../", "/"]

    # Expand user directory if needed
    path = os.path.expanduser(current_value)

    # Split into the directory to search and the name prefix to match.
    base = os.path.basename(path)
    directory = os.path.dirname(path) if base else path
    if not directory:
        directory = "."

    try:
        matches = glob.glob(os.path.join(directory, f"{base}*"))
        if context.get("include_hidden") and not base.startswith("."):
            # glob's "*" never matches dot-files, so probe them explicitly.
            matches += glob.glob(os.path.join(directory, f".{base}*"))
    except OSError:
        # Unreadable directory or similar: degrade to "no suggestions".
        return []

    file_extensions = context.get("file_extensions")

    # Format results: mark directories, apply optional extension filter.
    results = []
    for match in matches:
        if os.path.isdir(match):
            results.append(f"{match}/")
        elif not file_extensions or os.path.splitext(match)[1] in file_extensions:
            results.append(match)

    return sorted(results)
585 |
586 |
587 | # Example completions for common arguments
588 | """
589 | Predefined completion provider for common argument types used across multiple tools.
590 |
591 | This global provider instance contains standardized suggestion lists for frequently
592 | used arguments in the MCP system. It serves as a convenient default provider that
593 | can be registered with the CompletionRegistry for tools that don't need specialized
594 | completion providers.
595 |
596 | The included completion lists cover:
597 | - provider: Common LLM provider names (e.g., "openai", "anthropic")
598 | - model: Popular model identifiers from various providers
599 | - format: Standard data formats for input/output
600 | - source_type: Common data source types for analysis tools
601 | - analysis_type: Standard categories of analysis operations
602 |
603 | Usage example:
604 | ```python
605 | # Set as the default provider in a registry to handle common arguments
606 | registry = CompletionRegistry()
607 | registry.set_default_provider(COMMON_COMPLETIONS)
608 |
609 | # Later, even for tools without specific providers, common arguments will work:
610 | await registry.get_completions(
611 | tool_name="any_tool",
612 | argument_name="provider",
613 | current_value="open" # Will match "openai"
614 | )
615 | ```
616 |
617 | This provider can be extended with additional arguments by creating a new
618 | StaticCompletionProvider that combines these defaults with tool-specific completions.
619 | """
# Shared fallback suggestions for arguments that recur across many tools;
# intended for use as a registry default via set_default_provider().
_COMMON_ARGUMENT_SUGGESTIONS: Dict[str, List[str]] = {
    "provider": ["openai", "anthropic", "gemini", "mistral", "custom"],
    "model": [
        "gpt-4-turbo",
        "gpt-4o",
        "claude-3-5-sonnet",
        "claude-3-opus",
        "gemini-1.5-pro",
        "gemini-1.5-flash",
        "mistral-large",
    ],
    "format": ["json", "text", "markdown", "html", "csv"],
    "source_type": ["csv", "json", "excel", "database", "api"],
    "analysis_type": ["general", "sentiment", "entities", "summary"],
}

COMMON_COMPLETIONS = StaticCompletionProvider(_COMMON_ARGUMENT_SUGGESTIONS)
```
--------------------------------------------------------------------------------
/ultimate_mcp_server/core/ums_api/ums_services.py:
--------------------------------------------------------------------------------
```python
1 | """Business logic and service functions for UMS API."""
2 |
3 | import json
4 | import math
5 | import sqlite3
6 | from collections import Counter, defaultdict, deque
7 | from datetime import datetime
8 | from pathlib import Path
9 | from threading import Lock
10 | from typing import Any, Dict, List, Optional
11 |
12 | from .ums_models import (
13 | MemoryDetail,
14 | PreviewMemory,
15 | CriticalPathAction,
16 | FlameGraphNode,
17 | )
18 | from .ums_database import get_db_connection
19 |
20 |
21 | # ---------- Utility Functions ----------
22 |
def format_file_size(size_bytes: int) -> str:
    """Return *size_bytes* as a human-readable string (e.g. ``"1.5 MB"``).

    Fixes over the previous version: negative values no longer raise
    ``ValueError`` from ``math.log`` (they are formatted with a leading
    minus sign), and sizes of 1 PB or more are clamped to the TB unit
    instead of raising ``IndexError``.
    """
    if size_bytes == 0:
        return "0 B"
    if size_bytes < 0:
        return f"-{format_file_size(-size_bytes)}"

    size_names = ["B", "KB", "MB", "GB", "TB"]
    # Clamp the unit index so huge values fall back to the largest unit.
    i = min(int(math.floor(math.log(size_bytes, 1024))), len(size_names) - 1)
    p = math.pow(1024, i)
    s = round(size_bytes / p, 2)
    return f"{s} {size_names[i]}"
33 |
34 |
35 | def _dict_depth(d: Dict[str, Any], depth: int = 0) -> int:
36 | if not isinstance(d, dict) or not d:
37 | return depth
38 | return max(_dict_depth(v, depth + 1) for v in d.values())
39 |
40 |
41 | def _count_values(d: Dict[str, Any]) -> int:
42 | cnt = 0
43 | for v in d.values():
44 | if isinstance(v, dict):
45 | cnt += _count_values(v)
46 | elif isinstance(v, list):
47 | cnt += len(v)
48 | else:
49 | cnt += 1
50 | return cnt
51 |
52 |
def calculate_state_complexity(state_data: Dict[str, Any]) -> float:
    """Score the structural complexity of a state dict on a 0-100 scale.

    Weighs top-level breadth, nesting depth and total leaf-value volume,
    then caps the result at 100 and rounds to two decimals.
    """
    if not state_data:
        return 0.0
    breadth = len(state_data) * 5
    depth = _dict_depth(state_data) * 10
    volume = _count_values(state_data) * 0.5
    return round(min(100.0, breadth + depth + volume), 2)
60 |
61 |
def compute_state_diff(a: Dict[str, Any], b: Dict[str, Any]) -> Dict[str, Any]:
    """Diff two state dicts.

    Reports keys added/removed/modified between *a* and *b*, plus a
    ``magnitude``: the percentage of all keys that changed.
    """
    diff: Dict[str, Any] = {"added": {}, "removed": {}, "modified": {}, "magnitude": 0.0}
    all_keys = set(a) | set(b)
    changed = 0
    for key in all_keys:
        if key not in a:
            diff["added"][key] = b[key]
        elif key not in b:
            diff["removed"][key] = a[key]
        elif a[key] != b[key]:
            diff["modified"][key] = {"before": a[key], "after": b[key]}
        else:
            continue  # unchanged key
        changed += 1
    if all_keys:
        diff["magnitude"] = (changed / len(all_keys)) * 100
    return diff
79 |
80 |
81 | # ---------- Action Monitor Helper Functions ----------
82 |
def get_action_status_indicator(status: str, execution_time: float) -> dict:
    """Return a UI indicator (color, icon, label, urgency) for an action status.

    Urgency escalates only for in-flight statuses: over 60s is "medium",
    over 120s is "high"; everything else is "low".
    """
    known = {
        "running": ("blue", "play", "Running"),
        "executing": ("blue", "cpu", "Executing"),
        "in_progress": ("orange", "clock", "In Progress"),
        "completed": ("green", "check", "Completed"),
        "failed": ("red", "x", "Failed"),
        "cancelled": ("gray", "stop", "Cancelled"),
        "timeout": ("yellow", "timer-off", "Timeout"),
    }
    color, icon, label = known.get(status, ("gray", "help", "Unknown"))
    indicator = {"color": color, "icon": icon, "label": label}

    active = status in ("running", "executing", "in_progress")
    if active and execution_time > 120:  # 2 minutes
        indicator["urgency"] = "high"
    elif active and execution_time > 60:  # 1 minute
        indicator["urgency"] = "medium"
    else:
        indicator["urgency"] = "low"

    return indicator
112 |
113 |
def categorize_action_performance(execution_time: float, estimated_duration: float) -> str:
    """Rate actual execution time against its estimate.

    Returns one of "excellent"/"good"/"acceptable"/"slow"/"very_slow" based
    on the time-to-estimate ratio, or "unknown" when no estimate exists.
    """
    if estimated_duration <= 0:
        return "unknown"

    ratio = execution_time / estimated_duration
    for bound, label in ((0.5, "excellent"), (0.8, "good"), (1.2, "acceptable"), (2.0, "slow")):
        if ratio <= bound:
            return label
    return "very_slow"
131 |
132 |
def get_action_resource_usage(action_id: str) -> dict:
    """Return resource metrics for an action (placeholder implementation).

    Always returns zeroed metrics; a real implementation would query a
    metrics backend keyed by *action_id*.
    """
    return {
        "cpu_usage": 0.0,
        "memory_usage": 0.0,
        "network_io": 0.0,
        "disk_io": 0.0,
    }
137 |
138 |
def estimate_wait_time(position: int, queue: list) -> float:
    """Estimate seconds until the action at *position* in *queue* starts.

    Uses a flat 30-second average per queued action; *queue* is accepted
    for future use with historical timing data.
    """
    AVG_ACTION_SECONDS = 30.0
    return 0.0 if position == 0 else position * AVG_ACTION_SECONDS
146 |
147 |
def get_priority_label(priority: int) -> str:
    """Translate a numeric priority (lower = more urgent) into a label."""
    for ceiling, label in ((1, "Critical"), (3, "High"), (5, "Normal"), (7, "Low")):
        if priority <= ceiling:
            return label
    return "Very Low"
160 |
161 |
def calculate_action_performance_score(action: dict) -> float:
    """Score a completed action 0-100 based on execution time.

    Non-completed actions score 0.0.  Uses ``.get`` for the status lookup so
    a dict without a "status" key scores 0.0 instead of raising KeyError
    (consistent with the ``.get`` used for "execution_duration" below).
    Faster completions score higher; very long runs decay toward a 50 floor.
    """
    if action.get("status") != "completed":
        return 0.0

    execution_time = action.get("execution_duration", 0)
    if execution_time <= 0:
        return 100.0  # instantaneous (or unrecorded) duration counts as perfect

    # Stepped scoring: <=5s -> 100 down to <=120s -> 60, then a decaying tail.
    for limit, score in ((5, 100.0), (15, 90.0), (30, 80.0), (60, 70.0), (120, 60.0)):
        if execution_time <= limit:
            return score
    return max(50.0, 100.0 - (execution_time / 10))
183 |
184 |
def calculate_efficiency_rating(execution_time: float, result_size: int) -> str:
    """Rate output-per-second efficiency as excellent/good/fair/poor.

    Returns "unknown" when no positive execution time was recorded.
    """
    if execution_time <= 0:
        return "unknown"

    throughput = result_size / execution_time
    if throughput >= 100:
        return "excellent"
    if throughput >= 50:
        return "good"
    return "fair" if throughput >= 20 else "poor"
200 |
201 |
def calculate_performance_summary(actions: list) -> dict:
    """Summarise performance across *actions*.

    Returns the average score, the best/worst performers (tool name and
    score), and a histogram of efficiency ratings.  An empty input yields
    a zeroed summary with ``None`` performers.
    """
    if not actions:
        return {
            "avg_score": 0.0,
            "top_performer": None,
            "worst_performer": None,
            "efficiency_distribution": {},
        }

    def score(entry: dict) -> float:
        return entry.get("performance_score", 0)

    avg = sum(score(a) for a in actions) / len(actions)
    best = max(actions, key=score)
    worst = min(actions, key=score)
    ratings = Counter(a.get("efficiency_rating", "unknown") for a in actions)

    return {
        "avg_score": round(avg, 2),
        "top_performer": {"tool_name": best.get("tool_name", ""), "score": score(best)},
        "worst_performer": {"tool_name": worst.get("tool_name", ""), "score": score(worst)},
        "efficiency_distribution": dict(ratings),
    }
232 |
233 |
def generate_performance_insights(
    overall_stats: dict, tool_stats: list, hourly_metrics: list
) -> list:
    """Generate actionable performance insights.

    Flags a success rate below 80%, any tool averaging over 60 seconds,
    and the peak usage hour.  Fix: the previous version divided by
    ``overall_stats.get("total_actions", 1)``, which raised
    ZeroDivisionError whenever the key was present but 0.
    """
    insights = []

    # `or 1` also covers an explicit 0, matching the old missing-key default.
    total_actions = overall_stats.get("total_actions", 0) or 1
    success_rate = (overall_stats.get("successful_actions", 0) / total_actions) * 100
    if success_rate < 80:
        insights.append(
            {
                "type": "warning",
                "title": "Low Success Rate",
                "message": f"Current success rate is {success_rate:.1f}%. Consider investigating failing tools.",
                "severity": "high",
            }
        )

    if tool_stats:
        slowest_tool = max(tool_stats, key=lambda t: t.get("avg_duration", 0))
        if slowest_tool.get("avg_duration", 0) > 60:
            insights.append(
                {
                    "type": "info",
                    "title": "Performance Optimization",
                    "message": f"{slowest_tool['tool_name']} is taking {slowest_tool['avg_duration']:.1f}s on average. Consider optimization.",
                    "severity": "medium",
                }
            )

    if hourly_metrics:
        peak_hour = max(hourly_metrics, key=lambda h: h.get("action_count", 0))
        insights.append(
            {
                "type": "info",
                "title": "Peak Usage",
                "message": f"Peak usage occurs at {peak_hour['hour']}:00 with {peak_hour['action_count']} actions.",
                "severity": "low",
            }
        )

    return insights
277 |
278 |
279 | # ---------- Memory Quality Functions ----------
280 |
def find_cognitive_patterns(
    states: List[Dict[str, Any]], min_length: int, similarity_threshold: float
) -> List[Dict[str, Any]]:
    """Detect immediately-repeating sub-sequences of same-typed states.

    For each state type, every pair of adjacent windows (length
    ``min_length`` up to half the sequence) is compared; a pair whose
    similarity meets *similarity_threshold* is reported.  Results are
    sorted by similarity, best first.
    """
    by_type: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    for st in states:
        by_type[st["state_type"]].append(st)

    found: List[Dict[str, Any]] = []
    for state_type, seq in by_type.items():
        if len(seq) < min_length * 2:
            continue  # too short to contain a repeat
        for window in range(min_length, len(seq) // 2 + 1):
            for offset in range(len(seq) - window * 2 + 1):
                first = seq[offset : offset + window]
                second = seq[offset + window : offset + window * 2]
                score = calculate_sequence_similarity(first, second)
                if score >= similarity_threshold:
                    found.append(
                        {
                            "type": f"repeating_{state_type}",
                            "length": window,
                            "similarity": score,
                            "occurrences": 2,
                            "first_occurrence": first[0]["timestamp"],
                            "pattern_description": f"Repeating {state_type} sequence of {window} states",
                        }
                    )
    found.sort(key=lambda p: p["similarity"], reverse=True)
    return found
308 |
309 |
def calculate_sequence_similarity(
    seq1: List[Dict[str, Any]], seq2: List[Dict[str, Any]]
) -> float:
    """Average pairwise similarity between two equal-length state sequences.

    Returns 0.0 for sequences of different lengths.  Fix: two empty
    sequences now compare as identical (1.0) instead of raising
    ZeroDivisionError from the final division.
    """
    if len(seq1) != len(seq2):
        return 0.0
    if not seq1:
        return 1.0  # both empty -> trivially identical
    total_similarity = sum(
        calculate_single_state_similarity(s1, s2) for s1, s2 in zip(seq1, seq2)
    )
    return total_similarity / len(seq1)
321 |
322 |
def calculate_single_state_similarity(
    state1: Dict[str, Any], state2: Dict[str, Any]
) -> float:
    """Similarity of two individual states in [0, 1].

    Mean of the Jaccard overlap of their state_data keys and the fraction
    of shared keys whose values match exactly.  Two empty states are 1.0;
    one empty and one non-empty is 0.0.
    """
    data1 = state1.get("state_data", {})
    data2 = state2.get("state_data", {})
    if not data1 and not data2:
        return 1.0
    if not data1 or not data2:
        return 0.0

    keys1, keys2 = set(data1), set(data2)
    union = keys1 | keys2
    shared = keys1 & keys2
    key_score = len(shared) / len(union) if union else 1.0

    value_score = 0.0
    if shared:
        value_score = sum(data1[k] == data2[k] for k in shared) / len(shared)

    return (key_score + value_score) / 2
342 |
343 |
def analyze_state_transitions(states: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Tally consecutive state-type transitions, most frequent first.

    Each entry reports the transition label, its count, and its share of
    all transitions as a percentage.
    """
    counts = Counter(
        f"{prev['state_type']} → {nxt['state_type']}"
        for prev, nxt in zip(states, states[1:])
    )
    total_steps = len(states) - 1
    return [
        {
            "transition": name,
            "count": count,
            "percentage": (count / total_steps) * 100 if total_steps > 0 else 0,
        }
        for name, count in counts.most_common()
    ]
361 |
362 |
def detect_cognitive_anomalies(states: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Flag states whose structural complexity is a statistical outlier.

    Computes a z-score of each state's complexity against the population
    mean/std; |z| > 2 is reported, with severity "high" above |z| > 3.
    Needs at least three states to be meaningful.
    """
    if len(states) < 3:
        return []

    scores = [calculate_state_complexity(s.get("state_data", {})) for s in states]
    n = len(scores)
    mean = sum(scores) / n
    std = (sum((c - mean) ** 2 for c in scores) / n) ** 0.5

    anomalies: List[Dict[str, Any]] = []
    for state, score in zip(states, scores):
        z = (score - mean) / std if std > 0 else 0
        if abs(z) <= 2:
            continue
        anomalies.append(
            {
                "state_id": state["state_id"],
                "timestamp": state["timestamp"],
                "anomaly_type": "complexity_outlier",
                "z_score": z,
                "description": f"Unusual complexity: {score:.1f} (avg: {mean:.1f})",
                "severity": "high" if abs(z) > 3 else "medium",
            }
        )
    return anomalies
390 |
391 |
392 | # ---------- Working Memory System ----------
393 |
class WorkingMemorySystem:
    """
    Working memory system for managing active memories with focus capabilities.

    Maintains a bounded pool of recent memories with relevance scoring and an
    optional focus mode that filters and boosts memories matching keywords.
    Relevance combines importance, recency (24h linear decay), access
    frequency (saturating at 10 hits) and focus-keyword matches.

    Bug fix: when the pool is at capacity, the oldest memory is now
    explicitly evicted so every secondary structure stays consistent.
    Previously ``deque(maxlen=...)`` silently discarded the oldest entry,
    leaving stale records in ``memory_index``, ``category_index``,
    ``access_counts`` and ``relevance_scores``.
    """

    def __init__(self, capacity: int = 100, focus_threshold: float = 0.7):
        """Create an empty pool holding at most *capacity* memories.

        Args:
            capacity: Maximum number of memories kept in the pool.
            focus_threshold: Minimum relevance a memory needs to be listed
                while focus mode is enabled.
        """
        self.capacity = capacity
        self.focus_threshold = focus_threshold
        self.memory_pool = deque(maxlen=capacity)
        self.focus_mode_enabled = False
        self.focus_keywords: List[str] = []
        self.memory_index: Dict[str, dict] = {}  # memory_id -> memory mapping
        self.category_index: Dict[str, List[str]] = defaultdict(list)  # category -> [memory_ids]
        self.access_counts: Dict[str, int] = defaultdict(int)  # memory_id -> access count
        self.relevance_scores: Dict[str, float] = {}  # memory_id -> relevance score
        self.initialized_at = datetime.now()
        self.last_optimization = datetime.now()
        self.optimization_count = 0

    def add_memory(self, memory_id: str, content: str, category: str, importance: float = 5.0):
        """Add (or replace) a memory in the working pool."""
        # Replace any existing entry for this id.
        if memory_id in self.memory_index:
            self.remove_memory(memory_id)

        # Evict the oldest memory while at capacity.  Relying on the deque's
        # maxlen would drop it silently and leave the indices out of sync.
        while len(self.memory_pool) >= self.capacity:
            self.remove_memory(self.memory_pool[0]['memory_id'])

        memory = {
            'memory_id': memory_id,
            'content': content,
            'category': category,
            'importance': importance,
            'added_at': datetime.now().timestamp(),
            'last_accessed': datetime.now().timestamp(),
        }
        self.memory_pool.append(memory)
        self.memory_index[memory_id] = memory
        self.category_index[category].append(memory_id)

        # Calculate initial relevance.
        self._calculate_relevance(memory)

    def remove_memory(self, memory_id: str):
        """Remove a memory and every index entry that references it."""
        if memory_id not in self.memory_index:
            return
        memory = self.memory_index.pop(memory_id)
        self.memory_pool.remove(memory)
        self.category_index[memory['category']].remove(memory_id)
        self.relevance_scores.pop(memory_id, None)
        self.access_counts.pop(memory_id, None)

    def access_memory(self, memory_id: str):
        """Record a memory access: bump the hit count and refresh relevance."""
        memory = self.memory_index.get(memory_id)
        if memory is not None:
            self.access_counts[memory_id] += 1
            memory['last_accessed'] = datetime.now().timestamp()
            self._calculate_relevance(memory)

    def set_focus_mode(self, enabled: bool, keywords: Optional[List[str]] = None):
        """Enable/disable focus mode and rescore the pool under the new settings."""
        self.focus_mode_enabled = enabled
        self.focus_keywords = keywords or []
        for memory in self.memory_pool:
            self._calculate_relevance(memory)

    def _calculate_relevance(self, memory: dict):
        """Recompute and store the relevance score for *memory*, clamped to 1.0."""
        base_score = memory['importance'] / 10.0  # importance normalized to 0-1

        # Recency: linear decay over 24 hours, floored at 0.1.
        age_hours = (datetime.now().timestamp() - memory['added_at']) / 3600
        recency_factor = max(0.1, 1.0 - (age_hours / 24))

        # Access frequency: saturates after 10 accesses.
        access_factor = min(1.0, self.access_counts[memory['memory_id']] / 10.0)

        # Focus boost: each matching keyword adds +0.5, capped at 2x.
        focus_factor = 1.0
        if self.focus_mode_enabled and self.focus_keywords:
            content_lower = memory['content'].lower()
            keyword_matches = sum(1 for kw in self.focus_keywords if kw.lower() in content_lower)
            focus_factor = min(2.0, 1.0 + (keyword_matches * 0.5))

        relevance = base_score * recency_factor * (0.5 + 0.5 * access_factor) * focus_factor
        self.relevance_scores[memory['memory_id']] = min(1.0, relevance)

    def get_active_memories(self, limit: int = None) -> List[dict]:
        """Return memories sorted by relevance, filtered by threshold in focus mode."""
        memories = list(self.memory_pool)
        if self.focus_mode_enabled:
            memories = [
                m for m in memories
                if self.relevance_scores.get(m['memory_id'], 0) >= self.focus_threshold
            ]
        memories.sort(key=lambda m: self.relevance_scores.get(m['memory_id'], 0), reverse=True)
        return memories[:limit] if limit else memories

    def get_statistics(self) -> dict:
        """Return a snapshot of pool usage, relevance and category spread."""
        active_memories = self.get_active_memories()
        category_dist = {cat: len(ids) for cat, ids in self.category_index.items()}
        relevance_values = list(self.relevance_scores.values())
        avg_relevance = (
            sum(relevance_values) / len(relevance_values) if relevance_values else 0
        )
        return {
            'total_memories': len(self.memory_pool),
            'active_memories': len(active_memories),
            'capacity_used': len(self.memory_pool) / self.capacity * 100,
            'avg_relevance_score': avg_relevance,
            'category_distribution': category_dist,
            'total_accesses': sum(self.access_counts.values()),
            'optimization_suggestions': self._get_optimization_suggestions(),
        }

    def _get_optimization_suggestions(self) -> int:
        """Count heuristics suggesting the pool would benefit from optimize()."""
        suggestions = 0

        # More than 20% of scores below 0.3 -> too much dead weight.
        low_relevance = sum(1 for score in self.relevance_scores.values() if score < 0.3)
        if low_relevance > self.capacity * 0.2:
            suggestions += 1

        # More than 30% untouched for an hour -> stale pool.
        now = datetime.now().timestamp()
        stale_memories = sum(1 for m in self.memory_pool if (now - m['last_accessed']) > 3600)
        if stale_memories > self.capacity * 0.3:
            suggestions += 1

        # One category holding more than half of all ids -> unbalanced.
        if self.category_index:
            sizes = [len(ids) for ids in self.category_index.values()]
            if max(sizes) > sum(sizes) * 0.5:
                suggestions += 1

        return suggestions

    def optimize(self):
        """Drop memories with relevance < 0.2; return how many were removed."""
        to_remove = [
            m['memory_id'] for m in self.memory_pool
            if self.relevance_scores.get(m['memory_id'], 0) < 0.2
        ]
        for memory_id in to_remove:
            self.remove_memory(memory_id)

        self.last_optimization = datetime.now()
        self.optimization_count += 1
        return len(to_remove)
566 |
567 |
# Global working memory instance, created lazily under a lock.
_working_memory_system = None
_working_memory_lock = Lock()


def get_working_memory_system() -> WorkingMemorySystem:
    """Return the process-wide WorkingMemorySystem, creating it on first use.

    Creation is guarded by a lock so concurrent first calls cannot race
    and construct two instances.
    """
    global _working_memory_system
    with _working_memory_lock:
        if _working_memory_system is None:
            _working_memory_system = WorkingMemorySystem()
    return _working_memory_system
581 |
582 |
583 | # ---------- Timeline Analysis Functions ----------
584 |
def generate_timeline_segments(
    timeline_data: List[Dict[str, Any]], granularity: str, hours: int
) -> List[Dict[str, Any]]:
    """Bucket timeline items into fixed-width segments and summarise each.

    granularity selects the bucket width: "second" -> 1s, "minute" -> 60s,
    anything else -> 1 hour.  Empty buckets are skipped.

    Fix: the loop now runs while ``current <= end_ts`` so an item whose
    timestamp equals the end of the range still lands in a segment.  The
    old ``current < end_ts`` comparison dropped the last item whenever the
    span was an exact multiple of the bucket width, and produced NO
    segments at all for a single-timestamp dataset.
    """
    if not timeline_data:
        return []

    timestamps = [item["timestamp"] for item in timeline_data]
    start_ts, end_ts = min(timestamps), max(timestamps)
    seg_seconds = {"second": 1, "minute": 60}.get(granularity, 3600)

    segments: List[Dict[str, Any]] = []
    current = start_ts
    while current <= end_ts:
        seg_end = current + seg_seconds
        bucket = [it for it in timeline_data if current <= it["timestamp"] < seg_end]
        if bucket:
            segments.append(
                {
                    "start_time": current,
                    "end_time": seg_end,
                    "state_count": len(bucket),
                    "avg_complexity": sum(s["complexity_score"] for s in bucket)
                    / len(bucket),
                    "max_change_magnitude": max(s["change_magnitude"] for s in bucket),
                    "dominant_type": Counter(
                        s["state_type"] for s in bucket
                    ).most_common(1)[0][0],
                }
            )
        current = seg_end
    return segments
618 |
619 |
def calculate_timeline_stats(timeline_data: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Aggregate complexity and change statistics over a state timeline.

    Change-magnitude stats only consider strictly positive magnitudes;
    an empty timeline yields an empty dict.
    """
    if not timeline_data:
        return {}

    complexities = [item["complexity_score"] for item in timeline_data]
    magnitudes = [
        item["change_magnitude"] for item in timeline_data if item["change_magnitude"] > 0
    ]
    type_counts = Counter(item["state_type"] for item in timeline_data)

    return {
        "avg_complexity": sum(complexities) / len(complexities),
        "max_complexity": max(complexities),
        "avg_change_magnitude": sum(magnitudes) / len(magnitudes) if magnitudes else 0,
        "max_change_magnitude": max(magnitudes) if magnitudes else 0,
        "most_common_type": type_counts.most_common(1)[0][0] if type_counts else None,
        "type_distribution": dict(type_counts),
    }
636 |
637 |
638 | # ---------- Flame Graph Functions ----------
639 |
def build_flame_graph_structure(actions: List[Dict], workflow_id: str) -> Dict:
    """Build a two-level flame-graph tree: workflow -> tool -> action.

    Node values are summed durations; actions without a truthy duration
    are omitted at the leaf level but still group under their tool.
    """
    def duration_sum(items):
        # Only actions with a truthy duration contribute.
        return sum(a.get('duration', 0) for a in items if a.get('duration'))

    root = {
        'name': f'Workflow {workflow_id}',
        'value': duration_sum(actions),
        'children': [],
    }

    grouped: Dict[str, List[Dict]] = {}
    for action in actions:
        grouped.setdefault(action.get('tool_name', 'unknown'), []).append(action)

    for tool_name, tool_actions in grouped.items():
        tool_node = {
            'name': tool_name,
            'value': duration_sum(tool_actions),
            'children': [],
        }
        for action in tool_actions:
            if not action.get('duration'):
                continue
            tool_node['children'].append(
                {
                    'name': f"Action {action['action_id']}",
                    'value': action['duration'],
                    'action_id': action['action_id'],
                    'status': action.get('status'),
                    'reasoning': action.get('reasoning', ''),
                    'started_at': action.get('started_at'),
                    'completed_at': action.get('completed_at'),
                }
            )
        root['children'].append(tool_node)

    return root
685 |
686 |
687 | def calculate_critical_path(actions: List[Dict]) -> List[Dict]:
688 | """Calculate the critical path through the workflow"""
689 | if not actions:
690 | return []
691 |
692 | # Sort actions by start time
693 | sorted_actions = sorted(actions, key=lambda x: x.get('started_at', 0))
694 |
695 | critical_path = []
696 | current_time = min(action['started_at'] for action in sorted_actions if action.get('started_at'))
697 | workflow_end = max(action['completed_at'] for action in sorted_actions if action.get('completed_at'))
698 |
699 | while current_time < workflow_end:
700 | # Find action that was running at current_time and ends latest
701 | running_actions = [
702 | a for a in sorted_actions
703 | if a.get('started_at', 0) <= current_time and a.get('completed_at', 0) > current_time
704 | ]
705 |
706 | if running_actions:
707 | # Find the action that ends latest (most critical)
708 | critical_action = max(running_actions, key=lambda x: x.get('completed_at', 0))
709 | if critical_action not in [cp['action_id'] for cp in critical_path]:
710 | critical_path.append({
711 | 'action_id': critical_action['action_id'],
712 | 'tool_name': critical_action.get('tool_name'),
713 | 'duration': critical_action.get('duration', 0),
714 | 'start_time': critical_action.get('started_at'),
715 | 'end_time': critical_action.get('completed_at')
716 | })
717 | current_time = critical_action.get('completed_at', current_time + 1)
718 | else:
719 | # No action running, find next action start
720 | future_actions = [a for a in sorted_actions if a.get('started_at', 0) > current_time]
721 | if future_actions:
722 | current_time = min(a['started_at'] for a in future_actions)
723 | else:
724 | break
725 |
726 | return critical_path
727 |
728 |
def convert_to_model(node: Dict) -> FlameGraphNode:
    """Recursively convert a flame-graph dict into a FlameGraphNode model."""
    child_models = [convert_to_model(child) for child in node.get('children', [])]
    return FlameGraphNode(
        name=node['name'],
        value=node['value'],
        children=child_models,
        action_id=node.get('action_id'),
        status=node.get('status'),
        reasoning=node.get('reasoning'),
        started_at=node.get('started_at'),
        completed_at=node.get('completed_at'),
    )
741 |
742 |
743 | # ---------- Performance Recommendation Functions ----------
744 |
def calculate_tool_reliability_score(tool_stats: dict) -> float:
    """Score tool reliability on a 0-100 scale.

    Multiplies the success rate by a volume factor that saturates at 100
    calls, so sparsely-used tools cannot reach a perfect score from a
    handful of lucky calls.  Tools with no calls score 0.0.
    """
    total = tool_stats.get('total_calls', 0)
    if total == 0:
        return 0.0

    success_rate = tool_stats.get('successful_calls', 0) / total
    volume_factor = min(1.0, total / 100)  # normalize by 100 calls
    return round(success_rate * volume_factor * 100, 2)
757 |
758 |
def categorize_tool_performance(avg_execution_time: float) -> str:
    """Bucket a tool's average execution time into a speed category.

    None (no data) maps to 'unknown'; otherwise <=5s is 'fast', <=15s
    'normal', <=30s 'slow', and anything longer 'very_slow'.
    """
    if avg_execution_time is None:
        return 'unknown'

    for limit, label in ((5, 'fast'), (15, 'normal'), (30, 'slow')):
        if avg_execution_time <= limit:
            return label
    return 'very_slow'
```
--------------------------------------------------------------------------------
/ultimate_mcp_server/services/prompts.py:
--------------------------------------------------------------------------------
```python
1 | """Prompt template service for managing and rendering prompt templates."""
2 | import asyncio
3 | import json
4 | import os
5 | import threading
6 | from pathlib import Path
7 | from typing import Any, Dict, Optional
8 |
9 | from ultimate_mcp_server.config import get_config
10 | from ultimate_mcp_server.exceptions import PromptTemplateError
11 | from ultimate_mcp_server.utils.logging import get_logger
12 |
13 | logger = get_logger(__name__)
14 |
15 | # Singleton instance
16 | _prompt_service = None
17 |
18 |
def get_prompt_service():
    """Get the global prompt service instance.

    Lazily constructs the module-level PromptService singleton on first
    call and returns the same instance thereafter.

    NOTE(review): creation is not lock-guarded, so two threads calling
    this concurrently during startup could race and build two instances —
    confirm single-threaded initialization or add a lock.
    """
    global _prompt_service
    if _prompt_service is None:
        _prompt_service = PromptService()  # lazy construction on first use
    return _prompt_service
25 |
26 |
27 | class PromptService:
28 | """
29 | Service for managing, storing, rendering, and versioning prompt templates.
30 |
31 | The PromptService provides a centralized system for handling prompt templates
32 | used throughout the MCP application. It manages the entire lifecycle of prompt
33 | templates, from loading them from disk to rendering them with variables for use
34 | with language models. The service provides both persistent storage and runtime
35 | management of templates.
36 |
37 | Key Features:
38 | - File-based template storage using both .txt and .json formats
39 | - Runtime template registration and modification
40 | - Variable substitution for dynamic prompt generation
41 | - Categorization of templates for organizational purposes
42 | - Asynchronous persistence to avoid blocking operations
43 | - Error handling and logging for template issues
44 |
45 | Template Organization:
46 | Templates are organized using a naming convention where the prefix before the
47 | first underscore represents the category (e.g., "rag_query" belongs to the "rag"
48 | category). This categorization is used when saving templates to disk, with each
49 | category stored in its own JSON file.
50 |
51 | File Formats:
52 | - Individual .txt files: One template per file, filename is the template name
53 | - JSON files: Multiple templates in a single file, typically grouped by category
54 |
55 | This service employs a singleton pattern, ensuring only one instance exists
56 | across the application. Always use the get_prompt_service() or get_prompt_manager()
57 | functions to access it, rather than instantiating directly.
58 |
59 | Usage Example:
60 | ```python
61 | # Get the service
62 | prompt_service = get_prompt_service()
63 |
64 | # Retrieve a template
65 | template = prompt_service.get_template("rag_query")
66 |
67 | # Register a new template
68 | prompt_service.register_template(
69 | "greeting",
70 | "Hello {name}, welcome to {service_name}!"
71 | )
72 |
73 | # Render a template with variables
74 | greeting = prompt_service.render_template(
75 | "greeting",
76 | {"name": "Alice", "service_name": "Ultimate MCP"}
77 | )
78 | ```
79 |
80 | Note:
81 | All file operations are handled with proper error handling to ensure
82 | the service continues functioning even if individual template files
83 | are corrupted or missing.
84 | """
85 |
86 | def __init__(self):
87 | """Initialize the prompt service.
88 |
89 | Args:
90 | templates_dir: Directory containing template files
91 | """
92 | self.templates: Dict[str, str] = {}
93 | try:
94 | config = get_config()
95 | self.templates_dir = config.prompt_templates_directory
96 | logger.info(f"Initializing PromptService. Looking for templates in: {self.templates_dir}")
97 | self._load_templates()
98 | except Exception as e:
99 | logger.error(f"Failed to initialize PromptService: {e}", exc_info=True)
100 | # Allow service to exist even if loading fails, get_template will raise errors
101 |
102 | # Create templates directory if it doesn't exist
103 | os.makedirs(self.templates_dir, exist_ok=True)
104 |
105 | # Read templates from files
106 | self._read_templates()
107 | logger.info(f"Prompt service initialized with {len(self.templates)} templates")
108 |
109 | def _load_templates(self):
110 | """
111 | Load prompt templates from individual .txt files in the templates directory.
112 |
113 | This method scans the configured templates directory for .txt files and loads
114 | each file as a separate template. It uses the filename (without extension)
115 | as the template name and the file content as the template text. This provides
116 | a simple way to manage templates as individual files, which can be useful for
117 | version control and template organization.
118 |
119 | The loading process:
120 | 1. Verifies the templates directory exists and is accessible
121 | 2. Scans for all .txt files using glob pattern matching
122 | 3. For each file:
123 | - Extracts the template name from the filename
124 | - Reads the file content as the template text
125 | - Adds the template to the in-memory template dictionary
126 | - Logs the successful load
127 | 4. Handles exceptions for each file individually to prevent a single corrupted
128 | file from blocking all template loading
129 | 5. Logs summary information about the loading process
130 |
131 | This approach allows templates to be:
132 | - Managed individually in separate files
133 | - Edited directly using text editors
134 | - Organized in a flat structure for simplicity
135 | - Added or removed without changing code
136 |
137 | The method is called during service initialization but can be called again
138 | to refresh templates from disk if needed.
139 |
140 | Note:
141 | This method only processes .txt files. JSON format templates are handled
142 | by the separate _read_templates method. Both methods work together to
143 | provide a complete template loading solution.
144 | """
145 | if not Path(self.templates_dir).is_dir():
146 | logger.warning(f"Prompt templates directory not found or not a directory: {self.templates_dir}")
147 | return
148 |
149 | loaded_count = 0
150 | for filepath in Path(self.templates_dir).glob('*.txt'):
151 | try:
152 | template_name = filepath.stem # Use filename without extension as name
153 | with open(filepath, 'r', encoding='utf-8') as f:
154 | content = f.read()
155 | self.templates[template_name] = content
156 | logger.debug(f"Loaded prompt template: {template_name}")
157 | loaded_count += 1
158 | except Exception as e:
159 | logger.error(f"Failed to load prompt template {filepath.name}: {e}")
160 |
161 | if loaded_count > 0:
162 | logger.info(f"Successfully loaded {loaded_count} prompt templates.")
163 | else:
164 | logger.info("No prompt templates found or loaded.")
165 |
166 | def _read_templates(self) -> None:
167 | """
168 | Load prompt templates from JSON files in the templates directory.
169 |
170 | This method complements _load_templates by handling template collections
171 | stored in JSON format. It scans for .json files in the templates directory
172 | and processes each file to extract multiple templates. Each JSON file can
173 | contain multiple templates, organized as a dictionary with template names
174 | as keys and template content as values.
175 |
176 | The loading process:
177 | 1. Scans the templates directory for all .json files
178 | 2. For each JSON file:
179 | - Parses the JSON content
180 | - Extracts each key-value pair as a template name and content
181 | - Handles both simple string templates and structured template objects
182 | - Adds valid templates to the in-memory template collection
183 | 3. Logs detailed information about successful and failed loads
184 |
185 | Template JSON Format Support:
186 | - Simple format: `{"template_name": "Template content with {variables}"}`
187 | - Structured format: `{"template_name": {"text": "Template content", ...}}`
188 | where the template text is extracted from the "text" field
189 |
190 | The JSON format is particularly useful for:
191 | - Storing multiple related templates in a single file
192 | - Organizing templates by category or function
193 | - Including metadata or configuration alongside templates
194 | - Efficiently managing large collections of templates
195 |
196 | Error Handling:
197 | - Each JSON file is processed independently, so errors in one file won't
198 | prevent loading from other files
199 | - Invalid template formats trigger warnings but don't halt processing
200 | - JSON parse errors are logged with file information for easier debugging
201 |
202 | Note:
203 | This method works in conjunction with _load_templates to provide a
204 | comprehensive template loading system supporting both individual
205 | .txt files and collections in .json files.
206 | """
207 | try:
208 | template_files = list(Path(self.templates_dir).glob("*.json"))
209 | logger.info(f"Found {len(template_files)} template files")
210 |
211 | for template_file in template_files:
212 | try:
213 | with open(template_file, "r", encoding="utf-8") as f:
214 | templates_data = json.load(f)
215 |
216 | # Add templates from file
217 | for template_name, template_content in templates_data.items():
218 | if isinstance(template_content, str):
219 | self.templates[template_name] = template_content
220 | elif isinstance(template_content, dict) and "text" in template_content:
221 | self.templates[template_name] = template_content["text"]
222 | else:
223 | logger.warning(f"Invalid template format for {template_name}")
224 |
225 | logger.info(f"Loaded templates from {template_file.name}")
226 | except Exception as e:
227 | logger.error(f"Error loading template file {template_file.name}: {str(e)}")
228 | except Exception as e:
229 | logger.error(f"Error reading templates: {str(e)}")
230 |
231 | def _save_templates(self) -> None:
232 | """
233 | Persist all in-memory templates to disk in categorized JSON files.
234 |
235 | This method implements the template persistence strategy, organizing templates
236 | by category and saving them to appropriate JSON files on disk. It ensures that
237 | any runtime changes to templates (additions, modifications, or deletions) are
238 | preserved across application restarts.
239 |
240 | The saving process:
241 | 1. Groups templates into categories based on naming conventions
242 | - Extracts the category from the template name (prefix before first underscore)
243 | - Templates without underscores go to the "general" category
244 | 2. For each category:
245 | - Creates or updates a JSON file named "{category}_templates.json"
246 | - Writes all templates in that category as a formatted JSON object
247 | - Uses proper indentation for human readability
248 | 3. Logs detailed information about the save operation
249 |
250 | Template Categorization:
251 | The method uses a convention-based approach to categorize templates:
252 | - "rag_query" → Category: "rag", saved to "rag_templates.json"
253 | - "chat_system" → Category: "chat", saved to "chat_templates.json"
254 | - "greeting" → Category: "general", saved to "general_templates.json"
255 |
256 | This categorization approach:
257 | - Keeps related templates together for easier management
258 | - Avoids a single monolithic file for all templates
259 | - Makes it easier to locate templates by purpose
260 | - Reduces the chance of merge conflicts in version control
261 |
262 | Error Handling:
263 | - The entire save operation is wrapped in exception handling to prevent
264 | crashes due to file system issues
265 | - Detailed error information is logged for debugging
266 | - Even if saving fails, the in-memory templates remain intact
267 |
268 | Note:
269 | This method is called both directly and asynchronously through
270 | _async_save_templates to provide both immediate and non-blocking
271 | persistence options.
272 | """
273 | try:
274 | # Group templates by category
275 | categorized_templates: Dict[str, Dict[str, Any]] = {}
276 |
277 | for template_name, template_text in self.templates.items():
278 | # Extract category from template name (before first _)
279 | parts = template_name.split("_", 1)
280 | category = parts[0] if len(parts) > 1 else "general"
281 |
282 | if category not in categorized_templates:
283 | categorized_templates[category] = {}
284 |
285 | categorized_templates[category][template_name] = template_text
286 |
287 | # Save each category to its own file
288 | for category, templates in categorized_templates.items():
289 | file_path = Path(self.templates_dir) / f"{category}_templates.json"
290 |
291 | with open(file_path, "w", encoding="utf-8") as f:
292 | json.dump(templates, f, indent=2)
293 |
294 | logger.info(f"Saved {len(templates)} templates to {file_path.name}")
295 |
296 | except Exception as e:
297 | logger.error(f"Error saving templates: {str(e)}")
298 |
299 | def get_template(self, template_name: str) -> Optional[str]:
300 | """
301 | Retrieve a specific prompt template by its name.
302 |
303 | This method provides access to individual templates stored in the service.
304 | It performs a simple dictionary lookup, returning the template text if found
305 | or None if the requested template doesn't exist in the collection.
306 |
307 | The lookup is exact and case-sensitive, with no fuzzy matching or fallback
308 | behavior. This design ensures predictable template resolution, which is
309 | important for maintaining consistent prompt behavior in production systems.
310 |
311 | Args:
312 | template_name: The exact name of the template to retrieve
313 |
314 | Returns:
315 | The template text as a string if found, None if not found
316 |
317 | Usage Example:
318 | ```python
319 | template = prompt_service.get_template("rag_query")
320 | if template:
321 | # Template found, use it
322 | prompt = template.format(query="What is machine learning?")
323 | else:
324 | # Template not found, handle the error
325 | logger.error(f"Template 'rag_query' not found")
326 | prompt = "Default fallback prompt: {query}"
327 | ```
328 |
329 | Note:
330 | Consider checking the return value for None before using the template,
331 | or use a default template as a fallback to handle missing templates
332 | gracefully.
333 | """
334 | return self.templates.get(template_name)
335 |
336 | def get_all_templates(self) -> Dict[str, str]:
337 | """
338 | Retrieve a copy of all available prompt templates.
339 |
340 | This method returns a dictionary containing all templates currently loaded
341 | in the service, with template names as keys and template texts as values.
342 | The returned dictionary is a shallow copy of the internal templates collection,
343 | ensuring that modifications to the returned dictionary won't affect the
344 | service's template storage.
345 |
346 | Use cases for this method include:
347 | - Listing available templates in an admin interface
348 | - Analyzing or processing multiple templates at once
349 | - Creating a template catalog or documentation
350 | - Debugging template availability issues
351 |
352 | Returns:
353 | A dictionary mapping template names to their content
354 |
355 | Usage Example:
356 | ```python
357 | all_templates = prompt_service.get_all_templates()
358 |
359 | # Display available templates
360 | print(f"Available templates ({len(all_templates)}): ")
361 | for name in sorted(all_templates.keys()):
362 | print(f" - {name}")
363 |
364 | # Find templates by pattern
365 | rag_templates = {
366 | name: content
367 | for name, content in all_templates.items()
368 | if name.startswith("rag_")
369 | }
370 | ```
371 |
372 | Note:
373 | While the dictionary is a copy, the template strings themselves
374 | are not deep-copied. This is generally not an issue since strings
375 | are immutable in Python.
376 | """
377 | return self.templates.copy()
378 |
379 | def register_template(self, template_name: str, template_text: str) -> bool:
380 | """
381 | Register a new template or update an existing one in the template collection.
382 |
383 | This method adds a new template to the in-memory template collection or updates
384 | an existing template if the name already exists. After adding or updating the
385 | template, it initiates an asynchronous save operation to persist the changes
386 | to disk, ensuring durability without blocking the calling code.
387 |
388 | The template registration process:
389 | 1. Adds or updates the template in the in-memory dictionary
390 | 2. Schedules an asynchronous task to save all templates to disk
391 | 3. Returns a success indicator
392 |
393 | This method is the primary way to programmatically add or modify templates
394 | at runtime, enabling dynamic template management without requiring file
395 | system access or application restarts.
396 |
397 | Template Naming Conventions:
398 | While not enforced, it's recommended to follow these naming conventions:
399 | - Use lowercase names with underscores for readability
400 | - Prefix with category name for organizational purposes (e.g., "rag_query")
401 | - Use descriptive names that indicate the template's purpose
402 |
403 | Args:
404 | template_name: Name for the template (used for later retrieval)
405 | template_text: Content of the template with variable placeholders
406 |
407 | Returns:
408 | True if the template was successfully registered, False if an error occurred
409 |
410 | Usage Example:
411 | ```python
412 | # Register a simple greeting template
413 | success = prompt_service.register_template(
414 | "greeting_formal",
415 | "Dear {title} {last_name},\n\nI hope this message finds you well."
416 | )
417 |
418 | # Register a more complex template with formatting options
419 | success = prompt_service.register_template(
420 | "invoice_summary",
421 | "Invoice #{invoice_id}\nDate: {date}\nTotal: ${amount:.2f}\n\n{items}"
422 | )
423 | ```
424 |
425 | Note:
426 | This method handles the persistence automatically through an asynchronous
427 | save operation. The changes are immediately available in memory but may
428 | take a moment to be written to disk.
429 | """
430 | try:
431 | self.templates[template_name] = template_text
432 |
433 | # Schedule template save
434 | asyncio.create_task(self._async_save_templates())
435 |
436 | return True
437 | except Exception as e:
438 | logger.error(f"Error registering template {template_name}: {str(e)}")
439 | return False
440 |
    async def _async_save_templates(self) -> None:
        """Coroutine wrapper around _save_templates for use with asyncio.create_task.

        Exists so that callers (register_template, remove_template) can
        schedule persistence on the event loop through a consistent coroutine
        interface.

        NOTE(review): despite being async, this currently performs the
        blocking disk write directly — there is no await, so the event loop
        is blocked for the duration of the save. Switching to true async
        file I/O (e.g. aiofiles) would be a future optimization.
        """
        self._save_templates()
476 |
477 | def remove_template(self, template_name: str) -> bool:
478 | """
479 | Remove a template from the collection if it exists.
480 |
481 | This method deletes a template from the in-memory template collection
482 | and initiates an asynchronous save operation to persist the deletion to disk.
483 | If the specified template doesn't exist, the method returns False but
484 | doesn't raise an exception, following a fail-soft approach for easier
485 | error handling.
486 |
487 | The template removal process:
488 | 1. Checks if the template exists in the collection
489 | 2. If found, removes it from the in-memory dictionary
490 | 3. Schedules an asynchronous task to save the updated template collection
491 | 4. Returns a boolean indicating success or failure
492 |
493 | This method enables runtime management of templates, allowing obsolete
494 | or incorrect templates to be removed without requiring file system access
495 | or application restarts.
496 |
497 | Args:
498 | template_name: Name of the template to remove
499 |
500 | Returns:
501 | True if the template was found and removed, False if it wasn't found
502 |
503 | Usage Example:
504 | ```python
505 | # Check if removal was successful
506 | if prompt_service.remove_template("outdated_template"):
507 | logger.info("Template successfully removed")
508 | else:
509 | logger.warning("Template not found, nothing to remove")
510 |
511 | # Unconditional removal attempt (ignoring result)
512 | prompt_service.remove_template("temporary_template")
513 | ```
514 |
515 | Note:
516 | The template is immediately removed from memory but the disk
517 | persistence happens asynchronously. If the application crashes
518 | immediately after this call, the template might still exist in
519 | the persisted files when the application restarts.
520 | """
521 | if template_name in self.templates:
522 | del self.templates[template_name]
523 |
524 | # Schedule template save
525 | asyncio.create_task(self._async_save_templates())
526 |
527 | return True
528 | return False
529 |
530 | def render_template(
531 | self,
532 | template_name: str,
533 | variables: Dict[str, Any]
534 | ) -> Optional[str]:
535 | """
536 | Render a prompt template by substituting variables into the template text.
537 |
538 | This method performs dynamic template rendering using Python's string formatting
539 | system. It takes a template by name and a dictionary of variables, substitutes
540 | the variables into the template placeholders, and returns the fully rendered text
541 | ready for use with language models or other downstream components.
542 |
543 | The rendering process:
544 | 1. Retrieves the template by name from the template repository
545 | 2. Validates that the template exists
546 | 3. Performs variable substitution using Python's str.format() method
547 | 4. Handles any errors that occur during substitution
548 |
549 | Template Format:
550 | Templates use Python's string formatting syntax with curly braces:
551 | - Simple variables: "Hello, {name}!"
552 | - Nested attributes: "Author: {book.author}"
553 | - Formatting options: "Score: {score:.2f}"
554 |
555 | Error Handling:
556 | The method has comprehensive error handling for common issues:
557 | - Missing templates: Returns None with a warning log
558 | - Missing variables: Logs the specific missing variable and returns None
559 | - Format errors: Logs the formatting error and returns None
560 |
561 | Variable handling:
562 | - All variables must be provided in the variables dictionary
563 | - Variable types should be compatible with string formatting
564 | - Complex objects can be used if they have string representations
565 |
566 | Args:
567 | template_name: Name of the template to render
568 | variables: Dictionary mapping variable names to their values
569 |
570 | Returns:
571 | Rendered template text with variables substituted, or None if rendering fails
572 |
573 | Example:
574 | ```python
575 | # Define a template
576 | service.register_template(
577 | "user_profile",
578 | "Name: {name}\nAge: {age}\nRole: {role}"
579 | )
580 |
581 | # Render with variables
582 | profile = service.render_template(
583 | "user_profile",
584 | {"name": "Alice", "age": 30, "role": "Administrator"}
585 | )
586 | # Result: "Name: Alice\nAge: 30\nRole: Administrator"
587 | ```
588 | """
589 | template = self.get_template(template_name)
590 | if not template:
591 | logger.warning(f"Template {template_name} not found")
592 | return None
593 |
594 | try:
595 | return template.format(**variables)
596 | except KeyError as e:
597 | logger.error(f"Missing variable in template {template_name}: {str(e)}")
598 | return None
599 | except Exception as e:
600 | logger.error(f"Error rendering template {template_name}: {str(e)}")
601 | return None
602 |
# Global instance
_prompt_manager_instance = None
_prompt_manager_lock = threading.Lock()

def get_prompt_manager() -> PromptService:
    """Return the process-wide singleton PromptService, creating it on first use.

    Implements double-checked locking: the unlocked fast path avoids mutex
    overhead once the instance exists, while the lock prevents two threads
    from racing to construct it simultaneously. Every caller therefore shares
    a single template repository, giving consistent prompt behavior across
    threads and request contexts.

    Returns:
        PromptService: The global singleton PromptService instance.

    Example:
        ```python
        # Always the same instance, even from different threads
        prompt_manager = get_prompt_manager()
        template = prompt_manager.render_template("greeting", {"name": "User"})
        ```
    """
    global _prompt_manager_instance
    if _prompt_manager_instance is not None:
        return _prompt_manager_instance
    with _prompt_manager_lock:
        # Re-check under the lock: another thread may have created the
        # instance while we were waiting to acquire it.
        if _prompt_manager_instance is None:
            _prompt_manager_instance = PromptService()
    return _prompt_manager_instance
636 |
# Example Usage
if __name__ == '__main__':
    from ultimate_mcp_server.utils.logging import setup_logging

    setup_logging(log_level="DEBUG")

    # Create dummy templates dir and file for example.
    # NOTE: str.format templates use single braces for placeholders;
    # "{{name}}" would render as the literal text "{name}".
    EXAMPLE_TEMPLATES_DIR = Path("./temp_prompt_templates_example")
    EXAMPLE_TEMPLATES_DIR.mkdir(exist_ok=True)
    (EXAMPLE_TEMPLATES_DIR / "greeting.txt").write_text("Hello, {name}! How are you today?")
    (EXAMPLE_TEMPLATES_DIR / "summary.txt").write_text("Summarize the following text:\n\n{text}")

    # Set env var to use this temp dir
    os.environ['GATEWAY_PROMPT_TEMPLATES_DIR'] = str(EXAMPLE_TEMPLATES_DIR.resolve())
    os.environ['GATEWAY_FORCE_CONFIG_RELOAD'] = 'true'  # Force reload

    try:
        manager = get_prompt_manager()
        print(f"Templates directory: {manager.templates_dir}")
        # PromptService exposes get_all_templates(); there is no list_templates().
        print(f"Available templates: {sorted(manager.get_all_templates().keys())}")

        greeting_template = manager.get_template('greeting')
        print(f"Greeting Template: {greeting_template}")

        rendered = manager.render_template('greeting', {'name': 'Alice'})
        print(f"Rendered Greeting: {rendered}")

        # get_template returns None for unknown names rather than raising.
        if manager.get_template('non_existent') is None:
            print("Template 'non_existent' not found, as expected")

    finally:
        # Clean up
        import shutil
        shutil.rmtree(EXAMPLE_TEMPLATES_DIR)
        print(f"Cleaned up {EXAMPLE_TEMPLATES_DIR}")
        for env_var in ('GATEWAY_PROMPT_TEMPLATES_DIR', 'GATEWAY_FORCE_CONFIG_RELOAD'):
            os.environ.pop(env_var, None)
```
--------------------------------------------------------------------------------
/ultimate_mcp_server/core/tournaments/utils.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Utility functions for tournament functionality.
3 | """
4 |
5 | import difflib
6 | import logging
7 | import re
8 | from datetime import datetime, timezone
9 | from pathlib import Path
10 | from typing import Any, Dict, List, Literal, Optional
11 |
12 | from ultimate_mcp_server.core.models.tournament import (
13 | EvaluatorConfig,
14 | ModelResponseData,
15 | TournamentData,
16 | TournamentStatus,
17 | )
18 |
19 | # For file write, if using a tool:
20 | from ultimate_mcp_server.tools.filesystem import write_file
21 |
22 | logger = logging.getLogger(__name__)
23 |
24 |
def create_round_prompt(
    tournament: TournamentData,
    round_num: int,
    previous_round_variant_responses: Dict[
        str, ModelResponseData
    ],  # Now takes full ModelResponseData
    target_model_variant_id: Optional[str] = None,  # For per-variant system prompts etc. (future)
) -> str:
    """Build the generation prompt for the given tournament round.

    Round 0 simply returns the original problem statement. Later rounds embed
    every variant's previous-round output — extracted code for code
    tournaments (falling back to the raw response text when extraction looks
    empty or trivial), full text otherwise — together with any error note and
    overall score, followed by type-specific refinement instructions.
    """
    if round_num == 0:
        return tournament.config.prompt

    is_code = tournament.config.tournament_type == "code"

    # --- Build prompt with previous round's responses ---
    header = f"""This is Round {round_num} of an iterative refinement process.
Original Problem:
---
{tournament.config.prompt}
---

In the previous round (Round {round_num - 1}), different model variants produced the following outputs.
Your goal is to synthesize the best aspects, address weaknesses, and produce a superior solution.
"""

    variant_sections = []
    for variant_id, response in previous_round_variant_responses.items():
        if is_code:
            # Prefer clean extracted code for the next round's prompt; fall
            # back to the full response text when the code looks empty/trivial.
            shown = response.extracted_code if response.extracted_code else response.response_text
            if not shown or len(shown.strip()) < 10:  # Heuristic for empty/trivial code
                shown = response.response_text
            shown = (
                f"```python\n{shown.strip()}\n```"
                if shown
                else "[No valid code extracted]"
            )
        else:  # Text tournament
            shown = response.response_text if response.response_text else "[No response text]"

        if response.error:
            shown += f"\n[Note: This variant encountered an error: {response.error}]"

        # Show the overall score next to the variant id when available.
        score_info = (
            f" (Overall Score: {response.overall_score:.2f})"
            if response.overall_score is not None
            else ""
        )

        variant_sections.append(
            f"--- Output from Variant: {variant_id}{score_info} ---\n{shown.strip()}\n"
        )

    # --- Add type-specific instructions ---
    if is_code:
        instructions = """
Carefully analyze all previous code solutions. Consider correctness, efficiency, readability, robustness, and how well they integrate good ideas.
Produce a NEW, complete Python implementation that is demonstrably better than any single prior solution.
Provide ONLY the Python code block, enclosed in triple backticks (```python ... ```).
Do not include any explanations outside of code comments.
"""
    else:  # Text tournament
        instructions = """
Analyze each previous response based on the original problem. Consider relevance, accuracy, completeness, clarity, conciseness, and style.
Synthesize the best aspects of ALL responses into a single, improved response.
Your new response should be superior to any individual response from the previous round.
You MAY optionally start your response with a brief (1-2 sentences) explanation of your synthesis choices, enclosed in <thinking>...</thinking> tags.
Then, provide the improved text response itself.
"""

    return f"{header}\n{''.join(variant_sections)}\n---Refinement Instructions---\n{instructions}".strip()
103 |
104 |
async def extract_thinking(response_text: str) -> Optional[str]:
    """Return the contents of a <thinking>...</thinking> block, if present.

    Matching is case-insensitive and spans newlines; the extracted text is
    stripped of surrounding whitespace. Returns None for empty input or when
    no block is found. (More robust extraction can be added later.)
    """
    if not response_text:
        return None
    thinking_match = re.search(
        r"<thinking>(.*?)</thinking>", response_text, re.DOTALL | re.IGNORECASE
    )
    if thinking_match is None:
        return None
    return thinking_match.group(1).strip()
111 |
112 |
async def save_model_response_content(
    tournament_storage_path: Path,
    round_num: int,
    variant_id: str,
    response_text: Optional[str],
    extracted_code: Optional[str],
    thinking_process: Optional[str],
    metrics: Dict[str, Any],
    tournament_type: Literal["code", "text"],
) -> Dict[str, Optional[str]]:
    """Save a variant's response text, extracted code, and metadata to files.

    Writes a per-variant markdown report (metrics, optional thinking process,
    full response text, and — for code tournaments — the extracted code) into
    a round_{round_num}/ subdirectory, plus a raw .py file for any extracted
    code. The variant id is sanitized for filesystem use. Returns a dict with
    the "markdown_file" path and the "code_file" path (None when no code was
    written).
    """
    round_dir = tournament_storage_path / f"round_{round_num}"
    round_dir.mkdir(parents=True, exist_ok=True)

    # Replace filesystem-unfriendly characters in the variant id.
    safe_variant_id = re.sub(r"[^a-zA-Z0-9_\-.]", "_", variant_id)
    base_filename = f"{safe_variant_id}_r{round_num}"

    # --- Main Markdown Report File ---
    report_parts = [f"# Response: {variant_id} - Round {round_num}\n\n", "## Metrics\n"]
    for metric_name, metric_value in metrics.items():
        label = metric_name.replace('_', ' ').title()
        if isinstance(metric_value, float):
            report_parts.append(f"- **{label}:** {metric_value:.4f}\n")
        else:
            report_parts.append(f"- **{label}:** {metric_value}\n")

    if thinking_process:
        report_parts.append(f"\n## Thinking Process\n```\n{thinking_process}\n```\n")

    report_parts.append(
        f"\n## Full Response Text\n```\n{response_text or '[No response text]'}\n```\n"
    )

    if tournament_type == "code" and extracted_code:
        report_parts.append(f"\n## Extracted Code\n```python\n{extracted_code}\n```\n")

    md_file_path = round_dir / f"{base_filename}_report.md"
    md_file_path.write_text("".join(report_parts), encoding="utf-8")

    saved_paths: Dict[str, Optional[str]] = {"markdown_file": str(md_file_path), "code_file": None}

    # --- Save Raw Extracted Code (if any) ---
    if tournament_type == "code" and extracted_code:
        code_file_path = round_dir / f"{base_filename}.py"
        code_file_path.write_text(extracted_code, encoding="utf-8")
        saved_paths["code_file"] = str(code_file_path)

    logger.debug(f"Saved response artifacts for {variant_id} to {round_dir}")
    return saved_paths
160 |
161 |
def generate_comparison_file_content(tournament: TournamentData, round_num: int) -> Optional[str]:
    """Build a markdown comparison report for one tournament round.

    The report contains a score-summary table, optional code diffs against the
    previous round's best response (code tournaments only), and a detailed
    section for every variant response.

    Args:
        tournament: The tournament whose round should be reported on.
        round_num: Zero-based index into ``tournament.rounds_results``.

    Returns:
        The markdown report as a string, or ``None`` when the round index is
        out of range or the round has no responses.
    """
    if round_num < 0 or round_num >= len(tournament.rounds_results):
        return None
    round_result = tournament.rounds_results[round_num]
    if not round_result.responses:
        return None

    content = f"# Tournament Comparison Report - Round {round_num}\n\n"
    content += f"**Tournament:** {tournament.name} (ID: {tournament.tournament_id})\n"
    content += f"**Type:** {tournament.config.tournament_type}\n"
    content += f"**Generated:** {datetime.now(timezone.utc).isoformat()}\n\n"

    content += "## Round Summary & Scores\n"
    content += (
        "| Variant ID | Overall Score | Key Metrics (e.g., Cost, Latency) | Evaluator Scores |\n"
    )
    content += (
        "|------------|---------------|-----------------------------------|------------------|\n"
    )

    # Highest overall score first; unscored variants sink to the bottom.
    sorted_responses = sorted(
        round_result.responses.items(),
        key=lambda item: item[1].overall_score if item[1].overall_score is not None else -1,
        reverse=True,
    )

    for variant_id, resp_data in sorted_responses:
        score_str = (
            f"{resp_data.overall_score:.2f}" if resp_data.overall_score is not None else "N/A"
        )
        cost = resp_data.metrics.get("cost", 0.0)
        latency = resp_data.metrics.get("latency_ms", "N/A")
        key_metrics = f"Cost: ${cost:.4f}, Latency: {latency}ms"

        eval_scores_str = (
            "; ".join(
                f"{eval_id}: {s_data.get('score', 'N/A')}"
                for eval_id, s_data in resp_data.scores.items()
            )
            if resp_data.scores
            else "N/A"
        )

        content += f"| {variant_id} | {score_str} | {key_metrics} | {eval_scores_str} |\n"
    content += "\n"

    # --- Add Diffs (Proposal 6) ---
    if round_num > 0 and tournament.config.tournament_type == "code":
        content += "## Code Diffs from Previous Best (if applicable)\n"
        # Best code from previous round (simplistic: highest-scored response with code).
        prev_round_data = tournament.rounds_results[round_num - 1]
        best_prev_resp = max(
            filter(
                lambda r: r.extracted_code and r.overall_score is not None,
                prev_round_data.responses.values(),
            ),
            key=lambda r: r.overall_score,
            default=None,
        )
        prev_round_best_code = best_prev_resp.extracted_code if best_prev_resp else None

        current_best_resp = max(
            filter(
                lambda r: r.extracted_code and r.overall_score is not None,
                round_result.responses.values(),
            ),
            key=lambda r: r.overall_score,
            default=None,
        )
        current_best_code = current_best_resp.extracted_code if current_best_resp else None

        if prev_round_best_code and current_best_code:
            diff = difflib.unified_diff(
                prev_round_best_code.splitlines(keepends=True),
                current_best_code.splitlines(keepends=True),
                fromfile=f"round_{round_num - 1}_best.py",
                tofile=f"round_{round_num}_best.py",
                lineterm="",
            )
            content += f"### Diff: Best of Round {round_num - 1} vs Best of Round {round_num}\n"
            content += "```diff\n"
            content += "".join(diff)
            content += "\n```\n\n"
        elif current_best_code:
            content += "Could not determine previous round's best code for diffing, or this is the first round with code.\n"
        # TODO: Add HTML diff for text tournaments if a library is available.

    content += "## Detailed Variant Responses\n"
    for variant_id, resp_data in sorted_responses:
        content += f"### Variant: {variant_id}\n"
        content += f"- **Original Model:** {resp_data.model_id_original}\n"
        content += (
            f"- **Overall Score:** {resp_data.overall_score:.2f}\n"
            if resp_data.overall_score is not None
            else "- **Overall Score:** N/A\n"
        )
        content += "#### Metrics:\n"
        for k, v in resp_data.metrics.items():
            content += f" - {k}: {v}\n"
        content += "#### Evaluator Scores:\n"
        if resp_data.scores:
            for eval_id, s_data in resp_data.scores.items():
                # `details` may be missing, None, or a non-string (e.g. dict);
                # slicing those directly raised TypeError before. Coerce first.
                details = str(s_data.get("details") or "N/A")[:200]
                content += f" - **{eval_id}**: Score: {s_data.get('score', 'N/A')}\n - Details: {details}...\n"  # Truncate details
        else:
            content += " - No scores available.\n"

        if resp_data.thinking_process:
            content += f"#### Thinking Process:\n```\n{resp_data.thinking_process}\n```\n"

        content_key = (
            "Extracted Code" if tournament.config.tournament_type == "code" else "Response Text"
        )
        code_lang_hint = "python" if tournament.config.tournament_type == "code" else ""
        actual_content = (
            resp_data.extracted_code
            if tournament.config.tournament_type == "code" and resp_data.extracted_code
            else resp_data.response_text
        )

        content += f"#### {content_key}:\n```{code_lang_hint}\n{actual_content or '[Content not available]'}\n```\n"
        if resp_data.response_file_path:  # Link to the full report for this variant
            # Make path relative to tournament storage root for portability
            try:
                tournament_root = Path(tournament.storage_path)
                relative_path = Path(resp_data.response_file_path).relative_to(
                    tournament_root.parent
                )  # one level up for `round_X/file`
                content += f"\n[View Full Variant Report](./{relative_path})\n"
            except ValueError:  # If not relative (e.g. absolute path)
                content += f"\n[View Full Variant Report]({resp_data.response_file_path})\n"

        content += "\n---\n"
    return content
300 |
301 |
def generate_leaderboard_file_content(tournament: TournamentData, round_num: int) -> Optional[str]:
    """Render a markdown leaderboard table for one tournament round.

    Variants are ranked by overall score (descending); each row also lists the
    scores of every evaluator flagged as a primary metric.

    Returns:
        The leaderboard markdown, or ``None`` when the round index is invalid
        or the round has no responses.
    """
    if not (0 <= round_num < len(tournament.rounds_results)):
        return None
    round_data = tournament.rounds_results[round_num]
    if not round_data.responses:
        return None

    primary_ids = [e.evaluator_id for e in tournament.config.evaluators if e.primary_metric]
    lines = [
        f"# Leaderboard - Round {round_num}\n\n",
        f"**Tournament:** {tournament.name}\n",
        f"**Primary Metric(s):** {', '.join(primary_ids) or 'Overall Score'}\n\n",
        "| Rank | Variant ID | Overall Score | Primary Metric Score(s) |\n",
        "|------|------------|---------------|-------------------------|\n",
    ]

    # Unscored variants sort below every scored one.
    def sort_key(resp):
        return resp.overall_score if resp.overall_score is not None else -float("inf")

    ranked = sorted(round_data.responses.values(), key=sort_key, reverse=True)

    for rank, resp in enumerate(ranked, start=1):
        overall = f"{resp.overall_score:.2f}" if resp.overall_score is not None else "N/A"
        primary_parts = [
            f"{cfg.evaluator_id}: {resp.scores[cfg.evaluator_id].get('score', 'N/A')}"
            for cfg in tournament.config.evaluators
            if cfg.primary_metric and cfg.evaluator_id in resp.scores
        ]
        lines.append(
            f"| {rank} | {resp.model_id_variant} | {overall} | {'; '.join(primary_parts) or 'N/A'} |\n"
        )

    return "".join(lines)
343 |
344 |
def calculate_weighted_score(
    scores: Dict[str, Dict[str, Any]], evaluator_configs: List[EvaluatorConfig]
) -> Optional[float]:
    """Combine per-evaluator scores into one weighted overall score.

    Each configured evaluator's numeric ``score`` entry is weighted by its
    configured weight. When no weighted contribution exists (no matching
    evaluators, or every usable weight is zero), falls back to the plain
    average of all numeric scores present; returns ``None`` when there is
    nothing to average at all.
    """
    if not scores or not evaluator_configs:
        return None

    weighted_sum = 0.0
    weight_sum = 0.0

    for cfg in evaluator_configs:
        if cfg.evaluator_id not in scores:
            continue
        entry = scores[cfg.evaluator_id]
        # 'score' is assumed to be the evaluator's primary numeric output.
        value = entry.get("score")
        if isinstance(value, (int, float)):
            weighted_sum += value * cfg.weight
            weight_sum += cfg.weight
        else:
            logger.warning(
                f"Evaluator '{cfg.evaluator_id}' provided non-numeric score: {value}"
            )

    if weight_sum == 0:
        # No usable weights: average every numeric score we can find instead.
        numeric = [
            entry.get("score")
            for entry in scores.values()
            if isinstance(entry.get("score"), (int, float))
        ]
        return sum(numeric) / len(numeric) if numeric else None

    return weighted_sum / weight_sum
377 |
378 |
def update_overall_best_response(tournament: TournamentData):
    """Scan all completed rounds and refresh the tournament-wide best response.

    A response only displaces the current best when it has a score, carries no
    error, and strictly beats the best score seen so far (ties keep the
    incumbent). Logs when a new overall best is installed.
    """
    best_score = -float("inf")
    incumbent = tournament.overall_best_response
    if incumbent and incumbent.overall_score is not None:
        best_score = incumbent.overall_score

    improved = False
    for round_result in tournament.rounds_results:
        if round_result.status != TournamentStatus.COMPLETED:
            continue
        for resp_data in round_result.responses.values():
            if resp_data.error or resp_data.overall_score is None:
                continue
            if resp_data.overall_score > best_score:
                tournament.overall_best_response = resp_data
                best_score = resp_data.overall_score
                improved = True

    if improved:
        logger.info(
            f"New overall best response for tournament '{tournament.name}' found: {tournament.overall_best_response.model_id_variant} with score {best_score:.2f}"
        )
402 |
403 |
def calculate_code_metrics(code: Optional[str]) -> dict:
    """Compute lightweight size/structure metrics for a Python code string.

    Args:
        code: Source code to measure. ``None`` or empty yields all-zero metrics.

    Returns:
        Dict with ``code_lines``, ``code_size_kb``, ``function_count``,
        ``class_count`` and ``import_count``.
    """
    if not code:
        return {
            "code_lines": 0,
            "code_size_kb": 0.0,
            "function_count": 0,
            "class_count": 0,
            "import_count": 0,
        }

    code_lines = code.count("\n") + 1
    code_size_kb = round(len(code.encode("utf-8")) / 1024, 2)
    function_count = len(re.findall(r"\bdef\s+\w+", code))
    class_count = len(re.findall(r"\bclass\s+\w+", code))
    # Anchor both alternatives to the start of a line. The previous pattern's
    # un-anchored `\bfrom\s+` counted any `from` word (e.g. `raise X from err`)
    # as an import; `\s*` also lets indented imports count.
    import_count = len(re.findall(r"^\s*(?:import|from)\s+", code, re.MULTILINE))

    return {
        "code_lines": code_lines,
        "code_size_kb": code_size_kb,
        "function_count": function_count,
        "class_count": class_count,
        "import_count": import_count,
    }
431 |
432 |
def generate_comparison_file(tournament: TournamentData, round_num: int) -> Optional[str]:
    """Generate a markdown comparison file for the given round.

    The report covers: run metadata, the prompt shown to models this round, a
    summary metrics table, per-model detail sections, and (from round 1 on) a
    change summary against the previous round.

    Args:
        tournament: The tournament data.
        round_num: The round number to generate the comparison for.

    Returns:
        The markdown content string, or None if data is missing.
    """
    if round_num < 0 or round_num >= len(tournament.rounds_results):
        logger.warning(f"Cannot generate comparison for invalid round {round_num}")
        return None

    round_result = tournament.rounds_results[round_num]
    if not round_result.responses:
        logger.warning(f"Cannot generate comparison for round {round_num}, no responses found.")
        return None

    prior_round = tournament.rounds_results[round_num - 1] if round_num > 0 else None
    code_mode = tournament.config.tournament_type == "code"

    parts = []
    add = parts.append

    def short_name(full_id):
        # Strip provider prefix ("openai:gpt-4" -> "gpt-4") for display.
        return full_id.split(":")[-1] if ":" in full_id else full_id

    # --- Header with run metadata ---
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    add(f"# Tournament Comparison - Round {round_num}\n\n")
    add(f"**Generated:** {stamp}\n")
    add(f"**Tournament ID:** {tournament.tournament_id}\n")
    add(f"**Tournament Name:** {tournament.config.name}\n")
    add(f"**Type:** {tournament.config.tournament_type}\n")
    add(f"**Current Round:** {round_num} of {tournament.config.rounds}\n")
    add(f"**Models:** {', '.join(model.model_id for model in tournament.config.models)}\n\n")

    # --- Prompt shown to the models this round ---
    if round_num == 0:
        add(f"## Original Prompt\n\n```\n{tournament.config.prompt}\n```\n\n")
    else:
        add(f"## Round {round_num} Prompt\n\n")
        # Every model receives the same prompt within a round.
        sample_prompt = create_round_prompt(tournament, round_num)
        add(f"```\n{sample_prompt[:500]}...\n```\n\n")

    # --- Summary metrics table ---
    add("## Summary Metrics\n\n")
    add("| Model | Tokens In | Tokens Out | Cost | Latency (ms) |\n")
    add("|-------|-----------|------------|------|-------------|\n")

    for model_id, response_data in sorted(round_result.responses.items()):
        m = response_data.metrics
        cost = m.get("cost", "N/A")
        cost_display = f"${cost:.6f}" if isinstance(cost, (int, float)) else cost
        add(
            f"| {short_name(model_id)} | {m.get('input_tokens', 'N/A')} | "
            f"{m.get('output_tokens', 'N/A')} | {cost_display} | "
            f"{m.get('latency_ms', 'N/A')} |\n"
        )

    add("\n## Detailed Model Responses\n\n")

    for model_id, response_data in sorted(round_result.responses.items()):
        m = response_data.metrics
        add(f"### {short_name(model_id)}\n\n")

        # Per-model metrics subsection.
        add("#### Metrics\n\n")
        add(
            f"- **Tokens:** {m.get('input_tokens', 'N/A')} in, "
            f"{m.get('output_tokens', 'N/A')} out, {m.get('total_tokens', 'N/A')} total\n"
        )
        cost = m.get("cost", "N/A")
        if isinstance(cost, (int, float)):
            add(f"- **Cost:** ${cost:.6f}\n")
        else:
            add(f"- **Cost:** {cost}\n")
        add(f"- **Latency:** {m.get('latency_ms', 'N/A')}ms\n")

        if code_mode:
            add(
                f"- **Code Stats:** {m.get('code_lines', 'N/A')} lines, "
                f"{m.get('code_size_kb', 'N/A')} KB\n"
            )

        add("\n")

        if response_data.thinking_process:
            add("#### Thinking Process\n\n")
            add(f"```\n{response_data.thinking_process}\n```\n\n")

        # Main payload: extracted code for code tournaments, raw text otherwise.
        if code_mode:
            add("#### Extracted Code\n\n")
            add("```python\n")
            add(response_data.extracted_code or "# No code extracted")
            add("\n```\n\n")
        else:
            add("#### Response Text\n\n")
            add("```\n")
            add(response_data.response_text or "[No response text]")
            add("\n```\n\n")

        if response_data.response_file_path:
            add(f"[View full response file]({response_data.response_file_path})\n\n")

    # --- Changes vs previous round (rounds 1+) ---
    if prior_round and prior_round.responses:
        add("## Changes from Previous Round\n\n")
        for model_id, response_data in sorted(round_result.responses.items()):
            if model_id not in prior_round.responses:
                continue
            add(f"### {short_name(model_id)}\n\n")

            cur_out = response_data.metrics.get("output_tokens", 0)
            prev_out = prior_round.responses[model_id].metrics.get("output_tokens", 0)
            if isinstance(cur_out, (int, float)) and isinstance(prev_out, (int, float)):
                token_change = cur_out - prev_out
            else:
                token_change = "N/A"

            add(f"- **Token Change:** {token_change} tokens\n")
            # Note: Here you could add more sophisticated text comparison/diff
            add("- Review the full responses to see detailed changes\n\n")

    return "".join(parts).strip()
580 |
581 |
async def save_model_response(
    tournament: TournamentData,
    round_num: int,
    model_id: str,
    response_text: str,
    thinking: Optional[str] = None,
    timestamp: Optional[str] = None,
) -> str:
    """Save model response to a file using standardized filesystem tools.

    Args:
        tournament: Tournament data
        round_num: Round number
        model_id: Model ID that generated this response
        response_text: The text response to save
        thinking: Optional thinking process from the model
        timestamp: Optional timestamp (defaults to current time if not provided)

    Returns:
        Path to saved response file
    """
    if not timestamp:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Get path to tournament storage directory
    storage_dir = Path(tournament.storage_path)
    round_dir = storage_dir / f"round_{round_num}"
    # parents=True: on a fresh tournament the storage directory itself may not
    # exist yet; the previous exist_ok-only call raised FileNotFoundError then.
    round_dir.mkdir(parents=True, exist_ok=True)

    # Create a safe filename from model ID (":" and "/" are path-hostile)
    safe_model_id = model_id.replace(":", "_").replace("/", "_")
    response_file = round_dir / f"{safe_model_id}_response.md"

    # Construct the markdown file with basic metadata header
    content = f"""# Response from {model_id}

## Metadata
- Tournament: {tournament.name}
- Round: {round_num}
- Model: {model_id}
- Timestamp: {timestamp}

## Response:

{response_text}
"""

    # Add thinking process if available
    if thinking:
        content += f"\n\n## Thinking Process:\n\n{thinking}\n"

    def _direct_write() -> None:
        # Fallback used when the standardized async writer fails or raises.
        with open(response_file, "w", encoding="utf-8") as f:
            f.write(content)

    # Use the standard filesystem write tool; fall back to a direct write on
    # reported failure or any exception.
    try:
        result = await write_file(path=str(response_file), content=content)
        if not result.get("success", False):
            logger.warning(f"Standard write_file tool reported failure: {result.get('error')}")
            _direct_write()
    except Exception as e:
        logger.error(f"Error using standardized file writer: {e}. Using direct file write.")
        _direct_write()

    return str(response_file)
650 |
651 |
def get_round_dir(tournament: TournamentData, round_num: int) -> Path:
    """Return the on-disk directory for a specific tournament round.

    Args:
        tournament: The tournament data.
        round_num: The round number.

    Returns:
        Path to the round directory (``<storage_path>/round_<round_num>``).
    """
    return Path(tournament.storage_path) / f"round_{round_num}"
665 |
666 |
def get_word_count(text: str) -> int:
    """Count whitespace-delimited words in a text string.

    Args:
        text: The text to count words in; falsy input counts as zero words.

    Returns:
        The number of words.
    """
    return len(text.split()) if text else 0
679 |
680 |
def generate_synthesis_prompt(
    tournament: TournamentData, previous_responses: Dict[str, str]
) -> str:
    """Generate the prompt for the synthesis round.

    Each prior response is labelled with a neutral letter ("Model A",
    "Model B", ...) rather than the provider name alone, to reduce bias.

    Args:
        tournament: The tournament data
        previous_responses: A dictionary mapping model IDs to their responses

    Returns:
        The synthesis prompt for the next round.
    """
    # Start with a base prompt instructing the model what to do
    prompt = f"""# {tournament.name} - Synthesis Round

Your task is to create an improved version based on the responses from multiple models.

Original task:
{tournament.config.prompt}

Below are responses from different models. Review them and create a superior response
that combines the strengths of each model's approach while addressing any weaknesses.

"""

    # Add each model's response. Labels run A-Z; the previous hard-coded list
    # only covered A-H and silently dropped any further responses. Past "Z"
    # fall back to a numeric label so no response is ever omitted.
    for i, (model_id, response) in enumerate(previous_responses.items()):
        label = chr(ord("A") + i) if i < 26 else str(i + 1)
        model_name = model_id.split(":")[-1] if ":" in model_id else model_id

        prompt += f"""
## Model {label} ({model_name}) Response:

{response}

"""

    # Add synthesis instructions
    prompt += """
# Your Task

Based on the responses above:

1. Create a single, unified response that represents the best synthesis of the information
2. Incorporate the strengths of each model's approach
3. Improve upon any weaknesses or omissions
4. Your response should be more comprehensive, accurate, and well-structured than any individual response

## Thinking Process
Start by briefly analyzing the strengths and weaknesses of each model's response, then explain your synthesis approach.

Example: "I synthesized the structured approach of Model A with the comprehensive detail from Model B, ensuring..."

"""

    return prompt
741 |
```