This is page 12 of 45. Use http://codebase.md/dicklesworthstone/llm_gateway_mcp_server?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .cursorignore
├── .env.example
├── .envrc
├── .gitignore
├── additional_features.md
├── check_api_keys.py
├── completion_support.py
├── comprehensive_test.py
├── docker-compose.yml
├── Dockerfile
├── empirically_measured_model_speeds.json
├── error_handling.py
├── example_structured_tool.py
├── examples
│ ├── __init__.py
│ ├── advanced_agent_flows_using_unified_memory_system_demo.py
│ ├── advanced_extraction_demo.py
│ ├── advanced_unified_memory_system_demo.py
│ ├── advanced_vector_search_demo.py
│ ├── analytics_reporting_demo.py
│ ├── audio_transcription_demo.py
│ ├── basic_completion_demo.py
│ ├── cache_demo.py
│ ├── claude_integration_demo.py
│ ├── compare_synthesize_demo.py
│ ├── cost_optimization.py
│ ├── data
│ │ ├── sample_event.txt
│ │ ├── Steve_Jobs_Introducing_The_iPhone_compressed.md
│ │ └── Steve_Jobs_Introducing_The_iPhone_compressed.mp3
│ ├── docstring_refiner_demo.py
│ ├── document_conversion_and_processing_demo.py
│ ├── entity_relation_graph_demo.py
│ ├── filesystem_operations_demo.py
│ ├── grok_integration_demo.py
│ ├── local_text_tools_demo.py
│ ├── marqo_fused_search_demo.py
│ ├── measure_model_speeds.py
│ ├── meta_api_demo.py
│ ├── multi_provider_demo.py
│ ├── ollama_integration_demo.py
│ ├── prompt_templates_demo.py
│ ├── python_sandbox_demo.py
│ ├── rag_example.py
│ ├── research_workflow_demo.py
│ ├── sample
│ │ ├── article.txt
│ │ ├── backprop_paper.pdf
│ │ ├── buffett.pdf
│ │ ├── contract_link.txt
│ │ ├── legal_contract.txt
│ │ ├── medical_case.txt
│ │ ├── northwind.db
│ │ ├── research_paper.txt
│ │ ├── sample_data.json
│ │ └── text_classification_samples
│ │ ├── email_classification.txt
│ │ ├── news_samples.txt
│ │ ├── product_reviews.txt
│ │ └── support_tickets.txt
│ ├── sample_docs
│ │ └── downloaded
│ │ └── attention_is_all_you_need.pdf
│ ├── sentiment_analysis_demo.py
│ ├── simple_completion_demo.py
│ ├── single_shot_synthesis_demo.py
│ ├── smart_browser_demo.py
│ ├── sql_database_demo.py
│ ├── sse_client_demo.py
│ ├── test_code_extraction.py
│ ├── test_content_detection.py
│ ├── test_ollama.py
│ ├── text_classification_demo.py
│ ├── text_redline_demo.py
│ ├── tool_composition_examples.py
│ ├── tournament_code_demo.py
│ ├── tournament_text_demo.py
│ ├── unified_memory_system_demo.py
│ ├── vector_search_demo.py
│ ├── web_automation_instruction_packs.py
│ └── workflow_delegation_demo.py
├── LICENSE
├── list_models.py
├── marqo_index_config.json.example
├── mcp_protocol_schema_2025-03-25_version.json
├── mcp_python_lib_docs.md
├── mcp_tool_context_estimator.py
├── model_preferences.py
├── pyproject.toml
├── quick_test.py
├── README.md
├── resource_annotations.py
├── run_all_demo_scripts_and_check_for_errors.py
├── storage
│ └── smart_browser_internal
│ ├── locator_cache.db
│ ├── readability.js
│ └── storage_state.enc
├── test_client.py
├── test_connection.py
├── TEST_README.md
├── test_sse_client.py
├── test_stdio_client.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── integration
│ │ ├── __init__.py
│ │ └── test_server.py
│ ├── manual
│ │ ├── test_extraction_advanced.py
│ │ └── test_extraction.py
│ └── unit
│ ├── __init__.py
│ ├── test_cache.py
│ ├── test_providers.py
│ └── test_tools.py
├── TODO.md
├── tool_annotations.py
├── tools_list.json
├── ultimate_mcp_banner.webp
├── ultimate_mcp_logo.webp
├── ultimate_mcp_server
│ ├── __init__.py
│ ├── __main__.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── commands.py
│ │ ├── helpers.py
│ │ └── typer_cli.py
│ ├── clients
│ │ ├── __init__.py
│ │ ├── completion_client.py
│ │ └── rag_client.py
│ ├── config
│ │ └── examples
│ │ └── filesystem_config.yaml
│ ├── config.py
│ ├── constants.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── evaluation
│ │ │ ├── base.py
│ │ │ └── evaluators.py
│ │ ├── providers
│ │ │ ├── __init__.py
│ │ │ ├── anthropic.py
│ │ │ ├── base.py
│ │ │ ├── deepseek.py
│ │ │ ├── gemini.py
│ │ │ ├── grok.py
│ │ │ ├── ollama.py
│ │ │ ├── openai.py
│ │ │ └── openrouter.py
│ │ ├── server.py
│ │ ├── state_store.py
│ │ ├── tournaments
│ │ │ ├── manager.py
│ │ │ ├── tasks.py
│ │ │ └── utils.py
│ │ └── ums_api
│ │ ├── __init__.py
│ │ ├── ums_database.py
│ │ ├── ums_endpoints.py
│ │ ├── ums_models.py
│ │ └── ums_services.py
│ ├── exceptions.py
│ ├── graceful_shutdown.py
│ ├── services
│ │ ├── __init__.py
│ │ ├── analytics
│ │ │ ├── __init__.py
│ │ │ ├── metrics.py
│ │ │ └── reporting.py
│ │ ├── cache
│ │ │ ├── __init__.py
│ │ │ ├── cache_service.py
│ │ │ ├── persistence.py
│ │ │ ├── strategies.py
│ │ │ └── utils.py
│ │ ├── cache.py
│ │ ├── document.py
│ │ ├── knowledge_base
│ │ │ ├── __init__.py
│ │ │ ├── feedback.py
│ │ │ ├── manager.py
│ │ │ ├── rag_engine.py
│ │ │ ├── retriever.py
│ │ │ └── utils.py
│ │ ├── prompts
│ │ │ ├── __init__.py
│ │ │ ├── repository.py
│ │ │ └── templates.py
│ │ ├── prompts.py
│ │ └── vector
│ │ ├── __init__.py
│ │ ├── embeddings.py
│ │ └── vector_service.py
│ ├── tool_token_counter.py
│ ├── tools
│ │ ├── __init__.py
│ │ ├── audio_transcription.py
│ │ ├── base.py
│ │ ├── completion.py
│ │ ├── docstring_refiner.py
│ │ ├── document_conversion_and_processing.py
│ │ ├── enhanced-ums-lookbook.html
│ │ ├── entity_relation_graph.py
│ │ ├── excel_spreadsheet_automation.py
│ │ ├── extraction.py
│ │ ├── filesystem.py
│ │ ├── html_to_markdown.py
│ │ ├── local_text_tools.py
│ │ ├── marqo_fused_search.py
│ │ ├── meta_api_tool.py
│ │ ├── ocr_tools.py
│ │ ├── optimization.py
│ │ ├── provider.py
│ │ ├── pyodide_boot_template.html
│ │ ├── python_sandbox.py
│ │ ├── rag.py
│ │ ├── redline-compiled.css
│ │ ├── sentiment_analysis.py
│ │ ├── single_shot_synthesis.py
│ │ ├── smart_browser.py
│ │ ├── sql_databases.py
│ │ ├── text_classification.py
│ │ ├── text_redline_tools.py
│ │ ├── tournament.py
│ │ ├── ums_explorer.html
│ │ └── unified_memory_system.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── async_utils.py
│ │ ├── display.py
│ │ ├── logging
│ │ │ ├── __init__.py
│ │ │ ├── console.py
│ │ │ ├── emojis.py
│ │ │ ├── formatter.py
│ │ │ ├── logger.py
│ │ │ ├── panels.py
│ │ │ ├── progress.py
│ │ │ └── themes.py
│ │ ├── parse_yaml.py
│ │ ├── parsing.py
│ │ ├── security.py
│ │ └── text.py
│ └── working_memory_api.py
├── unified_memory_system_technical_analysis.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/ultimate_mcp_server/tools/html_to_markdown.py:
--------------------------------------------------------------------------------
```python
1 | """HTML to Markdown conversion tools for Ultimate MCP Server."""
2 | import re
3 | import time
4 | from typing import Any, Dict, List
5 |
6 | import html2text
7 | import readability
8 | import trafilatura
9 | from bs4 import BeautifulSoup
10 | from markdownify import markdownify as md
11 |
12 | from ultimate_mcp_server.exceptions import ToolInputError
13 | from ultimate_mcp_server.tools.base import with_error_handling, with_tool_metrics
14 | from ultimate_mcp_server.utils import get_logger
15 |
16 | logger = get_logger("ultimate_mcp_server.tools.html_to_markdown")
17 |
18 | # --- Helper Functions ---
19 |
20 | def _is_html_fragment(text: str) -> bool:
21 | """Detect if text is likely an HTML fragment.
22 |
23 | Args:
24 | text: Input text to check
25 |
26 | Returns:
27 | bool: True if the text appears to be HTML, False otherwise
28 | """
29 | # Simple heuristics to check if the text contains HTML
30 | html_patterns = [
31 | r"<\s*[a-zA-Z]+[^>]*>", # Basic HTML tag pattern
32 | r"<\s*/\s*[a-zA-Z]+\s*>", # Closing HTML tag
33 | r"&[a-zA-Z]+;", # HTML entities
34 | r"<!\s*DOCTYPE", # DOCTYPE declaration
35 | r"<!\s*--", # HTML comment
36 | r"style\s*=\s*['\"]", # style attribute
37 | r"class\s*=\s*['\"]", # class attribute
38 | r"id\s*=\s*['\"]", # id attribute
39 | r"href\s*=\s*['\"]", # href attribute
40 | r"src\s*=\s*['\"]", # src attribute
41 | ]
42 |
43 | # Check if the text matches any of the patterns
44 | for pattern in html_patterns:
45 | if re.search(pattern, text, re.IGNORECASE):
46 | return True
47 |
48 | return False
49 |
def _clean_html_with_beautifulsoup(html: str) -> str:
    """Strip scripts, styles and risky attributes from HTML via BeautifulSoup.

    Args:
        html: HTML content to clean

    Returns:
        Cleaned HTML string with unwanted elements removed
    """
    try:
        soup = BeautifulSoup(html, 'html.parser')

        # Drop elements that never contribute readable content.
        for node in soup(['script', 'style', 'svg', 'iframe', 'canvas', 'noscript']):
            node.decompose()

        # Prune attributes: inline data URLs in src, event handlers (on*),
        # inline styles and data-* attributes are all removed.
        for element in soup.find_all(True):
            for name in list(element.attrs):
                value = element.attrs[name]
                is_data_url = (
                    name == 'src'
                    and isinstance(value, str)
                    and 'data:' in value
                )
                if is_data_url:
                    del element.attrs[name]
                elif name.startswith('on') or name == 'style' or name.startswith('data-'):
                    del element.attrs[name]

        return str(soup)
    except Exception as e:
        logger.warning(f"Error cleaning HTML with BeautifulSoup: {str(e)}")
        # Degrade gracefully: hand back the untouched input when parsing fails.
        return html
81 |
def _html_to_markdown_with_html2text(html: str) -> str:
    """Convert HTML to Markdown via the html2text library.

    Args:
        html: HTML content to convert

    Returns:
        Markdown formatted text, or the original input if conversion fails
    """
    try:
        converter = html2text.HTML2Text()
        # Keep links, images and tables; prefer Unicode output; no wrapping.
        converter.ignore_links = False
        converter.ignore_images = False
        converter.ignore_tables = False
        converter.unicode_snob = True  # Use Unicode instead of ASCII
        converter.body_width = 0       # No wrapping
        return converter.handle(html)
    except Exception as e:
        logger.warning(f"Error converting HTML to Markdown with html2text: {str(e)}")
        # Degrade gracefully by returning the unconverted input.
        return html
104 |
def _html_to_markdown_with_markdownify(html: str) -> str:
    """Convert HTML to Markdown via the markdownify library.

    Args:
        html: HTML content to convert

    Returns:
        Markdown formatted text, or the original input if conversion fails
    """
    try:
        # ATX headings ("# Title") rather than Setext underlines.
        converted = md(html, heading_style="ATX")
        return converted
    except Exception as e:
        logger.warning(f"Error converting HTML to Markdown with markdownify: {str(e)}")
        # Degrade gracefully by returning the unconverted input.
        return html
120 |
def _extract_content_with_readability(html: str) -> str:
    """Pull the main article content out of HTML using readability.

    Args:
        html: HTML content to process

    Returns:
        HTML string containing only the main content, or the original
        input when extraction fails
    """
    try:
        return readability.Document(html).summary()
    except Exception as e:
        logger.warning(f"Error extracting content with readability: {str(e)}")
        # Degrade gracefully by returning the unprocessed input.
        return html
138 |
def _extract_content_with_trafilatura(html: str) -> str:
    """Pull the main content out of HTML using trafilatura.

    Tries plain-text extraction first; when that yields nothing, retries
    asking trafilatura for HTML output before giving up.

    Args:
        html: HTML content to process

    Returns:
        Extracted content, or the original input when extraction fails
    """
    try:
        as_text = trafilatura.extract(html, include_comments=False, include_tables=True)
        if as_text:
            return as_text
        # Text extraction came back empty; retry requesting HTML output.
        as_html = trafilatura.extract(
            html, output_format='html', include_comments=False, include_tables=True
        )
        return as_html or html
    except Exception as e:
        logger.warning(f"Error extracting content with trafilatura: {str(e)}")
        # Degrade gracefully by returning the unprocessed input.
        return html
159 |
160 | def _sanitize_markdown(markdown: str) -> str:
161 | """Clean up and format the markdown to be more readable.
162 |
163 | Args:
164 | markdown: Markdown text to sanitize
165 |
166 | Returns:
167 | Cleaned markdown text
168 | """
169 | # Fix excessive newlines (more than 2 consecutive)
170 | sanitized = re.sub(r'\n{3,}', '\n\n', markdown)
171 |
172 | # Fix list item spacing
173 | sanitized = re.sub(r'(\n[*-].*\n)(?!\n)', r'\1\n', sanitized)
174 |
175 | # Remove trailing whitespace from lines
176 | sanitized = re.sub(r' +$', '', sanitized, flags=re.MULTILINE)
177 |
178 | # Fix markdown heading formatting (ensure space after #)
179 | sanitized = re.sub(r'(^|\n)(#{1,6})([^#\s])', r'\1\2 \3', sanitized)
180 |
181 | # Fix code block formatting
182 | sanitized = re.sub(r'```\s*\n', '```\n', sanitized)
183 | sanitized = re.sub(r'\n\s*```', '\n```', sanitized)
184 |
185 | # Ensure proper code block syntax (start with language or leave empty)
186 | sanitized = re.sub(r'```([^a-zA-Z\s\n][^`\n]*)$', '```\n\\1', sanitized, flags=re.MULTILINE)
187 |
188 | # Normalize list indicators (consistent use of - or * for unordered lists)
189 | sanitized = re.sub(r'^[*+] ', '- ', sanitized, flags=re.MULTILINE)
190 |
191 | return sanitized
192 |
193 | def _improve_markdown_formatting(markdown: str) -> str:
194 | """Improve the formatting of the markdown to make it more readable.
195 |
196 | Args:
197 | markdown: Markdown text to improve
198 |
199 | Returns:
200 | Improved markdown text
201 | """
202 | # Ensure proper spacing for headings
203 | improved = re.sub(r'(\n#{1,6}[^\n]+)(\n[^\n#])', r'\1\n\2', markdown)
204 |
205 | # Ensure paragraphs have proper spacing
206 | improved = re.sub(r'(\n[^\s#>*-][^\n]+)(\n[^\s#>*-])', r'\1\n\2', improved)
207 |
208 | # Fix blockquote formatting
209 | improved = re.sub(r'(\n>[ ][^\n]+)(\n[^>\s])', r'\1\n\2', improved)
210 |
211 | # Fix nested list formatting
212 | improved = re.sub(r'(\n[ ]{2,}[*-][ ][^\n]+)(\n[^\s*-])', r'\1\n\2', improved)
213 |
214 | # Add horizontal rules for clear section breaks (if large content gaps exist)
215 | improved = re.sub(r'\n\n\n\n+', '\n\n---\n\n', improved)
216 |
217 | return improved
218 |
def _convert_html_tables_to_markdown(html: str) -> str:
    """Convert every <table> in the HTML into a markdown pipe table.

    The first row is treated as the header row (whether its cells are <th>
    or <td>); the remaining rows become data rows.

    Args:
        html: HTML content with tables to convert

    Returns:
        The document with each table replaced by its markdown equivalent.
        Non-table markup is re-serialized by BeautifulSoup, so minor
        normalization (attribute quoting, entity encoding) may occur.
        On any error the original input is returned unchanged.
    """
    try:
        soup = BeautifulSoup(html, 'html.parser')
        tables = soup.find_all('table')

        # If no tables, return original HTML untouched.
        if not tables:
            return html

        # Fix: replace within the re-serialized document, not the raw input.
        # str(table) is the parser's normalized form and may not appear
        # byte-for-byte in the original html, which made the previous
        # html.replace(...) silently miss and leave tables unconverted.
        result = str(soup)

        for table in tables:
            rows = table.find_all('tr')
            if not rows:
                continue

            markdown_lines = []

            # First row doubles as the header row.
            header_cells = rows[0].find_all(['th', 'td'])
            if header_cells:
                markdown_lines.append(
                    '| ' + ' | '.join(cell.get_text().strip() for cell in header_cells) + ' |'
                )
                # Mandatory separator between header and body.
                markdown_lines.append(
                    '| ' + ' | '.join('---' for _ in header_cells) + ' |'
                )

            # Remaining rows are data rows.
            for row in rows[1:]:
                cells = row.find_all('td')
                if cells:
                    markdown_lines.append(
                        '| ' + ' | '.join(cell.get_text().strip() for cell in cells) + ' |'
                    )

            # Swap the serialized table for its markdown equivalent.
            result = result.replace(str(table), '\n'.join(markdown_lines))

        return result

    except Exception as e:
        logger.warning(f"Error converting HTML tables to Markdown: {str(e)}")
        # If conversion fails, return the original HTML.
        return html
271 |
272 | # --- Main Tool Function ---
273 |
@with_tool_metrics
@with_error_handling
async def clean_and_format_text_as_markdown(
    text: str,
    force_markdown_conversion: bool = False,
    extraction_method: str = "auto",
    preserve_tables: bool = True,
    preserve_links: bool = True,
    preserve_images: bool = False,
    max_line_length: int = 0  # 0 means no wrapping
) -> Dict[str, Any]:
    """Converts plain text or HTML to clean, well-formatted markdown.

    Automatically detects if input is HTML, then cleans and converts it.
    For non-HTML text, it applies minimal formatting to create valid markdown.

    Args:
        text: The input text to clean and format (plain text or HTML).
        force_markdown_conversion: Whether to force markdown conversion even if the text doesn't
                                   look like HTML. Default is False.
        extraction_method: Method to extract content from HTML. Options:
                           - "auto": Automatically choose the best method
                           - "readability": Use Mozilla's Readability algorithm
                           - "trafilatura": Use trafilatura library
                           - "raw": Don't extract main content, convert the whole document
                           Default is "auto".
        preserve_tables: Whether to preserve and convert HTML tables to markdown tables.
                         Default is True.
        preserve_links: Whether to preserve and convert HTML links to markdown links.
                        Default is True.
        preserve_images: Whether to preserve and convert HTML images to markdown image syntax.
                         Default is False.
        max_line_length: Maximum line length for text wrapping. 0 means no wrapping.
                         Default is 0.

    Returns:
        Dictionary containing:
        {
            "markdown_text": "Cleaned and formatted markdown text",
            "was_html": true,  # Whether the input was detected as HTML
            "extraction_method_used": "readability",  # Which extraction method was used
            "processing_time": 0.35,  # Time taken in seconds
            "success": true
        }

    Raises:
        ToolInputError: If the input text is empty or not a string.
    """
    start_time = time.time()

    # Input validation
    if not text:
        raise ToolInputError("Input text cannot be empty")
    if not isinstance(text, str):
        raise ToolInputError("Input text must be a string")

    # Determine if input is HTML. force_markdown_conversion short-circuits
    # detection, routing even plain text through the HTML pipeline below.
    is_html = _is_html_fragment(text) or force_markdown_conversion

    # Process based on content type
    if is_html:
        logger.info("Input detected as HTML, processing for conversion to markdown")

        # Convert HTML tables to markdown before main processing, so the
        # extractors/cleaner below see pipe tables as plain text.
        if preserve_tables:
            text = _convert_html_tables_to_markdown(text)

        # Extract main content based on specified method
        extraction_method_used = extraction_method
        if extraction_method == "auto":
            # If the text is a small fragment, use raw conversion
            if len(text) < 1000:
                extraction_method_used = "raw"
            else:
                # Try trafilatura first, fallback to readability
                try:
                    extracted = _extract_content_with_trafilatura(text)
                    # Heuristic: accept only if at least 20% of the input
                    # survived extraction; otherwise assume over-stripping.
                    if extracted and len(extracted) > 0.2 * len(text):  # Ensure we got meaningful extraction
                        text = extracted
                        extraction_method_used = "trafilatura"
                    else:
                        text = _extract_content_with_readability(text)
                        extraction_method_used = "readability"
                except Exception:
                    text = _extract_content_with_readability(text)
                    extraction_method_used = "readability"
        elif extraction_method == "readability":
            text = _extract_content_with_readability(text)
        elif extraction_method == "trafilatura":
            text = _extract_content_with_trafilatura(text)
        # For "raw", we use the text as is

        # Clean HTML before conversion (drops scripts/styles and risky attrs)
        text = _clean_html_with_beautifulsoup(text)

        # Set up conversion options based on parameters
        h = html2text.HTML2Text()
        h.ignore_links = not preserve_links
        h.ignore_images = not preserve_images
        h.ignore_tables = not preserve_tables
        h.body_width = max_line_length
        h.unicode_snob = True

        # Try multiple conversion methods and use the best result
        try:
            markdown_text = h.handle(text)

            # Fallback to markdownify if html2text result looks problematic.
            # NOTE(review): `and` binds tighter than `or`, so this condition
            # reads as A or B or (C and D) -- any leftover '<' or '>' alone
            # triggers the fallback regardless of length. Confirm intended.
            if '<' in markdown_text or '>' in markdown_text or len(markdown_text.strip()) < 100 and len(text) > 500:
                try:
                    alternative = _html_to_markdown_with_markdownify(text)
                    # Prefer the alternative only when it produced more output.
                    if len(alternative.strip()) > len(markdown_text.strip()):
                        markdown_text = alternative
                except Exception:
                    pass
        except Exception as e:
            logger.warning(f"Primary markdown conversion failed: {str(e)}")
            try:
                markdown_text = _html_to_markdown_with_markdownify(text)
            except Exception:
                # Last resort: strip tags and return plain text
                markdown_text = re.sub(r'<[^>]*>', '', text)
    else:
        logger.info("Input detected as plain text, applying minimal markdown formatting")
        # For plain text, just clean it up a bit
        markdown_text = text
        extraction_method_used = "none"

    # Final cleanup and formatting of the markdown (applies to both paths)
    markdown_text = _sanitize_markdown(markdown_text)
    markdown_text = _improve_markdown_formatting(markdown_text)

    processing_time = time.time() - start_time
    logger.info(f"Text cleaned and formatted as markdown in {processing_time:.2f}s")

    return {
        "markdown_text": markdown_text,
        "was_html": is_html,
        "extraction_method_used": extraction_method_used,
        "processing_time": processing_time,
        "success": True
    }
416 |
417 | # --- Additional Tool Functions ---
418 |
@with_tool_metrics
@with_error_handling
async def detect_content_type(text: str) -> Dict[str, Any]:
    """Analyzes text to detect its type: HTML, markdown, code, or plain text.

    Scores the input against three weighted regex marker sets (HTML,
    markdown, code); plain text is the fallback when no marker set
    dominates.

    Args:
        text: The input text to analyze

    Returns:
        Dictionary containing:
        {
            "content_type": "html",  # One of: "html", "markdown", "code", "plain_text"
            "confidence": 0.85,      # Confidence score (0.0-1.0)
            "details": {
                "html_markers": 12,      # Count of HTML markers found
                "markdown_markers": 3,   # Count of markdown markers found
                "code_markers": 1,       # Count of code markers found
                "detected_language": "javascript"  # If code is detected
            },
            "success": true
        }

    Raises:
        ToolInputError: If the input text is empty or not a string.
    """
    if not text:
        raise ToolInputError("Input text cannot be empty")
    if not isinstance(text, str):
        raise ToolInputError("Input text must be a string")

    # Weighted HTML indicators.
    html_weighted_patterns = [
        (r"<\s*[a-zA-Z]+[^>]*>", 1),                            # HTML tag
        (r"<\s*/\s*[a-zA-Z]+\s*>", 1),                          # Closing HTML tag
        (r"&[a-zA-Z]+;", 0.5),                                  # HTML entity
        (r"<!\s*DOCTYPE", 2),                                   # DOCTYPE
        (r"<!\s*--", 1),                                        # HTML comment opener
        (r"<!--.*?-->", 1),                                     # Complete HTML comment
        (r"<(div|span|p|a|img|table|ul|ol|li|h[1-6])\b", 1.5),  # Common HTML tags
        (r"</(div|span|p|a|img|table|ul|ol|li|h[1-6])>", 1.5),  # Common closing tags
        (r"<(html|head|body|meta|link|script|style)\b", 2),     # Structure tags
        (r"</(html|head|body|script|style)>", 2),               # Structure closing tags
        (r"style\s*=\s*['\"]", 1),                              # style attribute
        (r"class\s*=\s*['\"]", 1),                              # class attribute
        (r"id\s*=\s*['\"]", 1),                                 # id attribute
        (r"href\s*=\s*['\"]", 1),                               # href attribute
        (r"src\s*=\s*['\"]", 1)                                 # src attribute
    ]

    # Weighted markdown indicators (evaluated per line).
    markdown_weighted_patterns = [
        (r"^#\s+.+$", 2),                # Heading level 1
        (r"^#{2,6}\s+.+$", 1.5),         # Headings levels 2-6
        (r"^\s*[*-]\s+.+$", 1),          # Unordered list
        (r"^\s*\d+\.\s+.+$", 1),         # Ordered list
        (r"^\s*>\s+.+$", 1.5),           # Blockquote
        (r"\[.+?\]\(.+?\)", 2),          # Link
        (r"!\[.+?\]\(.+?\)", 2),         # Image
        (r"`[^`\n]+`", 1),               # Inline code
        (r"^```\s*\w*$", 2),             # Code block start
        (r"^```$", 2),                   # Code block end
        (r"\*\*.+?\*\*", 1),             # Bold
        (r"\*.+?\*", 0.5),               # Italic
        (r"__(.+?)__", 1),               # Bold with underscore
        (r"_(.+?)_", 0.5),               # Italic with underscore
        (r"~~.+?~~", 1),                 # Strikethrough
        (r"^\s*[-*_]{3,}\s*$", 1.5),     # Horizontal rule
        (r"^\s*\|(.+\|)+\s*$", 2),       # Table row
        (r"^\s*\|([-:]+\|)+\s*$", 3)     # Table header/divider
    ]

    # Weighted code indicators spanning several languages.
    code_weighted_patterns = [
        (r"function\s+\w+\s*\(.*?\)\s*\{", 2),                      # Function declaration
        (r"(var|let|const)\s+\w+\s*=", 1.5),                        # JS variable declaration
        (r"if\s*\(.*?\)\s*\{", 1),                                  # If statement
        (r"for\s*\(.*?;.*?;.*?\)\s*\{", 2),                         # For loop
        (r"while\s*\(.*?\)\s*\{", 2),                               # While loop
        (r"class\s+\w+(\s+extends\s+\w+)?\s*\{", 2),                # Class declaration
        (r"import\s+.*?from\s+['\"].*?['\"]", 2),                   # ES6 import
        (r"def\s+\w+\s*\(.*?\):", 2),                               # Python function
        (r"class\s+\w+(\(\w+\))?:", 2),                             # Python class
        (r"import\s+\w+(\s+as\s+\w+)?", 1.5),                       # Python import
        (r"from\s+\w+(\.\w+)*\s+import", 1.5),                      # Python from import
        (r"public\s+(static\s+)?(void|int|String)\s+\w+\s*\(", 2),  # Java method
        (r"#include\s*<.*?>", 2),                                   # C/C++ include
        (r"^\s*package\s+[\w\.]+;", 2),                             # Java/Kotlin package
        (r"^\s*using\s+[\w\.]+;", 2),                               # C# using
        (r"^\s*(public|private|protected)\s+class", 2)              # Access modifier
    ]

    def _score(patterns, flags):
        # Each regex contributes (match count x weight) to the total.
        return sum(len(re.findall(p, text, flags)) * w for p, w in patterns)

    html_markers = _score(html_weighted_patterns, re.IGNORECASE)
    markdown_markers = _score(markdown_weighted_patterns, re.MULTILINE)
    code_markers = _score(code_weighted_patterns, re.MULTILINE)

    # Rough language identification, attempted only when the text scores
    # clearly code-like.
    detected_language = None
    if code_markers > 5:
        language_signatures = [
            (r"function\s+\w+|var\s+\w+|let\s+\w+|const\s+\w+|document\.|\$\(", "javascript"),
            (r"<\?php|\$[a-zA-Z_]", "php"),
            (r"def\s+\w+\s*\(.*?\):|import\s+\w+|from\s+\w+\s+import", "python"),
            (r"public\s+class\s+\w+|public\s+static\s+void\s+main", "java"),
            (r"#include\s*<.*?>|int\s+main\s*\(", "c/c++"),
            (r"^\s*using\s+System;|namespace\s+\w+|public\s+class\s+\w+\s*:", "c#"),
            (r"module\s+\w+|fn\s+\w+|let\s+\w+|impl", "rust"),
            (r"^\s*import\s+\w+\s+from\s+['\"]|export\s+(default\s+)?", "typescript"),
            (r"^package\s+main|func\s+\w+\(|import\s+\([^)]*\)", "go")
        ]
        detected_language = next(
            (
                lang
                for pattern, lang in language_signatures
                if re.search(pattern, text, re.MULTILINE | re.IGNORECASE)
            ),
            None,
        )

    # Normalize marker counts: HTML per ~100 characters, markdown and
    # code per line.
    line_count = max(len(text.split('\n')), 1)
    html_score = html_markers / max(len(text) / 100, 1)
    markdown_score = markdown_markers / line_count
    code_score = code_markers / line_count

    # Plain text is the confidence left over after the marker types claim
    # their (capped) shares.
    plain_text_score = 1.0 - max(
        min(html_score / 10, 1),
        min(markdown_score / 5, 1),
        min(code_score / 5, 1),
    )

    scores = {
        "html": html_score,
        "markdown": markdown_score,
        "code": code_score,
        "plain_text": plain_text_score
    }

    content_type = max(scores, key=scores.get)
    top_score = scores[content_type]

    # Confidence reflects how dominant the winning score is.
    total_score = sum(scores.values())
    confidence = top_score / total_score if total_score > 0 else 0.25

    # Downgrade confidence when almost no markers were found at all.
    if content_type != "plain_text" and (html_markers + markdown_markers + code_markers) < 3:
        confidence *= 0.7

    return {
        "content_type": content_type,
        "confidence": min(confidence, 1.0),
        "details": {
            "html_markers": html_markers,
            "markdown_markers": markdown_markers,
            "code_markers": code_markers,
            "detected_language": detected_language if content_type == "code" else None
        },
        "success": True
    }
592 |
@with_tool_metrics
@with_error_handling
async def batch_format_texts(
    texts: List[str],
    force_markdown_conversion: bool = False,
    extraction_method: str = "auto",
    max_concurrency: int = 5,
    preserve_tables: bool = True
) -> Dict[str, Any]:
    """Processes multiple text inputs in parallel, converting each to markdown.

    Runs clean_and_format_text_as_markdown over every input, limiting the
    number of in-flight conversions with a semaphore and returning results
    in the original submission order.

    Args:
        texts: List of text strings to clean and format.
        force_markdown_conversion: Whether to force markdown conversion for all inputs.
                                   Default is False.
        extraction_method: Method to extract content from HTML. Options:
                           - "auto": Automatically choose the best method
                           - "readability": Use Mozilla's Readability algorithm
                           - "trafilatura": Use trafilatura library
                           - "raw": Don't extract main content, convert the whole document
                           Default is "auto".
        max_concurrency: Maximum number of texts to process simultaneously.
                         Default is 5.
        preserve_tables: Whether to preserve and convert HTML tables to markdown tables.
                         Default is True.

    Returns:
        Dictionary containing:
        {
            "results": [per-input conversion result dicts, in input order],
            "total_processing_time": 2.45,  # Total time in seconds
            "success_count": 5,             # Number of successfully processed texts
            "failure_count": 0,             # Number of failed texts
            "success": true
        }

    Raises:
        ToolInputError: If the input list is empty or not a list of strings.
    """
    import asyncio

    start_time = time.time()

    # Input validation
    if not texts:
        raise ToolInputError("Input texts list cannot be empty")
    if not isinstance(texts, list):
        raise ToolInputError("Input must be a list of text strings")

    # Bound how many conversions run at once.
    gate = asyncio.Semaphore(max_concurrency)

    async def _convert_one(position, content):
        """Convert one text under the concurrency gate, tagging its position."""
        async with gate:
            try:
                outcome = await clean_and_format_text_as_markdown(
                    text=content,
                    force_markdown_conversion=force_markdown_conversion,
                    extraction_method=extraction_method,
                    preserve_tables=preserve_tables
                )
                # Tag with the original position so the batch can be re-ordered.
                outcome["index"] = position
                return outcome
            except Exception as e:
                logger.error(f"Error processing text at index {position}: {str(e)}")
                return {
                    "index": position,
                    "error": str(e),
                    "success": False
                }

    # Fan out all conversions concurrently.
    results = await asyncio.gather(
        *(_convert_one(i, item) for i, item in enumerate(texts))
    )

    # Restore submission order, then drop the bookkeeping index.
    ordered = sorted(results, key=lambda entry: entry.get("index", 0))
    for entry in ordered:
        entry.pop("index", None)

    # Tally outcomes.
    succeeded = sum(1 for entry in ordered if entry.get("success", False))

    return {
        "results": ordered,
        "total_processing_time": time.time() - start_time,
        "success_count": succeeded,
        "failure_count": len(ordered) - succeeded,
        "success": True
    }
699 |
@with_tool_metrics
@with_error_handling
async def optimize_markdown_formatting(
    markdown: str,
    normalize_headings: bool = False,
    fix_lists: bool = True,
    fix_links: bool = True,
    add_line_breaks: bool = True,
    compact_mode: bool = False,
    max_line_length: int = 0
) -> Dict[str, Any]:
    """Optimizes and improves the formatting of existing markdown text.

    Takes markdown text and enhances its formatting by fixing common issues
    and applying stylistic improvements.

    Args:
        markdown: The markdown text to optimize.
        normalize_headings: If True, ensures heading levels start at h1 and are sequential.
            Default is False.
        fix_lists: If True, fixes common issues with list formatting.
            Default is True.
        fix_links: If True, fixes common issues with link formatting.
            Default is True.
        add_line_breaks: If True, ensures proper paragraph breaks.
            Default is True.
        compact_mode: If True, reduces whitespace for a more compact presentation.
            Default is False.
        max_line_length: Maximum line length for wrapping. 0 means no wrapping.
            Default is 0.

    Returns:
        Dictionary containing:
        {
            "optimized_markdown": "Cleaned and formatted markdown text",
            "changes_made": {
                "headings_normalized": true,
                "lists_fixed": true,
                "links_fixed": true,
                "line_breaks_added": true
            },
            "processing_time": 0.15, # Time taken in seconds
            "success": true
        }

    Raises:
        ToolInputError: If the input markdown is empty or not a string.
    """
    import re

    start_time = time.time()

    # Input validation
    if not markdown:
        raise ToolInputError("Input markdown cannot be empty")
    if not isinstance(markdown, str):
        raise ToolInputError("Input markdown must be a string")

    # Track which categories of change were actually applied
    changes_made = {
        "headings_normalized": False,
        "lists_fixed": False,
        "links_fixed": False,
        "line_breaks_added": False,
        "whitespace_adjusted": False
    }

    optimized = markdown

    # Fix markdown heading formatting (ensure space after #)
    if "#" in optimized:
        original = optimized
        optimized = re.sub(r'(^|\n)(#{1,6})([^#\s])', r'\1\2 \3', optimized)
        changes_made["headings_normalized"] = original != optimized

    # Normalize heading levels if requested, so the smallest level used
    # becomes h1 and relative depth is preserved.
    # FIX: the previous implementation rebuilt the string via manual slice
    # arithmetic on match offsets; for a heading at the very start of the
    # document (where group(1) is empty) it duplicated the leading '#', and
    # because its pattern consumed the trailing newline it skipped every
    # second of two consecutive headings. A MULTILINE pattern with a
    # re.sub callback handles both cases correctly.
    if normalize_headings and "#" in optimized:
        heading_pattern = r'^(#{1,6})\s+(.*?)$'
        heading_levels = [
            len(m.group(1))
            for m in re.finditer(heading_pattern, optimized, flags=re.MULTILINE)
        ]

        if heading_levels:
            # Find the minimum heading level used
            min_level = min(heading_levels)

            # Only shift when the document doesn't already start at h1
            if min_level > 1:
                def _promote(match):
                    # Reduce depth so min_level maps to a single '#'
                    hashes = '#' * (len(match.group(1)) - min_level + 1)
                    return f"{hashes} {match.group(2)}"

                optimized = re.sub(
                    heading_pattern, _promote, optimized, flags=re.MULTILINE
                )
                changes_made["headings_normalized"] = True

    # Fix list formatting
    if fix_lists and any(c in optimized for c in ['-', '*', '+']):
        original = optimized

        # Ensure consistent list markers ('*' and '+' become '-')
        optimized = re.sub(r'^([*+]) ', r'- ', optimized, flags=re.MULTILINE)

        # Fix list item spacing (blank line between a list and following text)
        optimized = re.sub(r'(\n- .+)(\n[^-\s])', r'\1\n\2', optimized)

        # Fix indentation in nested lists
        optimized = re.sub(r'(\n- .+\n)(\s{1,3}- )', r'\1 \2', optimized)

        changes_made["lists_fixed"] = original != optimized

    # Fix link formatting
    if fix_links and "[" in optimized:
        original = optimized

        # Fix reference-style links (ensure consistent spacing)
        optimized = re.sub(r'\]\[', r'] [', optimized)

        # Fix malformed links with space between []()
        optimized = re.sub(r'\] \(', r'](', optimized)

        # Ensure proper spacing around links in sentences
        optimized = re.sub(r'([^\s])\[', r'\1 [', optimized)
        optimized = re.sub(r'\]([^\(\s])', r'] \1', optimized)

        changes_made["links_fixed"] = original != optimized

    # Add proper line breaks for readability
    if add_line_breaks:
        original = optimized

        # Ensure headings have a blank line before (except at start of document)
        optimized = re.sub(r'(?<!\n\n)(^|\n)#', r'\1\n#', optimized)

        # Ensure paragraphs have blank lines between them
        optimized = re.sub(r'(\n[^\s#>*-][^\n]+)(\n[^\s#>*-])', r'\1\n\2', optimized)

        # Clean up any excessive blank lines created
        optimized = re.sub(r'\n{3,}', r'\n\n', optimized)

        changes_made["line_breaks_added"] = original != optimized

    # Adjust whitespace based on compact_mode
    original = optimized
    if compact_mode:
        # Reduce blank lines to single blank lines
        optimized = re.sub(r'\n\s*\n', r'\n\n', optimized)

        # Remove trailing whitespace
        optimized = re.sub(r' +$', '', optimized, flags=re.MULTILINE)
    else:
        # Ensure consistent double line breaks for section transitions
        optimized = re.sub(r'(\n#{1,6}[^\n]+\n)(?!\n)', r'\1\n', optimized)

    changes_made["whitespace_adjusted"] = original != optimized

    # Apply line wrapping if specified
    if max_line_length > 0:
        import textwrap

        # Split into paragraphs, wrap each, then rejoin
        paragraphs = re.split(r'\n\s*\n', optimized)
        wrapped_paragraphs = []

        for p in paragraphs:
            # Skip wrapping for code blocks, lists, and headings
            if (p.strip().startswith("```") or
                    re.match(r'^\s*[*\-+]', p, re.MULTILINE) or
                    re.match(r'^#{1,6}\s', p.strip())):
                wrapped_paragraphs.append(p)
            else:
                # Wrap regular paragraphs line by line, leaving quoted,
                # heading, and list lines untouched
                lines = p.split('\n')
                wrapped_lines = []
                for line in lines:
                    if not line.strip().startswith(('>', '#', '-', '*', '+')):
                        wrapped = textwrap.fill(line, width=max_line_length)
                        wrapped_lines.append(wrapped)
                    else:
                        wrapped_lines.append(line)
                wrapped_paragraphs.append('\n'.join(wrapped_lines))

        optimized = '\n\n'.join(wrapped_paragraphs)

    processing_time = time.time() - start_time

    return {
        "optimized_markdown": optimized,
        "changes_made": changes_made,
        "processing_time": processing_time,
        "success": True
    }
```
--------------------------------------------------------------------------------
/examples/advanced_extraction_demo.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | """Demo of advanced extraction capabilities using Ultimate MCP Server."""
3 | import asyncio
4 | import json
5 | import os
6 | import re
7 | import sys
8 | import time
9 | from pathlib import Path
10 |
11 | # Add project root to path for imports when running as script
12 | sys.path.insert(0, str(Path(__file__).parent.parent))
13 |
14 | from rich.panel import Panel
15 | from rich.rule import Rule
16 | from rich.syntax import Syntax
17 | from rich.traceback import Traceback
18 |
19 | from ultimate_mcp_server.constants import Provider
20 | from ultimate_mcp_server.core.providers.base import get_provider
21 | from ultimate_mcp_server.utils import get_logger
22 | from ultimate_mcp_server.utils.display import CostTracker, parse_and_display_result
23 | from ultimate_mcp_server.utils.logging.console import console
24 | from ultimate_mcp_server.utils.parsing import extract_json_from_markdown
25 |
# --- Debug Flag ---
USE_DEBUG_LOGS = True  # Set to True to enable detailed logging
# ------------------

# Initialize logger for this demo module; level is forced to "debug" so the
# USE_DEBUG_LOGS trace messages in the demo functions are actually emitted.
logger = get_logger("example.advanced_extraction")
logger.set_level("debug")
33 |
# Configure the OpenAI client for direct extraction demos
async def setup_openai_provider():
    """Initialize and return an OpenAI provider for the extraction demos.

    Returns the provider instance on success, or None when the provider
    cannot be obtained (e.g. missing/invalid OPENAI_API_KEY in the
    environment/config) or an unexpected error occurs during setup.
    """
    try:
        logger.info("Initializing OpenAI for demonstration", emoji_key="start")

        # get_provider yields None when the key is missing or invalid in config
        provider = await get_provider(Provider.OPENAI.value)
        if provider:
            logger.success("OpenAI provider initialized successfully.")
            return provider

        logger.error("Failed to get OpenAI provider. Is the OPENAI_API_KEY configured correctly in your environment/config?")
        return None
    except Exception as exc:
        logger.error(f"Failed to initialize OpenAI provider: {exc}", emoji_key="error")
        return None
51 |
async def run_json_extraction_example(provider, tracker: CostTracker) -> None:
    """Demonstrate JSON extraction.

    Loads (or, on first run, creates) a sample event announcement, prompts the
    provider to extract it into a JSON object conforming to an event schema,
    then parses the model output and renders the structured result.

    Args:
        provider: Initialized LLM provider instance; the demo is skipped when
            this is falsy.
        tracker: CostTracker that accumulates token usage and cost for the
            completion call.
    """
    if USE_DEBUG_LOGS:
        logger.debug("Entering run_json_extraction_example.")
    if not provider:
        console.print("[yellow]Skipping JSON extraction demo - no provider available.[/yellow]")
        if USE_DEBUG_LOGS:
            logger.debug("Exiting run_json_extraction_example (no provider).")
        return

    console.print(Rule("[bold blue]1. JSON Extraction Example[/bold blue]"))

    # Load sample text
    sample_path = Path(__file__).parent / "data" / "sample_event.txt"
    if not sample_path.exists():
        # Create a sample text for demonstration
        sample_text = """
Tech Conference 2024
Location: San Francisco Convention Center, 123 Tech Blvd, San Francisco, CA 94103
Date: June 15-17, 2024
Time: 9:00 AM - 6:00 PM daily

Registration Fee: $599 (Early Bird: $499 until March 31)

Keynote Speakers:
- Dr. Sarah Johnson, AI Research Director at TechCorp
- Mark Williams, CTO of FutureTech Industries
- Prof. Emily Chen, MIT Computer Science Department

Special Events:
- Networking Reception: June 15, 7:00 PM - 10:00 PM
- Hackathon: June 16, 9:00 PM - 9:00 AM (overnight)
- Career Fair: June 17, 1:00 PM - 5:00 PM

For more information, contact [email protected] or call (555) 123-4567.
"""
        # Ensure the data directory exists
        os.makedirs(os.path.dirname(sample_path), exist_ok=True)
        # Write sample text to file so later runs reuse the same fixture
        with open(sample_path, "w") as f:
            f.write(sample_text)
    else:
        # Read existing sample text
        with open(sample_path, "r") as f:
            sample_text = f.read()

    # Display sample text
    console.print(Panel(sample_text, title="Sample Event Text", border_style="blue"))

    # Define JSON schema the model output must conform to
    event_schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string", "description": "Event name"},
            "location": {
                "type": "object",
                "properties": {
                    "venue": {"type": "string"},
                    "address": {"type": "string"},
                    "city": {"type": "string"},
                    "state": {"type": "string"},
                    "zip": {"type": "string"}
                }
            },
            "dates": {
                "type": "object",
                "properties": {
                    "start": {"type": "string", "format": "date"},
                    "end": {"type": "string", "format": "date"}
                }
            },
            "time": {"type": "string"},
            "registration": {
                "type": "object",
                "properties": {
                    "regular_fee": {"type": "number"},
                    "early_bird_fee": {"type": "number"},
                    "early_bird_deadline": {"type": "string", "format": "date"}
                }
            },
            "speakers": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "title": {"type": "string"},
                        "organization": {"type": "string"}
                    }
                }
            },
            "special_events": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "date": {"type": "string", "format": "date"},
                        "time": {"type": "string"}
                    }
                }
            },
            "contact": {
                "type": "object",
                "properties": {
                    "email": {"type": "string", "format": "email"},
                    "phone": {"type": "string"}
                }
            }
        }
    }

    # Display JSON schema
    schema_json = json.dumps(event_schema, indent=2)
    console.print(Panel(
        Syntax(schema_json, "json", theme="monokai", line_numbers=True),
        title="Event JSON Schema",
        border_style="green"
    ))

    # Extract JSON using direct provider call
    logger.info("Extracting structured JSON data from text...", emoji_key="processing")

    try:
        start_time = time.time()

        # Instead of using the tool, use direct completion for demo purposes
        prompt = f"""
Extract structured information from the following text into a JSON object.
Follow the provided JSON schema exactly.

TEXT:
{sample_text}

JSON SCHEMA:
{json.dumps(event_schema, indent=2)}

Provide only the valid JSON object as output, with no additional commentary.
"""

        if USE_DEBUG_LOGS:
            logger.debug(f"JSON Extraction Prompt:\n{prompt}")

        # Call the provider directly
        result = await provider.generate_completion(
            prompt=prompt,
            model="gpt-4.1-mini",  # Use an available OpenAI model
            temperature=0.2,  # Lower temperature for more deterministic output
            max_tokens=1500  # Enough tokens for a full response
        )

        # Track cost
        tracker.add_call(result)

        if USE_DEBUG_LOGS:
            logger.debug(f"Raw JSON Extraction Result Text:\n{result.text}")

        # Process the result to extract just the JSON
        try:
            # Try to parse the response as JSON; the model may wrap it in a
            # markdown code fence, so strip that first.
            raw_text = result.text.strip()
            text_to_parse = extract_json_from_markdown(raw_text)
            if USE_DEBUG_LOGS:
                logger.debug(f"Raw text received: {raw_text[:500]}...")
                logger.debug(f"Attempting to parse JSON after cleaning: {text_to_parse[:500]}...")
            json_result = json.loads(text_to_parse)
            if USE_DEBUG_LOGS:
                logger.debug(f"Successfully parsed JSON: {json.dumps(json_result, indent=2)}")

            # Create a dictionary with structured data and metadata for display
            structured_result_data = {
                "json": json_result,  # The actual parsed JSON
                "validated": True,  # Assuming validation happens elsewhere or is implied
                "model": result.model,
                "processing_time": time.time() - start_time,
                "tokens": {
                    "input": result.input_tokens,
                    "output": result.output_tokens,
                    "total": result.input_tokens + result.output_tokens
                },
                "cost": result.cost
            }

            # Display the results using the utility function
            parse_and_display_result(
                title="JSON Extraction Results",
                input_data={"text": sample_text, "schema": event_schema},
                result=structured_result_data,  # Pass the structured data
                console=console
            )

        except json.JSONDecodeError as e:
            # Log the error regardless of debug flag
            logger.error(f"JSONDecodeError occurred: {e}", exc_info=False)

            if USE_DEBUG_LOGS:
                # Log the string that caused the error (before cleaning)
                logger.debug(f"Raw string causing JSONDecodeError:\n{raw_text}")
                # Log the string that failed parsing (after cleaning)
                logger.debug(f"Cleaned string that failed JSON parsing:\n{text_to_parse}")
                # Print a rich traceback to the console
                console.print("[bold red]-- Traceback for JSONDecodeError --[/bold red]")
                console.print(Traceback())
                console.print("[bold red]-- End Traceback --[/bold red]")

            # If JSON parsing fails, show the raw response
            console.print(Panel(
                raw_text,  # Show the original raw text from the model
                title="[yellow]Raw Model Output (JSON parsing failed)[/yellow]",
                border_style="red"
            ))

    except Exception as e:
        logger.error(f"Error extracting JSON: {str(e)}", emoji_key="error", exc_info=True)

    console.print()
    if USE_DEBUG_LOGS:
        logger.debug("Exiting run_json_extraction_example.")
270 |
async def table_extraction_demo(provider, tracker: CostTracker) -> None:
    """Demonstrate table extraction capabilities.

    Sends a text passage containing a markdown table to the provider and asks
    for the table back in both JSON and Markdown form (plus table metadata),
    then parses and displays the structured response.

    Args:
        provider: Initialized LLM provider instance; the demo is skipped when
            this is falsy.
        tracker: CostTracker that accumulates token usage and cost for the
            completion call.
    """
    if USE_DEBUG_LOGS:
        logger.debug("Entering table_extraction_demo.")
    if not provider:
        console.print("[yellow]Skipping table extraction demo - no provider available.[/yellow]")
        if USE_DEBUG_LOGS:
            logger.debug("Exiting table_extraction_demo (no provider).")
        return

    logger.info("Starting table extraction demo", emoji_key="start")

    # Sample text with embedded table
    text = """
Financial Performance by Quarter (2023-2024)

| Quarter | Revenue ($M) | Expenses ($M) | Profit ($M) | Growth (%) |
|---------|-------------|---------------|-------------|------------|
| Q1 2023 | 42.5        | 32.1          | 10.4        | 3.2        |
| Q2 2023 | 45.7        | 33.8          | 11.9        | 6.5        |
| Q3 2023 | 50.2        | 35.6          | 14.6        | 9.8        |
| Q4 2023 | 58.3        | 38.2          | 20.1        | 15.2       |
| Q1 2024 | 60.1        | 39.5          | 20.6        | 3.1        |
| Q2 2024 | 65.4        | 41.2          | 24.2        | 8.8        |

Note: All figures are in millions of dollars and are unaudited.
Growth percentages are relative to the previous quarter.
"""

    # Log extraction attempt
    logger.info("Performing table extraction", emoji_key="processing")

    try:
        start_time = time.time()

        # Prompt for table extraction; double braces escape literal JSON
        # braces inside the f-string.
        prompt = f"""
Extract the table from the following text and format it as both JSON and Markdown.

TEXT:
{text}

For the JSON format, use this structure:
{{
  "headers": ["Header1", "Header2", ...],
  "rows": [
    {{"Header1": "value", "Header2": "value", ...}},
    ...
  ]
}}

For the Markdown format, output a well-formatted Markdown table.

Also extract any metadata about the table (title, notes, etc.).

Format your response as JSON with the following structure:
{{
  "json_table": {{...}},
  "markdown_table": "...",
  "metadata": {{
    "title": "...",
    "notes": [
      "..."
    ]
  }}
}}
"""

        if USE_DEBUG_LOGS:
            logger.debug(f"Table Extraction Prompt:\n{prompt}")

        # Call the provider directly
        result = await provider.generate_completion(
            prompt=prompt,
            model="gpt-4.1-mini",
            temperature=0.2,
            max_tokens=1500
        )

        # Track cost
        tracker.add_call(result)

        if USE_DEBUG_LOGS:
            logger.debug(f"Raw Table Extraction Result Text:\n{result.text}")

        try:
            # Try to parse the response as JSON
            raw_text = result.text.strip()  # Keep raw text separate
            text_to_parse = extract_json_from_markdown(raw_text)  # Clean it
            if USE_DEBUG_LOGS:
                # Log both raw and cleaned versions
                logger.debug(f"Raw text received (Table): {raw_text[:500]}...")
                logger.debug(f"Attempting to parse Table Extraction JSON after cleaning: {text_to_parse[:500]}...")
            json_result = json.loads(text_to_parse)  # Parse the cleaned version
            if USE_DEBUG_LOGS:
                logger.debug(f"Successfully parsed Table Extraction JSON: {json.dumps(json_result, indent=2)}")

            # Create structured data dictionary for display
            structured_result_data = {
                "formats": {
                    "json": json_result.get("json_table", {}),
                    "markdown": json_result.get("markdown_table", "")
                },
                "metadata": json_result.get("metadata", {}),
                "model": result.model,
                "processing_time": time.time() - start_time,
                "tokens": {
                    "input": result.input_tokens,
                    "output": result.output_tokens,
                    "total": result.input_tokens + result.output_tokens
                },
                "cost": result.cost
            }

            # Parse the result using the shared utility
            # NOTE(review): unlike the JSON demo, no explicit console is
            # passed here — presumably the utility falls back to a default.
            parse_and_display_result(
                "Table Extraction Demo",
                {"text": text},
                structured_result_data  # Pass the structured data
            )

        except json.JSONDecodeError as e:
            # Log the error regardless of debug flag
            logger.error(f"JSONDecodeError in Table Extraction occurred: {e}", exc_info=False)

            if USE_DEBUG_LOGS:
                # Log both raw and cleaned versions for debugging the failure
                logger.debug(f"Raw string causing JSONDecodeError in Table Extraction:\n{raw_text}")
                logger.debug(f"Cleaned string that failed JSON parsing in Table Extraction:\n{text_to_parse}")
                # Print a rich traceback to the console
                console.print("[bold red]-- Traceback for JSONDecodeError (Table Extraction) --[/bold red]")
                console.print(Traceback())
                console.print("[bold red]-- End Traceback --[/bold red]")

            # If JSON parsing fails, show the raw response using the original raw_text
            console.print(Panel(
                raw_text,
                title="[yellow]Raw Model Output (JSON parsing failed)[/yellow]",
                border_style="red"
            ))

    except Exception as e:
        logger.error(f"Error in table extraction: {str(e)}", emoji_key="error")
    # Add exit log
    if USE_DEBUG_LOGS:
        logger.debug("Exiting table_extraction_demo.")
417 |
async def semantic_schema_inference_demo(provider, tracker: CostTracker) -> None:
    """Demonstrate semantic schema inference.

    Feeds a sample patient record to the provider together with a target
    schema and asks it to extract information matching that structure, then
    parses and displays the structured response.

    Args:
        provider: Initialized LLM provider instance; the demo is skipped when
            this is falsy.
        tracker: CostTracker that accumulates token usage and cost for the
            completion call.
    """
    if USE_DEBUG_LOGS:
        logger.debug("Entering semantic_schema_inference_demo.")
    if not provider:
        console.print("[yellow]Skipping semantic schema inference demo - no provider available.[/yellow]")
        if USE_DEBUG_LOGS:
            logger.debug("Exiting semantic_schema_inference_demo (no provider).")
        return

    logger.info("Starting semantic schema inference demo", emoji_key="start")

    # Sample text for schema inference
    text = """
Patient Record: John Smith
Date of Birth: 05/12/1978
Patient ID: P-98765
Blood Type: O+
Height: 182 cm
Weight: 76 kg

Medications:
- Lisinopril 10mg, once daily
- Metformin 500mg, twice daily
- Atorvastatin 20mg, once daily at bedtime

Allergies:
- Penicillin (severe)
- Shellfish (mild)

Recent Vital Signs:
Date: 03/15/2024
Blood Pressure: 128/85 mmHg
Heart Rate: 72 bpm
Temperature: 98.6°F
Oxygen Saturation: 98%

Medical History:
- Type 2 Diabetes (diagnosed 2015)
- Hypertension (diagnosed 2017)
- Hyperlipidemia (diagnosed 2019)
- Appendectomy (2005)
"""

    # Define a schema template for the extraction
    patient_schema = {
        "type": "object",
        "properties": {
            "patient": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "dob": {"type": "string"},
                    "id": {"type": "string"},
                    "blood_type": {"type": "string"},
                    "height": {"type": "string"},
                    "weight": {"type": "string"}
                }
            },
            "medications": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "dosage": {"type": "string"},
                        "frequency": {"type": "string"}
                    }
                }
            },
            "allergies": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "allergen": {"type": "string"},
                        "severity": {"type": "string"}
                    }
                }
            },
            "vital_signs": {
                "type": "object",
                "properties": {
                    "date": {"type": "string"},
                    "blood_pressure": {"type": "string"},
                    "heart_rate": {"type": "string"},
                    "temperature": {"type": "string"},
                    "oxygen_saturation": {"type": "string"}
                }
            },
            "medical_history": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "condition": {"type": "string"},
                        "diagnosed": {"type": "string"}
                    }
                }
            }
        }
    }

    # Log schema inference attempt
    logger.info("Performing schema inference", emoji_key="processing")

    try:
        start_time = time.time()

        # Prompt for semantic schema extraction
        prompt = f"""
Extract structured information from the text according to the provided semantic schema.

TEXT:
{text}

SEMANTIC SCHEMA:
{json.dumps(patient_schema, indent=2)}

Analyze the text and extract information following the schema structure. Return a valid JSON object.
"""

        if USE_DEBUG_LOGS:
            logger.debug(f"Schema Inference Prompt:\n{prompt}")

        # Call the provider directly
        result = await provider.generate_completion(
            prompt=prompt,
            model="gpt-4.1-mini",
            temperature=0.2,
            max_tokens=1000
        )

        # Track cost
        tracker.add_call(result)

        if USE_DEBUG_LOGS:
            logger.debug(f"Raw Schema Inference Result Text:\n{result.text}")

        try:
            # Try to parse the response as JSON; strip any markdown code
            # fence around it first.
            raw_text = result.text.strip()
            text_to_parse = extract_json_from_markdown(raw_text)
            if USE_DEBUG_LOGS:
                logger.debug(f"Raw text received (Schema): {raw_text[:500]}...")
                logger.debug(f"Attempting to parse Schema Inference JSON after cleaning: {text_to_parse[:500]}...")
            json_result = json.loads(text_to_parse)
            if USE_DEBUG_LOGS:
                logger.debug(f"Successfully parsed Schema Inference JSON: {json.dumps(json_result, indent=2)}")

            # Create structured data dictionary for display
            structured_result_data = {
                "extracted_data": json_result,
                "model": result.model,
                "processing_time": time.time() - start_time,
                "tokens": {
                    "input": result.input_tokens,
                    "output": result.output_tokens,
                    "total": result.input_tokens + result.output_tokens
                },
                "cost": result.cost
            }

            # Parse the result using the shared utility
            parse_and_display_result(
                "Semantic Schema Inference Demo",
                {"text": text},
                structured_result_data  # Pass the structured data
            )

        except json.JSONDecodeError as e:
            # Log the error regardless of debug flag
            logger.error(f"JSONDecodeError in Schema Inference occurred: {e}", exc_info=False)

            if USE_DEBUG_LOGS:
                # Log both raw and cleaned versions
                logger.debug(f"Raw string causing JSONDecodeError in Schema Inference:\n{raw_text}")
                logger.debug(f"Cleaned string that failed JSON parsing in Schema Inference:\n{text_to_parse}")
                # Print a rich traceback to the console
                console.print("[bold red]-- Traceback for JSONDecodeError (Schema Inference) --[/bold red]")
                console.print(Traceback())
                console.print("[bold red]-- End Traceback --[/bold red]")

            # If JSON parsing fails, show the raw response
            console.print(Panel(
                raw_text,
                title="[yellow]Raw Model Output (JSON parsing failed)[/yellow]",
                border_style="red"
            ))

    except Exception as e:
        logger.error(f"Error in schema inference: {str(e)}", emoji_key="error")
    # Add exit log
    if USE_DEBUG_LOGS:
        logger.debug("Exiting semantic_schema_inference_demo.")
613 |
614 | async def entity_extraction_demo(provider, tracker: CostTracker):
615 | """Demonstrate entity extraction capabilities."""
616 | if USE_DEBUG_LOGS:
617 | logger.debug("Entering entity_extraction_demo.")
618 | if not provider:
619 | console.print("[yellow]Skipping entity extraction demo - no provider available.[/yellow]")
620 | if USE_DEBUG_LOGS:
621 | logger.debug("Exiting entity_extraction_demo (no provider).")
622 | return
623 |
624 | logger.info("Starting entity extraction demo", emoji_key="start")
625 |
626 | # Sample text for entity extraction
627 | text = """
628 | In a groundbreaking announcement on March 15, 2024, Tesla unveiled its latest solar energy
629 | technology in partnership with SolarCity. CEO Elon Musk presented the new PowerWall 4.0
630 | battery system at their headquarters in Austin, Texas. The system can store up to 20kWh of
631 | energy and costs approximately $6,500 per unit.
632 |
633 | According to Dr. Maria Chen, lead researcher at the National Renewable Energy Laboratory (NREL),
634 | this technology represents a significant advancement in residential energy storage. The new
635 | system integrates with the Tesla mobile app on both iOS and Android platforms, allowing users
636 | to monitor energy usage in real-time.
637 |
638 | Tesla stock (TSLA) rose 5.8% following the announcement, reaching $248.32 per share on the NASDAQ.
639 | The company plans to begin production at their Gigafactory Nevada location by June 2024, with
640 | initial deployments in California and Texas markets.
641 | """
642 |
643 | # Log entity extraction attempt
644 | logger.info("Performing entity extraction", emoji_key="processing")
645 |
646 | try:
647 | start_time = time.time()
648 |
649 | # Prompt for entity extraction
650 | prompt = f"""
651 | Extract key-value pairs and entities from the following text, categorized by type.
652 |
653 | TEXT:
654 | {text}
655 |
656 | Extract the following categories of information:
657 | - Organizations (companies, institutions, etc.)
658 | - People (names and titles)
659 | - Locations (cities, states, facilities, etc.)
660 | - Dates and Times
661 | - Products and Technologies
662 | - Numerical Values (monetary values, percentages, measurements, etc.)
663 |
664 | Format the output as a JSON object with these categories as keys, and each containing relevant entities found.
665 | Within each category, provide structured information when possible.
666 | """
667 |
668 | if USE_DEBUG_LOGS:
669 | logger.debug(f"Entity Extraction Prompt:\n{prompt}")
670 |
671 | # Call the provider directly
672 | result = await provider.generate_completion(
673 | prompt=prompt,
674 | model="gpt-4.1-mini",
675 | temperature=0.2,
676 | max_tokens=500
677 | )
678 |
679 | # Track cost
680 | tracker.add_call(result)
681 |
682 | if USE_DEBUG_LOGS:
683 | logger.debug(f"Raw Entity Extraction Result Text:\n{result.text}")
684 |
685 | try:
686 | # Try to parse the response as JSON
687 | raw_text = result.text.strip()
688 | text_to_parse = extract_json_from_markdown(raw_text)
689 | if USE_DEBUG_LOGS:
690 | logger.debug(f"Raw text received (Entity): {raw_text[:500]}...")
691 | logger.debug(f"Attempting to parse Entity Extraction JSON after cleaning: {text_to_parse[:500]}...")
692 | if USE_DEBUG_LOGS:
693 | logger.debug(f"EXACT STRING PASSED TO json.loads: >>>{text_to_parse}<<<")
694 |
695 | try:
696 | # First try standard parsing
697 | json_result = json.loads(text_to_parse)
698 | except json.JSONDecodeError as e:
699 | logger.warning(f"Standard JSON parsing failed: {e}. Attempting emergency repair.")
700 |
701 | # Emergency fallback for malformed JSON due to unterminated strings
702 | # 1. Look for the raw JSON structure with markdown removed
703 | text_no_markdown = text_to_parse
704 |
705 | # 2. Manually check for key entity categories, even if JSON is malformed
706 | # Create a structured result with categories we expect to find
707 | json_result = {
708 | "Organizations": [],
709 | "People": [],
710 | "Locations": [],
711 | "Dates and Times": [],
712 | "Products and Technologies": [],
713 | "Numerical Values": []
714 | }
715 |
716 | # Look for entity categories using regex
717 | org_matches = re.findall(r'"name"\s*:\s*"([^"]+)".*?"type"\s*:\s*"([^"]+)"', text_no_markdown)
718 | for name, entity_type in org_matches:
719 | # Determine which category this entity belongs to based on type
720 | if any(keyword in entity_type.lower() for keyword in ["company", "corporation", "institution", "exchange"]):
721 | json_result["Organizations"].append({"name": name, "type": entity_type})
722 | elif any(keyword in entity_type.lower() for keyword in ["city", "state", "facility"]):
723 | json_result["Locations"].append({"name": name, "type": entity_type})
724 | elif any(keyword in entity_type.lower() for keyword in ["battery", "app", "system", "technology"]):
725 | json_result["Products and Technologies"].append({"name": name, "type": entity_type})
726 |
727 | # Look for people - they usually have titles and organizations
728 | people_matches = re.findall(r'"name"\s*:\s*"([^"]+)".*?"title"\s*:\s*"([^"]+)".*?"organization"\s*:\s*"([^"]*)"', text_no_markdown)
729 | for name, title, org in people_matches:
730 | json_result["People"].append({"name": name, "title": title, "organization": org})
731 |
732 | # Dates and numerical values are harder to extract generically
733 | # but we can look for obvious patterns
734 | date_matches = re.findall(r'"date"\s*:\s*"([^"]+)".*?"event"\s*:\s*"([^"]+)"', text_no_markdown)
735 | for date, event in date_matches:
736 | json_result["Dates and Times"].append({"date": date, "event": event})
737 |
738 | # For numerical values, look for values with units
739 | value_matches = re.findall(r'"value"\s*:\s*([^,]+).*?"unit"\s*:\s*"([^"]+)"', text_no_markdown)
740 | for value, unit in value_matches:
741 | # Clean up the value
742 | clean_value = value.strip('" ')
743 | item = {"value": clean_value, "unit": unit}
744 |
745 | # Look for a description if available
746 | desc_match = re.search(r'"description"\s*:\s*"([^"]+)"', text_no_markdown)
747 | if desc_match:
748 | item["description"] = desc_match.group(1)
749 |
750 | json_result["Numerical Values"].append(item)
751 |
752 | # Add a note about emergency repair
753 | logger.warning("Used emergency JSON repair - results may be incomplete")
754 |
755 | if USE_DEBUG_LOGS:
756 | logger.debug(f"Successfully parsed Entity Extraction JSON: {json.dumps(json_result, indent=2)}")
757 |
758 | # Create structured data dictionary for display
759 | structured_result_data = {
760 | "extracted_data": json_result,
761 | "structured": True,
762 | "categorized": True,
763 | "model": result.model,
764 | "processing_time": time.time() - start_time,
765 | "tokens": {
766 | "input": result.input_tokens,
767 | "output": result.output_tokens,
768 | "total": result.input_tokens + result.output_tokens
769 | },
770 | "cost": result.cost
771 | }
772 |
773 | # Parse the result using the shared utility
774 | parse_and_display_result(
775 | "Entity Extraction Demo",
776 | {"text": text},
777 | structured_result_data # Pass the structured data
778 | )
779 |
780 | except json.JSONDecodeError as e:
781 | # Log the error regardless of debug flag
782 | logger.error(f"JSONDecodeError in Entity Extraction occurred: {e}", exc_info=False)
783 |
784 | if USE_DEBUG_LOGS:
785 | # Log both raw and cleaned versions
786 | logger.debug(f"Raw string causing JSONDecodeError in Entity Extraction:\n{raw_text}")
787 | logger.debug(f"Cleaned string that failed JSON parsing in Entity Extraction:\n{text_to_parse}")
788 | # Print a rich traceback to the console
789 | console.print("[bold red]-- Traceback for JSONDecodeError (Entity Extraction) --[/bold red]")
790 | console.print(Traceback())
791 | console.print("[bold red]-- End Traceback --[/bold red]")
792 |
793 | # If JSON parsing fails, show the raw response
794 | console.print(Panel(
795 | raw_text,
796 | title="[yellow]Raw Model Output (JSON parsing failed)[/yellow]",
797 | border_style="red"
798 | ))
799 |
800 | except Exception as e:
801 | logger.error(f"Error in entity extraction: {str(e)}", emoji_key="error")
802 | # Add exit log
803 | if USE_DEBUG_LOGS:
804 | logger.debug("Exiting entity_extraction_demo.")
805 |
async def main():
    """Run the advanced extraction demos sequentially and report total cost.

    Returns:
        int: Process exit code (0 on success), so the value returned by
        ``asyncio.run(main())`` can be passed straight to ``sys.exit``.
        Previously this function returned ``None``, silently relying on
        ``sys.exit(None)`` meaning "success".
    """
    tracker = CostTracker()  # Aggregates token usage and cost across all demos
    provider = await setup_openai_provider()

    if not provider:
        logger.warning("OpenAI provider not available. Demo sections requiring it will be skipped.", emoji_key="warning")

    console.print(Rule("[bold magenta]Advanced Extraction Demos Starting[/bold magenta]"))

    demos_to_run = [
        (run_json_extraction_example, "JSON Extraction"),
        (table_extraction_demo, "Table Extraction"),
        (semantic_schema_inference_demo, "Schema Inference"),
        (entity_extraction_demo, "Entity Extraction"),
    ]

    # Execute demos sequentially; one failing demo must not abort the rest.
    for demo_func, demo_name in demos_to_run:
        try:
            await demo_func(provider, tracker)  # Each demo records its calls on the tracker
        except Exception as e:
            logger.error(f"Error running {demo_name} demo: {e}", emoji_key="error", exc_info=True)

    # Display final cost summary
    tracker.display_summary(console)

    logger.success("Advanced Extraction Demo finished successfully!", emoji_key="complete")
    console.print(Rule("[bold magenta]Advanced Extraction Demos Complete[/bold magenta]"))
    return 0  # Explicit success code for sys.exit below

if __name__ == "__main__":
    # Run the demos and propagate the exit code to the shell.
    exit_code = asyncio.run(main())
    sys.exit(exit_code)
```
--------------------------------------------------------------------------------
/ultimate_mcp_server/core/providers/anthropic.py:
--------------------------------------------------------------------------------
```python
1 | # ultimate_mcp_server/providers/anthropic.py
2 | """Anthropic (Claude) provider implementation."""
3 |
4 | import json
5 | import re
6 | import time
7 | from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
8 |
9 | from anthropic import AsyncAnthropic
10 |
11 | from ultimate_mcp_server.constants import Provider, TaskType # Import TaskType for logging
12 | from ultimate_mcp_server.core.providers.base import (
13 | BaseProvider,
14 | ModelResponse,
15 | )
16 | from ultimate_mcp_server.utils import get_logger
17 |
18 | # Use the same naming scheme everywhere: logger at module level
19 | logger = get_logger("ultimate_mcp_server.providers.anthropic")
20 |
21 |
22 | class AnthropicProvider(BaseProvider):
23 | """Provider implementation for Anthropic (Claude) API."""
24 |
25 | provider_name = Provider.ANTHROPIC.value
26 |
27 | def __init__(self, api_key: Optional[str] = None, **kwargs):
28 | """Initialize the Anthropic provider.
29 |
30 | Args:
31 | api_key: Anthropic API key
32 | **kwargs: Additional options (e.g., base_url)
33 | """
34 | super().__init__(api_key=api_key, **kwargs)
35 | self.base_url = kwargs.get("base_url")
36 | self.models_cache = None
37 | self.client: Optional[AsyncAnthropic] = None # Initialize client attribute
38 |
39 | async def initialize(self) -> bool:
40 | """Initialize the Anthropic client.
41 |
42 | Returns:
43 | bool: True if initialization was successful
44 | """
45 | if not self.api_key:
46 | self.logger.error("Anthropic API key is not configured.", emoji_key="error")
47 | return False
48 |
49 | try:
50 | self.client = AsyncAnthropic(
51 | api_key=self.api_key,
52 | base_url=self.base_url,
53 | )
54 |
55 | # Skip API call if using a mock key (for tests)
56 | if "mock-" in self.api_key:
57 | self.logger.info(
58 | "Using mock Anthropic key - skipping API validation", emoji_key="mock"
59 | )
60 | # Assume mock initialization is always successful for testing purposes
61 | self.is_initialized = True
62 | return True
63 |
64 | # Optional: Add a quick check_api_key() call here if desired,
65 | # but initialize might succeed even if key is invalid later.
66 | # is_valid = await self.check_api_key() # This makes initialize slower
67 | # if not is_valid:
68 | # self.logger.error("Anthropic API key appears invalid.", emoji_key="error")
69 | # return False
70 |
71 | self.logger.success("Anthropic provider initialized successfully", emoji_key="provider")
72 | self.is_initialized = True # Mark as initialized
73 | return True
74 |
75 | except Exception as e:
76 | self.logger.error(
77 | f"Failed to initialize Anthropic provider: {str(e)}",
78 | emoji_key="error",
79 | exc_info=True, # Log traceback for debugging
80 | )
81 | self.is_initialized = False
82 | return False
83 |
84 | async def generate_completion(
85 | self,
86 | prompt: Optional[str] = None,
87 | messages: Optional[List[Dict[str, Any]]] = None,
88 | model: Optional[str] = None,
89 | max_tokens: Optional[int] = 1024, # Signature default
90 | temperature: float = 0.7,
91 | json_mode: bool = False,
92 | **kwargs,
93 | ) -> ModelResponse:
94 | """Generate a single non-chat completion using Anthropic Claude.
95 |
96 | Args:
97 | prompt: Text prompt to send to the model.
98 | messages: List of message dictionaries, alternative to prompt.
99 | model: Model name to use (e.g., "claude-3-opus-20240229").
100 | max_tokens: Maximum tokens to generate. Defaults to 1024.
101 | temperature: Temperature parameter (0.0-1.0).
102 | json_mode: If True, attempt to guide model towards JSON output (via prompting).
103 | **kwargs: Additional model-specific parameters (e.g., top_p, system).
104 |
105 | Returns:
106 | ModelResponse object.
107 | """
108 | if not self.client:
109 | if not await self.initialize():
110 | raise ConnectionError("Anthropic provider failed to initialize.")
111 |
112 | model = model or self.get_default_model()
113 | actual_model_name = self.strip_provider_prefix(model)
114 |
115 | # Original logic: Validate that either prompt or messages is provided
116 | if prompt is None and not messages:
117 | raise ValueError("Either 'prompt' or 'messages' must be provided")
118 |
119 | # Original logic: If messages are provided, use the chat_completion function
120 | if messages:
121 | # Ensure all necessary parameters are passed to generate_chat_completion
122 | # This includes system_prompt if it's in kwargs
123 | return await self.generate_chat_completion(
124 | messages=messages,
125 | model=model, # Pass original model ID
126 | max_tokens=max_tokens,
127 | temperature=temperature,
128 | json_mode=json_mode, # Pass json_mode
129 | **kwargs # Pass other kwargs like system, top_p etc.
130 | )
131 |
132 | # Original logic: Prepare message list for the API from prompt
133 | # This path is taken if only 'prompt' is provided (and not 'messages')
134 | current_api_messages = [{"role": "user", "content": prompt}]
135 |
136 | # Original logic: Handle system prompt if passed in kwargs for the simple prompt case
137 | system_prompt = kwargs.pop("system", None)
138 |
139 | # Original logic: Handle JSON mode for simple prompt case
140 | if json_mode:
141 | self.logger.debug(
142 | "json_mode=True requested for completion (simple prompt), modifying user message for Anthropic."
143 | )
144 | # Modify the user message content in current_api_messages
145 | user_message_idx = -1
146 | for i, msg in enumerate(current_api_messages):
147 | if msg["role"] == "user":
148 | user_message_idx = i
149 | break
150 |
151 | if user_message_idx != -1:
152 | original_content = current_api_messages[user_message_idx]["content"]
153 | if isinstance(original_content, str) and "Please respond with valid JSON" not in original_content:
154 | current_api_messages[user_message_idx]["content"] = (
155 | f"{original_content}\\nPlease respond ONLY with valid JSON matching the expected schema. Do not include explanations or markdown formatting."
156 | )
157 | else:
158 | # This case should ideally not happen if prompt is always user role.
159 | # If it could, one might append a new user message asking for JSON,
160 | # or include it in system prompt if system_prompt is being constructed here.
161 | self.logger.warning("Could not find user message to append JSON instruction for simple prompt case.")
162 |
163 | # Prepare API call parameters using max_tokens directly from signature
164 | api_params = {
165 | "messages": current_api_messages,
166 | "model": actual_model_name,
167 | "max_tokens": max_tokens, # Uses max_tokens from signature (which defaults to 1024 if not passed)
168 | "temperature": temperature,
169 | **kwargs, # Pass remaining kwargs (like top_p, etc.) that were not popped
170 | }
171 | if system_prompt: # Add system prompt if it was extracted
172 | api_params["system"] = system_prompt
173 |
174 | # Logging before API call (original style)
175 | self.logger.info(
176 | f"Generating completion with Anthropic model {actual_model_name}",
177 | emoji_key=TaskType.COMPLETION.value,
178 | prompt_length=len(prompt) if prompt else 0, # length of prompt if provided
179 | json_mode_requested=json_mode,
180 | )
181 |
182 | try:
183 | response, processing_time = await self.process_with_timer(
184 | self.client.messages.create, **api_params
185 | )
186 | except Exception as e:
187 | error_message = f"Anthropic API error during completion for model {actual_model_name}: {type(e).__name__}: {str(e)}"
188 | self.logger.error(error_message, exc_info=True)
189 | raise ConnectionError(error_message) from e
190 |
191 | if (
192 | not response.content
193 | or not isinstance(response.content, list)
194 | or not hasattr(response.content[0], "text")
195 | ):
196 | raise ValueError(f"Unexpected response format from Anthropic API: {response}")
197 | completion_text = response.content[0].text
198 |
199 | # Post-process if JSON mode was requested (for simple prompt case) - best effort extraction
200 | if json_mode: # This json_mode is the original parameter
201 | original_text_for_json_check = completion_text
202 | completion_text = self._extract_json_from_text(completion_text)
203 | if original_text_for_json_check != completion_text:
204 | self.logger.debug("Extracted JSON content from Anthropic response post-processing (simple prompt case).")
205 |
206 | result = ModelResponse(
207 | text=completion_text,
208 | model=f"{self.provider_name}/{actual_model_name}",
209 | provider=self.provider_name,
210 | input_tokens=response.usage.input_tokens,
211 | output_tokens=response.usage.output_tokens,
212 | processing_time=processing_time,
213 | raw_response=response.model_dump(),
214 | )
215 | result.message = {"role": "assistant", "content": completion_text}
216 |
217 | self.logger.success(
218 | "Anthropic completion successful",
219 | emoji_key="success",
220 | model=result.model,
221 | tokens={"input": result.input_tokens, "output": result.output_tokens},
222 | cost=result.cost,
223 | time=result.processing_time,
224 | )
225 | return result
226 |
227 | # --- NEW METHOD ---
    async def generate_chat_completion(
        self,
        messages: List[
            Dict[str, Any]
        ],  # Dict used for broad compatibility instead of the SDK's MessageParam type
        model: Optional[str] = None,
        max_tokens: Optional[int] = 1024,  # Provide a default
        temperature: float = 0.7,
        json_mode: bool = False,  # Emulated via prompting; Anthropic has no native JSON mode
        **kwargs,
    ) -> ModelResponse:
        """Generate a chat completion using Anthropic Claude.

        System messages embedded in ``messages`` are extracted (Anthropic
        requires the system prompt as a top-level API parameter, not a
        message); the first one found overrides any ``system`` passed via
        kwargs. Messages with roles other than user/assistant/system are
        dropped with a warning. ``json_mode=True`` is implemented purely by
        prompt engineering.

        Args:
            messages: A list of message dictionaries (e.g., [{"role": "user", "content": "..."}]).
                Should conform to Anthropic's expected format.
            model: Model name to use (e.g., "claude-3-opus-20240229").
            max_tokens: Maximum tokens to generate. Defaults to 1024.
            temperature: Temperature parameter (0.0-1.0).
            json_mode: If True, guide the model to generate JSON output (via prompt engineering).
            **kwargs: Additional model-specific parameters (e.g., top_p, system).

        Returns:
            ModelResponse object containing the assistant's message.

        Raises:
            ConnectionError: If provider initialization fails or the API call errors.
            ValueError: If the API response has an unexpected shape.
        """
        if not self.client:
            if not await self.initialize():
                raise ConnectionError("Anthropic provider failed to initialize.")

        model = model or self.get_default_model()
        actual_model_name = self.strip_provider_prefix(model)

        # Handle system prompt extraction (popped so it isn't sent twice via **kwargs)
        system_prompt = kwargs.pop("system", None)

        # Process the messages to extract system message and convert to Anthropic format
        processed_messages = []
        extracted_system = None

        for msg in messages:
            role = msg.get("role", "")
            content = msg.get("content", "")

            # Extract system message if present
            if role == "system":
                if extracted_system is None:  # Take the first system message
                    extracted_system = content
                # Don't add system messages to the processed_messages list
                continue
            elif role in ("user", "assistant"):
                # Keep user and assistant messages
                processed_messages.append({"role": role, "content": content})
            else:
                self.logger.warning(f"Ignoring unsupported message role: {role}")

        # If we found a system message, use it (overrides any system in kwargs)
        if extracted_system is not None:
            system_prompt = extracted_system

        # Process json_mode by modifying system prompt or last user message
        json_mode_requested = json_mode

        if json_mode_requested:
            self.logger.debug(
                "json_mode=True requested for chat completion, implementing via prompt engineering for Anthropic"
            )

            # If we have a system prompt, update it to include JSON instructions
            if system_prompt:
                system_prompt = f"{system_prompt}\n\nIMPORTANT: You must respond ONLY with valid JSON matching the expected schema. Do not include explanations or markdown formatting."
            # Otherwise, if there's at least one user message, modify the last one
            elif processed_messages and any(m.get("role") == "user" for m in processed_messages):
                # Find last user message (iterate backwards)
                for i in range(len(processed_messages) - 1, -1, -1):
                    if processed_messages[i].get("role") == "user":
                        user_content = processed_messages[i].get("content", "")
                        # Only add JSON instruction if not already present
                        # NOTE(review): assumes content is a str here; a content-block
                        # list would make the `in` checks element tests — confirm callers.
                        if "respond with JSON" not in user_content and "respond in JSON" not in user_content:
                            processed_messages[i]["content"] = f"{user_content}\n\nPlease respond ONLY with valid JSON. Do not include explanations or markdown formatting."
                        break
            # If neither system prompt nor user messages to modify, add a system prompt
            else:
                system_prompt = "You must respond ONLY with valid JSON. Do not include explanations or markdown formatting."

        # Prepare API call parameters
        api_params = {
            "messages": processed_messages,
            "model": actual_model_name,
            "max_tokens": max_tokens,
            "temperature": temperature,
            **kwargs,  # Pass remaining kwargs (like top_p, etc.)
        }
        if system_prompt:
            api_params["system"] = system_prompt

        self.logger.info(
            f"Generating chat completion with Anthropic model {actual_model_name}",
            emoji_key=TaskType.CHAT.value,  # Use enum value
            message_count=len(processed_messages),
            json_mode_requested=json_mode_requested,  # Log if it was requested
        )

        try:
            response, processing_time = await self.process_with_timer(
                self.client.messages.create, **api_params
            )
        except Exception as e:
            # Wrap any SDK/API failure in ConnectionError, preserving the cause chain
            error_message = f"Anthropic API error during chat completion for model {actual_model_name}: {type(e).__name__}: {str(e)}"
            self.logger.error(error_message, exc_info=True)
            raise ConnectionError(error_message) from e

        # Extract response content (API returns a list of content blocks; first must be text)
        if (
            not response.content
            or not isinstance(response.content, list)
            or not hasattr(response.content[0], "text")
        ):
            raise ValueError(f"Unexpected response format from Anthropic API: {response}")
        assistant_content = response.content[0].text

        # Create standardized response including the assistant message
        result = ModelResponse(
            text=assistant_content,  # Keep raw text accessible
            model=f"{self.provider_name}/{actual_model_name}",  # Return prefixed model ID
            provider=self.provider_name,
            input_tokens=response.usage.input_tokens,
            output_tokens=response.usage.output_tokens,
            processing_time=processing_time,
            raw_response=response.model_dump(),  # Use model_dump() if Pydantic
        )

        # Add message to result for chat_completion callers
        result.message = {"role": "assistant", "content": assistant_content}

        # Log success
        self.logger.success(
            "Anthropic chat completion successful",
            emoji_key="success",
            model=result.model,
            tokens={"input": result.input_tokens, "output": result.output_tokens},
            cost=result.cost,
            time=result.processing_time,
        )

        return result
373 |
374 | # --- END NEW METHOD ---
375 |
    async def generate_completion_stream(
        self,
        # Keep existing signature: accepts prompt primarily, but also messages/system in kwargs
        prompt: Optional[str] = None,  # Make prompt optional if messages are primary input
        messages: Optional[List[Dict[str, Any]]] = None,  # Allow messages directly
        model: Optional[str] = None,
        max_tokens: Optional[int] = 1024,  # Default max_tokens
        temperature: float = 0.7,
        json_mode: bool = False,  # Accept json_mode flag
        **kwargs,
    ) -> AsyncGenerator[Tuple[str, Dict[str, Any]], None]:
        """Generate a streaming completion using Anthropic Claude. Handles both prompt and message inputs.

        Yields text chunks as they arrive, followed by one final empty chunk
        whose metadata carries aggregated token counts, processing time, and
        finish reason. API errors during streaming are NOT re-raised: they are
        reported as a final chunk with ``finish_reason="error"`` and an
        ``error`` field, so callers must inspect the metadata.

        Args:
            prompt: (Optional) Text prompt (if messages not provided).
            messages: (Optional) List of message dictionaries. Takes precedence over prompt.
            model: Model name to use.
            max_tokens: Maximum tokens to generate. Defaults to 1024.
            temperature: Temperature parameter.
            json_mode: If True, guides model towards JSON (via prompting if using prompt input).
            **kwargs: Additional parameters (system, top_p, etc.).

        Yields:
            Tuple of (text_chunk, metadata).

        Raises:
            ConnectionError: If provider initialization fails.
            ValueError: If neither prompt nor messages are provided.
        """
        if not self.client:
            if not await self.initialize():
                raise ConnectionError("Anthropic provider failed to initialize.")

        model = model or self.get_default_model()
        actual_model_name = self.strip_provider_prefix(model)

        # Prepare system prompt if provided in kwargs (top-level API parameter for Anthropic)
        system_prompt = kwargs.pop("system", None)

        # Determine input messages: Use 'messages' if provided, otherwise construct from 'prompt'
        if messages:
            # Process the messages to extract system message and convert to Anthropic format
            processed_messages = []
            extracted_system = None

            for msg in messages:
                role = msg.get("role", "")
                content = msg.get("content", "")

                # Extract system message if present
                if role == "system":
                    if extracted_system is None:  # Take the first system message
                        extracted_system = content
                    # Don't add system messages to the processed_messages list
                    continue
                elif role in ("user", "assistant"):
                    # Keep user and assistant messages
                    processed_messages.append({"role": role, "content": content})
                else:
                    self.logger.warning(f"Ignoring unsupported message role in streaming: {role}")

            # If we found a system message, use it (overrides any system in kwargs)
            if extracted_system is not None:
                system_prompt = extracted_system

            input_desc = f"{len(processed_messages)} messages"
        elif prompt:
            # Construct messages from prompt
            processed_messages = [{"role": "user", "content": prompt}]
            input_desc = f"prompt ({len(prompt)} chars)"

            # Apply JSON mode prompt modification ONLY if using prompt input
            # (the messages path gets its JSON instruction via the system prompt below)
            if json_mode:
                self.logger.debug(
                    "json_mode=True requested for stream completion, modifying prompt for Anthropic."
                )
                user_message = processed_messages[-1]
                original_content = user_message["content"]
                if "Please respond with valid JSON" not in original_content:
                    user_message["content"] = (
                        f"{original_content}\nPlease respond ONLY with valid JSON matching the expected schema. Do not include explanations or markdown formatting."
                    )
        else:
            raise ValueError(
                "Either 'prompt' or 'messages' must be provided for generate_completion_stream"
            )

        # Apply JSON mode to system prompt if using messages input and json_mode is True
        # (also pops a stray 'json_mode' kwarg so it is not forwarded to the API)
        json_mode_requested = kwargs.pop("json_mode", json_mode)  # Keep track if it was requested
        if json_mode_requested and messages:
            if system_prompt:
                system_prompt = f"{system_prompt}\n\nIMPORTANT: You must respond ONLY with valid JSON matching the expected schema. Do not include explanations or markdown formatting."
            else:
                system_prompt = "You must respond ONLY with valid JSON matching the expected schema. Do not include explanations or markdown formatting."

        # Prepare API call parameters
        params = {
            "model": actual_model_name,
            "messages": processed_messages,
            "temperature": temperature,
            "max_tokens": max_tokens,  # Use the default or provided value
            **kwargs,  # Pass remaining kwargs
        }
        if system_prompt:
            params["system"] = system_prompt

        self.logger.info(
            f"Generating streaming completion with Anthropic model {actual_model_name}",
            emoji_key=self.provider_name,
            input_type=input_desc,
            json_mode_requested=json_mode_requested,
        )

        start_time = time.time()
        total_chunks = 0
        final_input_tokens = 0
        final_output_tokens = 0
        finish_reason = None  # Track finish reason

        try:
            async with self.client.messages.stream(**params) as stream:
                async for chunk in stream:
                    # Extract text delta
                    if chunk.type == "content_block_delta":
                        content = chunk.delta.text
                        total_chunks += 1
                        metadata = {
                            "model": f"{self.provider_name}/{actual_model_name}",
                            "provider": self.provider_name,
                            "chunk_index": total_chunks,
                            "finish_reason": None,  # Not final yet
                        }
                        yield content, metadata

                    # Don't attempt to capture usage from delta chunks - wait for final message

                # Important: Get final tokens from the final message state
                try:
                    final_message = await stream.get_final_message()
                    final_input_tokens = final_message.usage.input_tokens if hasattr(final_message, 'usage') else 0
                    final_output_tokens = final_message.usage.output_tokens if hasattr(final_message, 'usage') else 0
                    # Ensure finish_reason is captured from the final message
                    finish_reason = final_message.stop_reason if hasattr(final_message, 'stop_reason') else "unknown"
                except Exception as e:
                    # If we can't get the final message for any reason, log it but continue
                    self.logger.warning(f"Couldn't get final message stats: {e}")
                    # Estimate token counts based on total characters / avg chars per token
                    char_count = sum(len(m.get("content", "")) for m in processed_messages)
                    final_input_tokens = char_count // 4  # Rough estimate
                    final_output_tokens = total_chunks * 5  # Very rough estimate

            processing_time = time.time() - start_time
            self.logger.success(
                "Anthropic streaming completion successful",
                emoji_key="success",
                model=f"{self.provider_name}/{actual_model_name}",
                chunks=total_chunks,
                tokens={"input": final_input_tokens, "output": final_output_tokens},
                time=processing_time,
                finish_reason=finish_reason,
            )

            # Yield a final empty chunk with aggregated metadata
            final_metadata = {
                "model": f"{self.provider_name}/{actual_model_name}",
                "provider": self.provider_name,
                "chunk_index": total_chunks + 1,
                "input_tokens": final_input_tokens,
                "output_tokens": final_output_tokens,
                "total_tokens": final_input_tokens + final_output_tokens,
                "processing_time": processing_time,
                "finish_reason": finish_reason,
            }
            yield "", final_metadata

        except Exception as e:
            processing_time = time.time() - start_time
            self.logger.error(
                f"Anthropic streaming completion failed after {processing_time:.2f}s: {str(e)}",
                emoji_key="error",
                model=f"{self.provider_name}/{actual_model_name}",
                exc_info=True,
            )
            # Yield a final error chunk
            error_metadata = {
                "model": f"{self.provider_name}/{actual_model_name}",
                "provider": self.provider_name,
                "chunk_index": total_chunks + 1,
                "error": f"{type(e).__name__}: {str(e)}",
                "finish_reason": "error",
                "processing_time": processing_time,
            }
            yield "", error_metadata
            # Don't re-raise here, let the caller handle the error chunk
570 |
571 | async def list_models(self) -> List[Dict[str, Any]]:
572 | """List available Anthropic Claude models.
573 |
574 | Returns:
575 | List of model information dictionaries including the provider prefix.
576 | """
577 | # Anthropic doesn't have a list models endpoint, return static list WITH prefix
578 | # Based on the models defined in constants.py
579 | static_models = [
580 | # Define with the full ID including provider prefix
581 | {
582 | "id": f"{self.provider_name}/claude-3-7-sonnet-20250219",
583 | "name": "Claude 3.7 Sonnet",
584 | "context_window": 200000,
585 | "input_cost_pmt": 3.0,
586 | "output_cost_pmt": 15.0,
587 | "features": ["chat", "completion", "vision", "tool_use"],
588 | },
589 | {
590 | "id": f"{self.provider_name}/claude-3-5-haiku-20241022",
591 | "name": "Claude 3.5 Haiku",
592 | "context_window": 200000,
593 | "input_cost_pmt": 0.80,
594 | "output_cost_pmt": 4.0,
595 | "features": ["chat", "completion", "vision"],
596 | },
597 | {
598 | "id": f"{self.provider_name}/claude-3-opus-20240229",
599 | "name": "Claude 3 Opus",
600 | "context_window": 200000,
601 | "input_cost_pmt": 15.0,
602 | "output_cost_pmt": 75.0,
603 | "features": ["chat", "completion", "vision"],
604 | },
605 | ]
606 |
607 | # Simple caching (optional, as list is static)
608 | if not self.models_cache:
609 | self.models_cache = static_models
610 | return self.models_cache
611 |
612 | def get_default_model(self) -> str:
613 | """Get the default Anthropic model ID (including provider prefix).
614 |
615 | Returns:
616 | Default model ID string (e.g., "anthropic/claude-3-5-haiku-20241022").
617 | """
618 | # Try getting from config first
619 | from ultimate_mcp_server.config import get_config
620 |
621 | default_model_id = f"{self.provider_name}/claude-3-5-haiku-20241022" # Hardcoded default
622 |
623 | try:
624 | config = get_config()
625 | # Access nested provider config safely
626 | provider_config = config.providers.get(self.provider_name) if config.providers else None
627 | if provider_config and provider_config.default_model:
628 | # Ensure the configured default includes the prefix
629 | configured_default = provider_config.default_model
630 | if not configured_default.startswith(f"{self.provider_name}/"):
631 | self.logger.warning(
632 | f"Configured default model '{configured_default}' for Anthropic is missing the provider prefix. Using hardcoded default: {default_model_id}"
633 | )
634 | return default_model_id
635 | else:
636 | return configured_default
637 | except (ImportError, AttributeError, TypeError) as e:
638 | self.logger.debug(
639 | f"Could not retrieve default model from config ({e}), using hardcoded default."
640 | )
641 |
642 | return default_model_id
643 |
async def check_api_key(self) -> bool:
    """Validate the Anthropic API key by issuing a minimal messages request.

    Returns:
        bool: True if API key allows a basic request.
    """
    if not self.client:
        self.logger.warning("Cannot check API key: Anthropic client not initialized.")
        # Try a lazy initialization before giving up on the key.
        if not await self.initialize():
            return False  # Initialization failed; key likely invalid or other issue
        if not self.client:
            # Initialization succeeded without a real client (mock-key path):
            # treat the key as 'valid' for testing purposes.
            return True

    try:
        # The raw Anthropic SDK expects the model name *without* our provider prefix.
        model_name = self.strip_provider_prefix(self.get_default_model())
        await self.client.messages.create(
            model=model_name,
            messages=[{"role": "user", "content": "Test"}],
            max_tokens=1,
        )
    except Exception as exc:
        self.logger.warning(f"Anthropic API key validation failed: {type(exc).__name__}")
        return False

    self.logger.info("Anthropic API key validation successful.")
    return True
672 |
def strip_provider_prefix(self, model_id: str) -> str:
    """Remove this provider's prefix (e.g. 'anthropic/' or 'anthropic:') from a model ID.

    Returns the ID unchanged when no known prefix is present.
    """
    # '/' is the current separator; ':' is accepted for backward compatibility.
    for separator in ("/", ":"):
        candidate_prefix = f"{self.provider_name}{separator}"
        if model_id.startswith(candidate_prefix):
            return model_id[len(candidate_prefix):]
    return model_id
683 |
684 | def _extract_json_from_text(self, text: str) -> str:
685 | """Extract JSON content from text that might include markdown code blocks or explanatory text.
686 |
687 | Args:
688 | text: The raw text response that might contain JSON
689 |
690 | Returns:
691 | Cleaned JSON content
692 | """
693 |
694 | # First check if the text is already valid JSON
695 | try:
696 | json.loads(text)
697 | return text # Already valid JSON
698 | except json.JSONDecodeError:
699 | pass # Continue with extraction
700 |
701 | # Extract JSON from code blocks - most common Anthropic pattern
702 | code_block_match = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
703 | if code_block_match:
704 | code_content = code_block_match.group(1).strip()
705 | try:
706 | json.loads(code_content)
707 | return code_content
708 | except json.JSONDecodeError:
709 | # Try to fix common JSON syntax issues like trailing commas
710 | fixed_content = re.sub(r',\s*([}\]])', r'\1', code_content)
711 | try:
712 | json.loads(fixed_content)
713 | return fixed_content
714 | except json.JSONDecodeError:
715 | pass # Continue with other extraction methods
716 |
717 | # Look for JSON array or object patterns in the content
718 | # Find the first [ or { and the matching closing ] or }
719 | stripped = text.strip()
720 |
721 | # Try to extract array
722 | if '[' in stripped and ']' in stripped:
723 | start = stripped.find('[')
724 | # Find the matching closing bracket
725 | end = -1
726 | depth = 0
727 | for i in range(start, len(stripped)):
728 | if stripped[i] == '[':
729 | depth += 1
730 | elif stripped[i] == ']':
731 | depth -= 1
732 | if depth == 0:
733 | end = i + 1
734 | break
735 |
736 | if end > start:
737 | array_content = stripped[start:end]
738 | try:
739 | json.loads(array_content)
740 | return array_content
741 | except json.JSONDecodeError:
742 | pass # Try other methods
743 |
744 | # Try to extract object
745 | if '{' in stripped and '}' in stripped:
746 | start = stripped.find('{')
747 | # Find the matching closing bracket
748 | end = -1
749 | depth = 0
750 | for i in range(start, len(stripped)):
751 | if stripped[i] == '{':
752 | depth += 1
753 | elif stripped[i] == '}':
754 | depth -= 1
755 | if depth == 0:
756 | end = i + 1
757 | break
758 |
759 | if end > start:
760 | object_content = stripped[start:end]
761 | try:
762 | json.loads(object_content)
763 | return object_content
764 | except json.JSONDecodeError:
765 | pass # Try other methods
766 |
767 | # If all else fails, return the original text
768 | return text
769 |
async def process_with_timer(self, func, *args, **kwargs) -> Tuple[Any, float]:
    """Await *func* with the given arguments and measure its wall-clock duration.

    Returns:
        A ``(result, elapsed_seconds)`` tuple.
    """
    started = time.perf_counter()
    outcome = await func(*args, **kwargs)
    elapsed = time.perf_counter() - started
    return outcome, elapsed
776 |
777 |
```
--------------------------------------------------------------------------------
/examples/smart_browser_demo.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | """
3 | DETAILED Demonstration script for the Smart Browser Tools in Ultimate MCP Server,
4 | showcasing browsing, interaction, search, download, macro, and autopilot features.
5 | """
6 |
7 | import asyncio
8 | import logging
9 | import sys
10 | import time
11 | import traceback
12 | from datetime import datetime
13 | from pathlib import Path
14 | from typing import Any, Dict, Optional, Tuple
15 |
16 | # Add project root to path for imports when running as script
17 | # Adjust this relative path if your script structure is different
18 | _PROJECT_ROOT = Path(__file__).resolve().parent.parent
19 | if str(_PROJECT_ROOT) not in sys.path:
20 | sys.path.insert(0, str(_PROJECT_ROOT))
21 | print(f"INFO: Added {_PROJECT_ROOT} to sys.path")
22 |
23 | # Rich imports for enhanced terminal UI
24 | from rich import box, get_console # noqa: E402
25 | from rich.console import Group # noqa: E402
26 | from rich.markup import escape # noqa: E402
27 | from rich.panel import Panel # noqa: E402
28 | from rich.rule import Rule # noqa: E402
29 | from rich.table import Table # noqa: E402
30 | from rich.text import Text # noqa: E402
31 | from rich.traceback import install as install_rich_traceback # noqa: E402
32 |
33 | # Initialize Rich console
34 | console = get_console()
35 |
36 | # Define a fallback logger in case the import fails
def create_fallback_logger(name):
    """Create a basic stderr logger for use when the project logger import fails.

    Args:
        name: Logger name passed to ``logging.getLogger``.

    Returns:
        A configured ``logging.Logger`` at INFO level.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    # BUGFIX: logging.getLogger caches loggers by name, so attaching a new
    # handler on every call would duplicate output; only attach once.
    if not logger.handlers:
        handler = logging.StreamHandler()
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger
45 |
46 | # Import Gateway and MCP components
47 | from ultimate_mcp_server.core.server import Gateway # noqa: E402
48 | from ultimate_mcp_server.exceptions import ToolError, ToolInputError # noqa: E402
49 |
50 | # Import smart browser tools directly
51 | from ultimate_mcp_server.tools.smart_browser import ( # noqa: E402
52 | autopilot,
53 | browse,
54 | click,
55 | collect_documentation,
56 | download,
57 | download_site_pdfs,
58 | parallel,
59 | run_macro,
60 | search,
61 | shutdown,
62 | type_text,
63 | )
64 | from ultimate_mcp_server.utils import get_logger # noqa: E402
65 | from ultimate_mcp_server.utils.display import CostTracker # noqa: E402
66 |
67 | # Initialize logger
68 | logger = get_logger("demo.smart_browser")
69 |
70 | # Install rich tracebacks
71 | install_rich_traceback(show_locals=True, width=console.width, extra_lines=2)
72 |
73 | # --- Configuration ---
74 | # Base directory for Smart Browser outputs
75 | SMART_BROWSER_INTERNAL_BASE = "storage/smart_browser_internal" # Relative path used by the tool
76 | SMART_BROWSER_DOWNLOADS_BASE = "storage/smart_browser_downloads" # Default download relative path
77 | DEMO_OUTPUTS_DIR = Path(
78 | "./sb_demo_outputs"
79 | ) # Local dir for demo-specific outputs like the test HTML
80 |
81 | # Example URLs for demo
82 | URL_EXAMPLE = "http://example.com"
83 | URL_BOOKSTORE = "http://books.toscrape.com/"
84 | URL_QUOTES = "http://quotes.toscrape.com/"
85 | URL_PDF_SAMPLE = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
86 | URL_GITHUB = "https://github.com/features/copilot"
87 |
88 | # --- Demo Helper Functions (Unchanged from previous version) ---
89 |
90 |
def timestamp_str(short: bool = False) -> str:
    """Return the current time as a dim-styled Rich markup string."""
    current = datetime.fromtimestamp(time.time())  # Use time.time for consistency
    fmt = '%H:%M:%S' if short else '%Y-%m-%d %H:%M:%S'
    return f"[dim]{current.strftime(fmt)}[/]"
98 |
99 |
def truncate_text_by_lines(text: str, max_lines: int = 50) -> str:
    """Shorten *text* to roughly *max_lines* lines, keeping the head and tail."""
    if not text:
        return ""
    lines = text.splitlines()
    if len(lines) <= max_lines:
        return text
    # Degenerate budget: keep only the very first line plus a marker.
    if max_lines == 1:
        return lines[0] + "\n[...TRUNCATED...]"
    # Keep an equal share from each end (at least one line per side).
    keep = max(1, max_lines // 2)
    return "\n".join(lines[:keep] + ["[...TRUNCATED...]"] + lines[-keep:])
116 |
117 |
def format_value(key: str, value: Any, detail_level: int = 1) -> Any:
    """Format specific values for display, returning strings with markup.

    Args:
        key: Name of the field; some keys (``*time_seconds``, ``duration_ms``,
            ``size_bytes``) trigger special formatting.
        value: Value to render; lists/dicts are previewed recursively.
        detail_level: 0 = terse type-only previews, 1 = short item previews,
            2 = longer previews.

    Returns:
        A display string, possibly containing Rich markup.
    """
    if value is None:
        return "[dim]None[/]"  # Keep markup
    if isinstance(value, bool):
        return "[green]Yes[/]" if value else "[red]No[/]"  # Keep markup
    if isinstance(value, float):
        # NOTE(review): floats return here, so a float-valued "*time_seconds"
        # key never reaches the seconds branch below — confirm intended.
        return f"{value:.3f}"  # Return simple string
    if key.lower().endswith("time_seconds") or key.lower() == "duration_ms":
        try:
            # duration_ms values are converted to seconds before display.
            val_s = float(value) / 1000.0 if key.lower() == "duration_ms" else float(value)
            return f"[green]{val_s:.3f}s[/]"  # Keep markup
        except (ValueError, TypeError):
            return escape(str(value))  # Fallback for non-numeric time values
    if key.lower() == "size_bytes" and isinstance(value, int):
        # Human-readable byte sizes; negative means "unknown".
        if value < 0:
            return "[dim]N/A[/]"
        if value > 1024 * 1024:
            return f"{value / (1024 * 1024):.2f} MB"
        if value > 1024:
            return f"{value / 1024:.2f} KB"
        return f"{value} Bytes"  # Return simple string

    if isinstance(value, list):
        if not value:
            return "[dim]Empty List[/]"  # Keep markup
        list_len = len(value)
        preview_count = 3 if detail_level < 2 else 5
        suffix = (
            f" [dim]... ({list_len} items total)[/]" if list_len > preview_count else ""
        )  # Keep markup
        if detail_level >= 1:
            previews = [
                str(
                    format_value(f"{key}[{i}]", item, detail_level=0)
                )  # Recursive call returns string
                for i, item in enumerate(value[:preview_count])
            ]
            return f"[{', '.join(previews)}]{suffix}"  # Returns string with markup
        else:
            return f"[List with {list_len} items]"  # Keep markup

    if isinstance(value, dict):
        if not value:
            return "[dim]Empty Dict[/]"  # Keep markup
        dict_len = len(value)
        preview_count = 4 if detail_level < 2 else 8
        preview_keys = list(value.keys())[:preview_count]
        suffix = (
            f" [dim]... ({dict_len} keys total)[/]" if dict_len > preview_count else ""
        )  # Keep markup
        if detail_level >= 1:
            items_preview = [
                # Key repr for clarity, value formatted recursively
                f"{repr(k)}: {str(format_value(k, value[k], detail_level=0))}"
                for k in preview_keys
            ]
            return f"{{{'; '.join(items_preview)}}}{suffix}"  # Returns string with markup
        else:
            return f"[Dict with {dict_len} keys]"  # Keep markup

    if isinstance(value, str):
        value_truncated = truncate_text_by_lines(value, 30)  # Truncate by lines first
        preview_len = 300 if detail_level < 2 else 600
        suffix = ""
        # Check length after line truncation
        if len(value_truncated) > preview_len:
            value_display = value_truncated[:preview_len]
            suffix = "[dim]... (truncated)[/]"  # Keep markup
        else:
            value_display = value_truncated

        # Escape only if it doesn't look like it contains Rich markup
        if "[" in value_display and "]" in value_display and "/" in value_display:
            # Heuristic: Assume it might contain markup, don't escape
            return value_display + suffix
        else:
            # Safe to escape plain strings
            return escape(value_display) + suffix

    # Fallback: escape the string representation of other types
    return escape(str(value))
200 |
201 |
def display_page_state(state: Dict[str, Any], title: str = "Page State"):
    """Display the 'page_state' dictionary nicely.

    Renders the URL/title, a truncated main-text panel, and a table
    previewing up to 15 interactive elements (id, tag, role, text, bbox).

    Args:
        state: Page-state dict with optional ``url``, ``title``,
            ``main_text`` and ``elements`` keys.
        title: Title of the surrounding Rich panel.
    """
    panel_content = []
    url = state.get("url", "N/A")
    panel_content.append(
        Text.from_markup(f"[bold cyan]URL:[/bold cyan] [link={url}]{escape(url)}[/link]")
    )
    panel_content.append(
        Text.from_markup(f"[bold cyan]Title:[/bold cyan] {escape(state.get('title', 'N/A'))}")
    )

    main_text = state.get("main_text", "")
    if main_text:
        # Keep the summary short — 15 lines max.
        truncated_text = truncate_text_by_lines(main_text, 15)
        panel_content.append(Text.from_markup("\n[bold cyan]Main Text Summary:[/bold cyan]"))
        panel_content.append(Panel(escape(truncated_text), border_style="dim", padding=(0, 1)))

    elements = state.get("elements", [])
    if elements:
        elements_table = Table(
            title=Text.from_markup(f"Interactive Elements ({len(elements)} found)"),
            box=box.MINIMAL,
            show_header=True,
            padding=(0, 1),
            border_style="blue",
        )
        elements_table.add_column("ID", style="magenta", no_wrap=True)
        elements_table.add_column("Tag", style="cyan")
        elements_table.add_column("Role", style="yellow")
        elements_table.add_column("Text Preview", style="white", max_width=60)
        elements_table.add_column("BBox", style="dim")

        # Show at most 15 elements; a trailing row indicates how many were omitted.
        preview_count = 15
        for elem in elements[:preview_count]:
            elem_text_raw = elem.get("text", "")
            elem_text_preview = escape(
                elem_text_raw[:60] + ("..." if len(elem_text_raw) > 60 else "")
            )
            bbox = elem.get("bbox", [])
            # bbox is expected as [x, y, width, height]; anything else is flagged.
            if len(bbox) == 4:
                bbox_str = f"({bbox[0]}x{bbox[1]}, {bbox[2]}w{bbox[3]}h)"
            else:
                bbox_str = "[Invalid Bbox]"

            elements_table.add_row(
                str(elem.get("id", "?")),
                str(elem.get("tag", "?")),
                str(elem.get("role", "")),
                elem_text_preview,  # Pass escaped preview string
                bbox_str,
            )
        if len(elements) > preview_count:
            elements_table.add_row(
                "...",
                Text.from_markup(f"[dim]{len(elements) - preview_count} more...[/]"),
                "",
                "",
                "",
            )

        panel_content.append(Text.from_markup("\n[bold cyan]Elements:[/bold cyan]"))
        panel_content.append(elements_table)

    console.print(
        Panel(
            Group(*panel_content),
            title=Text.from_markup(title),
            border_style="blue",
            padding=(1, 2),
            expand=False,
        )
    )
274 |
275 |
def display_result(
    title: str, result: Dict[str, Any], display_options: Optional[Dict] = None
) -> None:
    """Display operation result with enhanced formatting using Rich.

    Renders, in order: a status panel (success/error), a summary table of
    top-level keys (minus hidden ones), and specialized sections for page
    state, search results, downloads, macro/autopilot steps, and collected
    documentation, depending on which keys are present in ``result``.

    Args:
        title: Heading shown in the rule above the output.
        result: Result dict returned by a Smart Browser tool call.
        display_options: Optional tweaks such as ``detail_level`` and
            ``hide_keys``; superseded by ``result["_display_options"]``
            when present.
    """
    display_options = display_options or {}
    console.print(
        Rule(
            Text.from_markup(f"[bold cyan]{escape(title)}[/] {timestamp_str(short=True)}"),
            style="cyan",
        )
    )

    success = result.get("success", False)
    detail_level = display_options.get("detail_level", 1)
    # Use _display_options from result if available, otherwise use passed options
    effective_display_options = result.get("_display_options", display_options)

    # Keys rendered by a dedicated section below are hidden from the summary table.
    hide_keys_set = set(
        effective_display_options.get(
            "hide_keys",
            [
                "success",
                "page_state",
                "results",
                "steps",
                "download",
                "final_page_state",
                "documentation",
                "raw_response",
                "raw_llm_response",
                "_display_options",  # Also hide internal options
            ],
        )
    )

    # --- Status Panel ---
    status_panel_content = Text.from_markup(
        f"Status: {'[bold green]Success[/]' if success else '[bold red]Failed[/]'}\n"
    )
    if not success:
        error_code = result.get("error_code", "N/A")
        error_msg = result.get("error", "Unknown error")
        status_panel_content.append(
            Text.from_markup(f"Error Code: [yellow]{escape(str(error_code))}[/]\n")
        )
        status_panel_content.append(
            Text.from_markup(f"Message: [red]{escape(str(error_msg))}[/]\n")
        )
        console.print(
            Panel(
                status_panel_content,
                title="Operation Status",
                border_style="red",
                padding=(1, 2),
                expand=False,
            )
        )
    else:
        console.print(
            Panel(
                status_panel_content,
                title="Operation Status",
                border_style="green",
                padding=(0, 1),
                expand=False,
            )
        )

    # --- Top Level Details ---
    details_table = Table(
        title="Result Summary", box=box.MINIMAL, show_header=False, padding=(0, 1)
    )
    details_table.add_column("Key", style="cyan", justify="right", no_wrap=True)
    details_table.add_column("Value", style="white")
    has_details = False
    for key, value in result.items():
        # Skip hidden keys and any internal (underscore-prefixed) keys.
        if key in hide_keys_set or key.startswith("_"):
            continue
        formatted_value = format_value(key, value, detail_level=detail_level)
        details_table.add_row(
            escape(str(key)), formatted_value
        )  # formatted_value is already string/markup
        has_details = True
    if has_details:
        console.print(details_table)

    # --- Special Section Displays ---

    # Page State (intermediate state takes precedence over final state)
    if "page_state" in result and isinstance(result["page_state"], dict):
        display_page_state(result["page_state"], title="Page State After Action")
    elif "final_page_state" in result and isinstance(result["final_page_state"], dict):
        display_page_state(result["final_page_state"], title="Final Page State")

    # Search Results ('results' list plus a 'query' key identifies a search result)
    if "results" in result and isinstance(result["results"], list) and "query" in result:
        search_results = result["results"]
        search_table = Table(
            title=Text.from_markup(
                f"Search Results for '{escape(result['query'])}' ({len(search_results)} found)"
            ),
            box=box.ROUNDED,
            show_header=True,
            padding=(0, 1),
        )
        search_table.add_column("#", style="dim")
        search_table.add_column("Title", style="cyan")
        search_table.add_column("URL", style="blue", no_wrap=False)
        search_table.add_column("Snippet", style="white", no_wrap=False)
        for i, item in enumerate(search_results, 1):
            title = truncate_text_by_lines(item.get("title", ""), 3)
            snippet = truncate_text_by_lines(item.get("snippet", ""), 5)
            url = item.get("url", "")
            search_table.add_row(
                str(i), escape(title), f"[link={url}]{escape(url)}[/link]", escape(snippet)
            )
        console.print(search_table)

    # Download Result
    if "download" in result and isinstance(result["download"], dict):
        dl_info = result["download"]
        dl_table = Table(
            title="Download Details", box=box.MINIMAL, show_header=False, padding=(0, 1)
        )
        dl_table.add_column("Metric", style="cyan", justify="right")
        dl_table.add_column("Value", style="white")
        dl_table.add_row("File Path", escape(dl_info.get("file_path", "N/A")))
        dl_table.add_row("File Name", escape(dl_info.get("file_name", "N/A")))
        dl_table.add_row("SHA256", escape(dl_info.get("sha256", "N/A")))
        # -1 renders as "N/A" via format_value's size_bytes handling.
        dl_table.add_row("Size", format_value("size_bytes", dl_info.get("size_bytes", -1)))
        dl_table.add_row("Source URL", escape(dl_info.get("url", "N/A")))
        dl_table.add_row(
            "Tables Extracted",
            format_value("tables_extracted", dl_info.get("tables_extracted", False)),
        )
        if dl_info.get("tables"):
            # format_value handles potential markup in table preview string
            dl_table.add_row("Table Preview", format_value("tables", dl_info.get("tables")))
        console.print(
            Panel(dl_table, title="Download Result", border_style="green", padding=(1, 2))
        )

    # Macro/Autopilot Steps
    if "steps" in result and isinstance(result["steps"], list):
        steps = result["steps"]
        steps_table = Table(
            title=Text.from_markup(f"Macro/Autopilot Steps ({len(steps)} executed)"),
            box=box.ROUNDED,
            show_header=True,
            padding=(0, 1),
        )
        steps_table.add_column("#", style="dim")
        steps_table.add_column("Action/Tool", style="cyan")
        steps_table.add_column("Arguments/Hint", style="white", no_wrap=False)
        steps_table.add_column("Status", style="yellow")
        steps_table.add_column("Result/Error", style="white", no_wrap=False)

        for i, step in enumerate(steps, 1):
            action = step.get("action", step.get("tool", "?"))
            args = step.get("args")  # Check if 'args' exists
            if args is None:  # If no 'args', use the step itself excluding status keys
                args = {
                    k: v
                    for k, v in step.items()
                    if k
                    not in ["action", "tool", "success", "result", "error", "step", "duration_ms"]
                }

            args_preview = format_value("args", args, detail_level=0)  # format_value handles markup
            success_step = step.get("success", False)
            status = "[green]OK[/]" if success_step else "[red]FAIL[/]"  # Markup string
            outcome = step.get("result", step.get("error", ""))
            outcome_preview = format_value(
                "outcome", outcome, detail_level=0
            )  # format_value handles markup
            steps_table.add_row(str(i), escape(action), args_preview, status, outcome_preview)
        console.print(steps_table)

    # Documentation (assuming it's stored under 'file_path' key now)
    if (
        "file_path" in result and result.get("pages_collected") is not None
    ):  # Check for doc collection result structure
        doc_file_path = result.get("file_path")
        pages_collected = result.get("pages_collected")
        if doc_file_path and pages_collected > 0:
            content_to_display: Any = f"[dim]Documentation saved to: {escape(doc_file_path)}[/]"
            try:
                with open(doc_file_path, "r", encoding="utf-8") as f:
                    content = f.read(1500)  # Read preview
                content_to_display += f"\n\n[bold]File Preview ({len(content)} chars):[/]\n"
                content_to_display += escape(content) + "\n[dim]...[/]"
            except Exception as e:
                # Best-effort preview: a read failure is reported, not raised.
                content_to_display += f"\n[yellow]Could not read file preview: {escape(str(e))}[/]"

            console.print(
                Panel(
                    Text.from_markup(content_to_display),
                    title=f"Collected Documentation ({pages_collected} pages)",
                    border_style="magenta",
                    padding=(1, 2),
                )
            )

    console.print()  # Add spacing
480 |
481 |
async def safe_tool_call(
    operation_name: str, tool_func: callable, *args, tracker: Optional[CostTracker] = None, **kwargs
) -> Tuple[bool, Dict[str, Any]]:
    """Safely call a tool function, handling exceptions and logging.

    Returns a ``(success, result_dict)`` pair; failures are converted into
    error dictionaries rather than raised.
    """
    console.print(
        f"\n[cyan]Calling Tool:[/][bold] {escape(operation_name)}[/] {timestamp_str(short=True)}"
    )
    display_options = kwargs.pop("display_options", {})

    MAX_ARG_LEN = 100

    def _summarize(v: Any) -> str:
        # Compact repr for debug logging: large strings/bytes and big
        # collections are reduced to their type and length.
        try:
            if isinstance(v, (str, bytes)) and len(v) > MAX_ARG_LEN:
                return f"{type(v).__name__}(len={len(v)})"
            if isinstance(v, (list, dict)) and len(v) > 10:
                return f"{type(v).__name__}(len={len(v)})"
            return repr(v)
        except Exception:  # Handle potential errors during repr()
            return f"<{type(v).__name__} repr_error>"

    log_args_repr = {k: _summarize(v) for k, v in kwargs.items()}
    logger.debug(f"Executing {operation_name} with args: {args}, kwargs: {log_args_repr}")

    try:
        # Call the tool function directly
        result = await tool_func(*args, **kwargs)
    except ToolInputError as e:
        logger.warning(f"Input error for {operation_name}: {e}")
        return False, {
            "success": False,
            "error": str(e),
            "error_code": getattr(e, "error_code", "INPUT_ERROR"),
            "_display_options": display_options,
        }
    except ToolError as e:
        logger.error(f"Tool error during {operation_name}: {e}", exc_info=True)
        return False, {
            "success": False,
            "error": str(e),
            "error_code": getattr(e, "error_code", "TOOL_ERROR"),
            "_display_options": display_options,
        }
    except Exception as e:
        logger.error(f"Unexpected error during {operation_name}: {e}", exc_info=True)
        tb_str = traceback.format_exc(limit=1)
        return False, {
            "success": False,
            "error": f"{type(e).__name__}: {e}\n{tb_str}",
            "error_type": type(e).__name__,
            "error_code": "UNEXPECTED_ERROR",
            "_display_options": display_options,
        }

    if not isinstance(result, dict):
        logger.error(f"Tool '{operation_name}' returned non-dict type: {type(result)}")
        return False, {
            "success": False,
            "error": f"Tool returned unexpected type: {type(result).__name__}",
            "error_code": "INTERNAL_ERROR",
            "_display_options": display_options,
        }

    # Store display options within the result for the display function
    result["_display_options"] = display_options
    logger.debug(f"Tool '{operation_name}' completed.")
    # Add success=True if missing and no error key present (should usually be set by tool)
    if "success" not in result and "error" not in result:
        result["success"] = True
    return result.get("success", False), result
551 |
552 |
553 | # --- Demo Sections ---
554 |
async def demo_section_1_browse(gateway, tracker: CostTracker) -> None:
    """Demonstrate basic browsing: a plain load and a selector-gated load."""
    console.print(Rule("[bold green]Demo 1: Basic Browsing[/]", style="green"))
    logger.info("Starting Demo Section 1: Basic Browsing")

    # 1a: Browse Example.com
    _, browse_result = await safe_tool_call(
        "Browse Example.com", browse, url=URL_EXAMPLE, tracker=tracker
    )
    display_result("Browse Example.com", browse_result)

    # 1b: Browse the bookstore, waiting for a specific element before returning
    _, bookstore_result = await safe_tool_call(
        "Browse Bookstore (wait for footer)",
        browse,
        url=URL_BOOKSTORE,
        wait_for_selector="footer.footer",
        tracker=tracker,
    )
    display_result("Browse Bookstore (Wait)", bookstore_result)
574 |
575 |
async def demo_section_2_interaction(gateway, tracker: CostTracker) -> None:
    """Demonstrate form filling and clicking via task hints on the bookstore site."""
    console.print(Rule("[bold green]Demo 2: Page Interaction[/]", style="green"))
    logger.info("Starting Demo Section 2: Page Interaction")

    # 2a: Search on Bookstore
    console.print(f"--- Scenario: Search for 'Science' on {URL_BOOKSTORE} ---")
    loaded_ok, initial_state_res = await safe_tool_call(
        "Load Bookstore Search Page",
        browse,
        url=URL_BOOKSTORE,
        tracker=tracker,
    )
    if not loaded_ok:
        # Nothing else in this section makes sense without the page.
        console.print("[red]Cannot proceed with interaction demo, failed to load page.[/]")
        return
    display_result("Bookstore Initial State", initial_state_res)

    # Fill the search form using task hints
    search_fields = [
        {"task_hint": "The search input field", "text": "Science", "enter": False},
    ]
    typed_ok, fill_res = await safe_tool_call(
        "Type into Bookstore Search Form",
        type_text,
        url=URL_BOOKSTORE,
        fields=search_fields,
        submit_hint="The search button",
        wait_after_submit_ms=1500,
        tracker=tracker,
    )
    display_result("Type into Bookstore Search Form", fill_res)

    if not typed_ok:
        return

    # 2b: Click the first search result (only reached if typing succeeded)
    console.print("--- Scenario: Click the first search result ---")
    current_url = fill_res.get("page_state", {}).get("url", URL_BOOKSTORE)

    _, click_res = await safe_tool_call(
        "Click First Book Result",
        click,
        url=current_url,
        task_hint="The link for the first book shown in the results list",
        wait_ms=1000,
        tracker=tracker,
    )
    display_result("Click First Book Result", click_res)
622 |
623 |
async def demo_section_3_search(gateway, tracker: CostTracker) -> None:
    """Run the same query against multiple web search engines and show results."""
    console.print(Rule("[bold green]Demo 3: Web Search[/]", style="green"))
    logger.info("Starting Demo Section 3: Web Search")

    search_query = "latest advancements in large language models"

    # Identical query, one call per engine.
    for engine_name, call_label in (
        ("bing", "Search Bing"),
        ("duckduckgo", "Search DuckDuckGo"),
    ):
        _, search_result = await safe_tool_call(
            call_label,
            search,
            query=search_query,
            engine=engine_name,
            max_results=5,
            tracker=tracker,
        )
        display_result(f"{call_label}: '{search_query}'", search_result)
651 |
652 |
async def demo_section_4_download(gateway, tracker: CostTracker) -> None:
    """Demonstrate site-wide PDF crawling and click-based file download."""
    console.print(Rule("[bold green]Demo 4: File Download[/]", style="green"))
    logger.info("Starting Demo Section 4: File Download")

    # Resolve the local demo output dir to an absolute path and ensure it exists.
    demo_outputs_abs = DEMO_OUTPUTS_DIR.resolve(strict=False)
    demo_outputs_abs.mkdir(parents=True, exist_ok=True)

    # Pre-create the directory the PDF crawler writes into.
    pdf_parent_dir = "storage/smart_browser_site_pdfs"
    console.print(f"[cyan]Creating parent directory for PDFs: {pdf_parent_dir}[/cyan]")
    from ultimate_mcp_server.tools.filesystem import create_directory
    mkdir_result = await create_directory(path=pdf_parent_dir)
    if mkdir_result.get("success", False):
        console.print(f"[green]Successfully created parent directory: {pdf_parent_dir}[/green]")
    else:
        console.print(f"[yellow]Warning: Could not create parent directory: {mkdir_result.get('error', 'Unknown error')}[/yellow]")

    # 4a: Crawl example.com for PDFs (none expected; exercises the tool)
    console.print("--- Scenario: Find and Download PDFs from Example.com ---")
    _, crawl_result = await safe_tool_call(
        "Download PDFs from Example.com",
        download_site_pdfs,
        start_url=URL_EXAMPLE,
        max_depth=1,
        max_pdfs=5,
        dest_subfolder="example_com_pdfs",
        tracker=tracker,
    )
    display_result("Download PDFs from Example.com", crawl_result)
    if crawl_result.get("pdf_count", 0) == 0:
        console.print("[yellow]Note: No PDFs found on example.com as expected.[/]")

    # 4b: Click-based download from a locally generated page
    download_page_content = f"""
<!DOCTYPE html>
<html><head><title>Download Test</title></head>
<body><h1>Download Page</h1>
<p>Click the link to download a dummy PDF.</p>
<a href="{URL_PDF_SAMPLE}" id="downloadLink">Download Dummy PDF Now</a>
<p>Another paragraph.</p>
</body></html>
"""
    download_page_path = demo_outputs_abs / "download_test.html"
    try:
        download_page_path.write_text(download_page_content, encoding="utf-8")
        local_url = download_page_path.as_uri()

        console.print("\n--- Scenario: Click a link to download a file ---")
        _, click_dl_result = await safe_tool_call(
            "Click to Download PDF",
            download,
            url=local_url,
            task_hint="The 'Download Dummy PDF Now' link",
            dest_dir="storage/sb_demo_outputs/clicked_downloads",  # Adjusted path
            tracker=tracker,
        )
        display_result("Click to Download PDF", click_dl_result)
    except Exception as e:
        console.print(f"[red]Error setting up or running click-download demo: {e}[/]")
    finally:
        # Always remove the temporary test page, even if the demo failed.
        if download_page_path.exists():
            try:
                download_page_path.unlink()
            except OSError:
                pass
719 |
720 |
async def demo_section_5_macro(gateway, tracker: CostTracker) -> None:
    """Demonstrate the natural-language macro runner on the bookstore site."""
    console.print(Rule("[bold green]Demo 5: Execute Macro[/]", style="green"))
    logger.info("Starting Demo Section 5: Execute Macro")

    macro_task = f"Go to {URL_BOOKSTORE}, search for 'History', find the book 'Sapiens: A Brief History of Humankind', and click its link."
    console.print("--- Scenario: Execute Macro ---")
    console.print(f"[italic]Task:[/italic] {escape(macro_task)}")

    _, macro_result = await safe_tool_call(
        "Execute Bookstore Search Macro",
        run_macro,
        url=URL_BOOKSTORE,
        task=macro_task,
        max_rounds=5,
        tracker=tracker,
    )
    display_result("Execute Bookstore Search Macro", macro_result)
738 |
739 |
async def demo_section_6_autopilot(gateway, tracker: CostTracker) -> None:
    """Demonstrate the multi-step `autopilot` tool on a web-research task."""
    console.print(Rule("[bold green]Demo 6: Autopilot[/]", style="green"))
    logger.info("Starting Demo Section 6: Autopilot")

    # High-level goal; autopilot plans and executes the individual steps itself.
    task_description = (
        "Search the web for the official documentation URL of the 'httpx' Python "
        "library, then browse that URL and summarize the main page content."
    )
    console.print("--- Scenario: Autopilot ---")
    console.print(f"[italic]Task:[/italic] {escape(task_description)}")

    call_label = "Run Autopilot: Find httpx Docs"
    _ok, outcome = await safe_tool_call(
        call_label,
        autopilot,
        task=task_description,
        max_steps=8,
        scratch_subdir="autopilot_demo",
        tracker=tracker,
    )
    display_result(call_label, outcome)

    # Surface the on-disk run log location when the tool reports one.
    run_log_path = outcome.get("run_log")
    if run_log_path:
        console.print(f"[dim]Autopilot run log saved to: {run_log_path}[/]")
759 |
760 |
async def demo_section_7_parallel(gateway, tracker: CostTracker) -> None:
    """Demonstrate the `parallel` tool: fetch page state for several URLs at once."""
    console.print(Rule("[bold green]Demo 7: Parallel Processing[/]", style="green"))
    logger.info("Starting Demo Section 7: Parallel Processing")

    target_urls = [
        URL_EXAMPLE,
        URL_BOOKSTORE,
        URL_QUOTES,
        "http://httpbin.org/delay/1",
        "https://webscraper.io/test-sites/e-commerce/static",
    ]
    console.print("--- Scenario: Get Page State for Multiple URLs in Parallel ---")
    console.print(f"[dim]URLs:[/dim] {target_urls}")

    ok, outcome = await safe_tool_call(
        "Parallel Get Page State",
        parallel,
        urls=target_urls,
        action="get_state",  # 'get_state' is the only supported action currently
        # max_tabs=3 could override the default tab limit if needed
        tracker=tracker,
    )

    # Custom per-URL rendering instead of the generic display helper.
    console.print(Rule("[bold cyan]Parallel Processing Results[/]", style="cyan"))
    if not ok:
        console.print(
            Panel(
                f"[red]Parallel processing tool call failed:[/red]\n{escape(outcome.get('error', '?'))}",
                border_style="red",
            )
        )
        console.print()
        return

    console.print(f"Total URLs Processed: {outcome.get('processed_count', 0)}")
    console.print(f"Successful: {outcome.get('successful_count', 0)}")
    console.print("-" * 20)
    for idx, entry in enumerate(outcome.get("results", []), start=1):
        entry_url = entry.get("url", f"URL {idx}")
        entry_ok = entry.get("success", False)
        if entry_ok:
            state = entry.get("page_state", {})
            body = (
                f"Title: {escape(state.get('title', 'N/A'))}\n"
                f"Elements Found: {len(state.get('elements', []))}"
            )
        else:
            body = f"[red]Error:[/red] {escape(entry.get('error', 'Unknown'))}"
        console.print(
            Panel(
                body,
                title=f"Result for: {escape(entry_url)}",
                border_style="green" if entry_ok else "red",
                padding=(0, 1),
                expand=False,
            )
        )
    console.print()
812 |
813 |
async def demo_section_8_docs(gateway, tracker: CostTracker) -> None:
    """Demonstrate the `collect_documentation` tool for a PyPI package."""
    console.print(Rule("[bold green]Demo 8: Documentation Collection[/]", style="green"))
    logger.info("Starting Demo Section 8: Documentation Collection")

    target_package = "fastapi"  # deliberately different from packages used elsewhere
    console.print(f"--- Scenario: Collect Documentation for '{target_package}' ---")

    call_label = f"Collect Docs: {target_package}"
    _ok, outcome = await safe_tool_call(
        call_label,
        collect_documentation,
        package=target_package,
        max_pages=15,
        rate_limit_rps=2.0,
        tracker=tracker,
    )
    # display_result knows how to surface file_path / pages_collected fields.
    display_result(call_label, outcome)
831 |
832 |
833 | # --- Main Function ---
async def main() -> int:
    """Run the SmartBrowser tools demo end to end.

    Initializes the MCP Gateway and its LLM providers, runs demo sections 1-8
    in order, and always attempts a Smart Browser shutdown in the finally block.

    Returns:
        int: Process exit code — 0 on success, 1 if a critical error aborted the demo.
    """
    console.print(Rule("[bold magenta]Smart Browser Tools Demo[/bold magenta]"))

    exit_code = 0
    gateway = None

    # Ensure local demo output directory exists before any section writes to it
    DEMO_OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
    console.print(f"[dim]Demo-specific outputs will be saved in: {DEMO_OUTPUTS_DIR}[/]")

    try:
        # --- Initialize Gateway for providers only ---
        console.print("[cyan]Initializing MCP Gateway...[/]")
        gateway = Gateway("smart-browser-demo")
        console.print("[cyan]Initializing Providers (for LLM tools)...[/]")
        # NOTE: uses the Gateway's private initializer; this demo drives it directly
        await gateway._initialize_providers()

        # --- Initialize Smart Browser module ---
        # The explicit initialize() call is disabled; presumably the tools
        # lazy-initialize on first use — confirm against the smart_browser module.
        console.print("[cyan]Initializing Smart Browser tool...[/]")
        # await initialize()

        # Initialize CostTracker shared by all sections to accumulate LLM spend
        tracker = CostTracker()

        # Run Demo Sections in fixed order (each takes gateway and tracker)
        await demo_section_1_browse(gateway, tracker)
        await demo_section_2_interaction(gateway, tracker)
        await demo_section_3_search(gateway, tracker)
        await demo_section_4_download(gateway, tracker)
        await demo_section_5_macro(gateway, tracker)
        await demo_section_6_autopilot(gateway, tracker) # Can be intensive; comment out to skip
        # console.print(
        #     "[yellow]Skipping Autopilot demo section (can be intensive). Uncomment to run.[/]"
        # )
        await demo_section_7_parallel(gateway, tracker)
        await demo_section_8_docs(gateway, tracker)

        console.print(Rule("[bold magenta]Demo Complete[/bold magenta]"))

    except Exception as e:
        # Any uncaught section failure is treated as fatal for the whole demo
        logger.critical(f"Demo failed with critical error: {e}", exc_info=True)
        console.print("[bold red]CRITICAL ERROR DURING DEMO:[/]")
        console.print_exception(show_locals=True)
        exit_code = 1
    finally:
        # Always attempt Smart Browser shutdown, even after a failure,
        # but never let a shutdown error mask the demo's exit code.
        console.print("[cyan]Shutting down Smart Browser tool...[/]")
        try:
            await shutdown()
        except Exception as e:
            logger.error(f"Error during Smart Browser shutdown: {e}")

    return exit_code
888 |
889 |
if __name__ == "__main__":
    # Script entry point: drive the async main() via asyncio.run
    try:
        exit_code = asyncio.run(main())
        sys.exit(exit_code)
    except KeyboardInterrupt:
        console.print("\n[yellow]Demo interrupted by user. Shutting down...[/]")
        # Best-effort emergency cleanup: main()'s finally may not have completed,
        # so run shutdown() in a fresh event loop (the interrupted one is closed).
        try:
            asyncio.run(shutdown())
        except Exception as e:
            # Plain print — the rich console may be unusable mid-interrupt
            print(f"Error during emergency shutdown: {e}")
        sys.exit(1)
903 |
```
--------------------------------------------------------------------------------
/examples/grok_integration_demo.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | """Grok integration demonstration using Ultimate MCP Server."""
3 | import asyncio
4 | import json
5 | import sys
6 | import time
7 | from pathlib import Path
8 |
9 | # Add project root to path for imports when running as script
10 | sys.path.insert(0, str(Path(__file__).parent.parent))
11 |
12 | # Third-party imports
13 | from rich import box
14 | from rich.align import Align
15 | from rich.columns import Columns
16 | from rich.console import Console, Group
17 | from rich.live import Live
18 | from rich.markup import escape
19 | from rich.panel import Panel
20 | from rich.progress import (
21 | BarColumn,
22 | Progress,
23 | TaskProgressColumn,
24 | TextColumn,
25 | TimeElapsedColumn,
26 | )
27 | from rich.rule import Rule
28 | from rich.table import Table
29 | from rich.text import Text
30 | from rich.tree import Tree
31 |
32 | # Project imports
33 | from ultimate_mcp_server.constants import Provider
34 | from ultimate_mcp_server.core.server import Gateway
35 | from ultimate_mcp_server.utils import get_logger
36 | from ultimate_mcp_server.utils.display import CostTracker
37 | from ultimate_mcp_server.utils.logging.console import console
38 |
39 | # Initialize logger
40 | logger = get_logger("example.grok_integration")
41 |
42 | # Create a separate console for detailed debugging output
43 | debug_console = Console(stderr=True, highlight=False)
44 |
45 |
async def compare_grok_models(tracker: CostTracker):
    """Compare different Grok models on a single fixed prompt.

    Lists the provider's grok-3 family models, runs the same completion on up
    to two of them, records cost into *tracker*, and renders per-model Rich
    panels with token/timing/cost stats (plus reasoning output for mini models).

    Args:
        tracker: Shared CostTracker that accumulates the cost of each call.
    """
    console.print(Rule("[bold cyan]⚡ Grok Model Comparison [/bold cyan]", style="bold blue"))
    logger.info("Starting Grok models comparison", emoji_key="start")

    # Create Gateway instance - this handles provider initialization.
    # OpenRouter is excluded so only the native Grok provider is exercised.
    gateway = Gateway("grok-demo", register_tools=False, provider_exclusions=[Provider.OPENROUTER.value])

    # Initialize providers (uses the Gateway's private initializer directly)
    logger.info("Initializing providers...", emoji_key="provider")
    await gateway._initialize_providers()

    provider_name = Provider.GROK
    try:
        # Get the provider from the gateway; bail out early if unavailable
        provider = gateway.providers.get(provider_name)
        if not provider:
            logger.error(f"Provider {provider_name} not available or initialized", emoji_key="error")
            return

        logger.info(f"Using provider: {provider_name}", emoji_key="provider")

        models = await provider.list_models()
        model_names = [m["id"] for m in models]  # Extract names from model dictionaries

        # Display available models in a tree structure with consistent padding
        model_tree = Tree("[bold cyan]Available Grok Models[/bold cyan]")
        for model in model_names:
            # Only display grok-3 models
            if not model.startswith("grok-3"):
                continue

            # Color-code by naming convention: "fast" = speed, "mini" = reasoning
            if "fast" in model:
                model_tree.add(f"[bold yellow]{model}[/bold yellow] [dim](optimized for speed)[/dim]")
            elif "mini" in model:
                model_tree.add(f"[bold green]{model}[/bold green] [dim](optimized for reasoning)[/dim]")
            else:
                model_tree.add(f"[bold magenta]{model}[/bold magenta] [dim](general purpose)[/dim]")

        # Add padding around the tree
        console.print(Panel(model_tree, border_style="dim cyan", padding=(1, 2)))

        # Select specific models to compare (preferred pair)
        grok_models = [
            "grok-3-latest",
            "grok-3-mini-latest"
        ]

        # Filter based on available models; fall back to any two grok-3 models
        models_to_compare = [m for m in grok_models if m in model_names]
        if not models_to_compare:
            # Only use grok-3 models
            models_to_compare = [m for m in model_names if m.startswith("grok-3")][:2]

        if not models_to_compare:
            logger.warning("No grok-3 models available for comparison.", emoji_key="warning")
            return

        # Consistent panel styling
        console.print(Panel(
            f"Comparing models: [yellow]{escape(', '.join(models_to_compare))}[/yellow]",
            title="[bold]Comparison Setup[/bold]",
            border_style="blue", # Use blue for info
            padding=(1, 2)
        ))

        prompt = """
        Explain the concept of quantum entanglement in a way that a high school student would understand.
        Keep your response brief and accessible.
        """

        # Consistent panel styling for prompt
        console.print(Panel(
            escape(prompt.strip()),
            title="[bold]Test Prompt[/bold]",
            border_style="yellow", # Yellow for prompts
            expand=False,
            padding=(1, 2)
        ))

        # Per-model result dicts; an entry carries either full stats or "error"
        results_data = []

        # Create progress display with TaskProgressColumn
        with Progress(
            TextColumn("[bold blue]{task.description}"),
            BarColumn(complete_style="green", finished_style="green"),
            TaskProgressColumn(),
            TextColumn("[green]{task.completed} of {task.total}"),
            TimeElapsedColumn(),
            console=console,
            expand=True
        ) as progress:
            task_id = progress.add_task("[cyan]Testing models...", total=len(models_to_compare))

            for model_name in models_to_compare:
                progress.update(task_id, description=f"[cyan]Testing model: [bold]{model_name}[/bold]")

                try:
                    logger.info(f"Testing model: {model_name}", emoji_key="model")
                    # Wall-clock timing around the whole call (includes network latency)
                    start_time = time.time()
                    result = await provider.generate_completion(
                        prompt=prompt,
                        model=model_name,
                        temperature=0.3,
                        max_tokens=300
                    )
                    processing_time = time.time() - start_time

                    # Track the cost
                    tracker.add_call(result)

                    # Log detailed timing info to debug console (stderr)
                    debug_console.print(f"[dim]Model {model_name} processing details:[/dim]")
                    debug_console.print(f"[dim]Time: {processing_time:.2f}s | Tokens: {result.total_tokens}[/dim]")

                    # Check if model is a mini model with reasoning output.
                    # NOTE(review): assumes mini models expose "reasoning_content" /
                    # "reasoning_tokens" in result.metadata — confirm against provider impl.
                    reasoning_content = None
                    reasoning_tokens = None
                    if "mini" in model_name and result.metadata:
                        reasoning_content = result.metadata.get("reasoning_content")
                        reasoning_tokens = result.metadata.get("reasoning_tokens")

                    results_data.append({
                        "model": model_name,
                        "text": result.text,
                        "tokens": {
                            "input": result.input_tokens,
                            "output": result.output_tokens,
                            "total": result.total_tokens
                        },
                        "reasoning_content": reasoning_content,
                        "reasoning_tokens": reasoning_tokens,
                        "cost": result.cost,
                        "time": processing_time
                    })

                    logger.success(
                        f"Completion for {model_name} successful",
                        emoji_key="success",
                    )

                except Exception as e:
                    # Record the failure so it still shows up in the results display
                    logger.error(f"Error testing model {model_name}: {str(e)}", emoji_key="error", exc_info=True)
                    debug_console.print_exception()
                    results_data.append({
                        "model": model_name,
                        "error": str(e)
                    })

                progress.advance(task_id)

        # Display comparison results using Rich
        if results_data:
            # Bolder rule style
            console.print(Rule("[bold green]⚡ Comparison Results [/bold green]", style="bold green"))

            # Store panels for potential column layout
            comparison_panels = []

            for result_item in results_data:
                model = result_item["model"]

                if "error" in result_item:
                    # Handle error case with consistent styling
                    error_panel = Panel(
                        f"[red]{escape(result_item['error'])}[/red]",
                        title=f"[bold red]{escape(model)} - ERROR[/bold red]",
                        border_style="red", # Red for errors
                        expand=False,
                        padding=(1, 2)
                    )
                    comparison_panels.append(error_panel)
                    continue

                time_s = result_item["time"]
                tokens = result_item.get("tokens", {})
                input_tokens = tokens.get("input", 0)
                output_tokens = tokens.get("output", 0)
                total_tokens = tokens.get("total", 0)

                # Speed metric counts input+output tokens over total wall time
                tokens_per_second = total_tokens / time_s if time_s > 0 else 0
                cost = result_item.get("cost", 0.0)
                text = result_item.get("text", "[red]Error generating response[/red]").strip()

                # Determine border color based on model type (consistent scheme)
                border_style = "magenta" # Magenta for general models
                if "mini" in model:
                    border_style = "green" # Green for reasoning
                elif "fast" in model:
                    border_style = "yellow" # Yellow for speed

                # Create the panel for this model's output
                model_panel = Panel(
                    escape(text),
                    title=f"[bold {border_style}]{escape(model)}[/bold {border_style}]", # Use border color in title
                    subtitle="[dim]Response Text[/dim]",
                    border_style=border_style,
                    expand=True,
                    # height=len(text.splitlines()) + 4, # Adjust height dynamically based on padding
                    padding=(1, 2) # Standard padding
                )

                # Create beautiful stats table with a slightly different box
                stats_table = Table(box=box.MINIMAL, show_header=False, expand=True, padding=0)
                stats_table.add_column("Metric", style="dim cyan", width=15)
                stats_table.add_column("Value", style="white")
                stats_table.add_row("Input Tokens", f"[yellow]{input_tokens}[/yellow]")
                stats_table.add_row("Output Tokens", f"[green]{output_tokens}[/green]")
                stats_table.add_row("Total Tokens", f"[bold cyan]{total_tokens}[/bold cyan]")
                stats_table.add_row("Time", f"[yellow]{time_s:.2f}s[/yellow]")
                stats_table.add_row("Speed", f"[blue]{tokens_per_second:.1f} tok/s[/blue]")
                stats_table.add_row("Cost", f"[green]${cost:.6f}[/green]")

                # Combine as a single compact panel
                combined_panel = Panel(
                    Group(
                        model_panel,
                        Align.center(stats_table)
                    ),
                    border_style=border_style,
                    padding=(1, 1), # Reduced padding for combined view
                    title=f"[bold]Response from {escape(model)}[/bold]"
                )

                # If there's reasoning content, show it directly
                reasoning_content = result_item.get("reasoning_content")
                reasoning_tokens = result_item.get("reasoning_tokens")

                if reasoning_content:
                    reasoning_panel = Panel(
                        escape(reasoning_content),
                        title="[bold cyan]Reasoning Process[/bold cyan]",
                        subtitle=f"[dim]Reasoning Tokens: {reasoning_tokens}[/dim]",
                        border_style="cyan", # Cyan for reasoning/tools
                        expand=True,
                        # height=len(reasoning_content.splitlines()) + 4, # Adjust height
                        padding=(1, 2) # Standard padding
                    )
                    # Group main result and reasoning
                    comparison_panels.append(Group(combined_panel, reasoning_panel))
                else:
                    comparison_panels.append(combined_panel)

        # Use Columns layout if exactly two results (and no errors resulted in fewer panels)
        if len(comparison_panels) == 2 and len(comparison_panels) == len(results_data):
            console.print(Columns(comparison_panels, equal=True, expand=True))
        else:
            # Otherwise, print panels sequentially
            for panel in comparison_panels:
                console.print(panel)

    except Exception as e:
        logger.error(f"Error in model comparison: {str(e)}", emoji_key="error", exc_info=True)
299 |
300 |
async def demonstrate_reasoning(tracker: CostTracker):
    """Demonstrate Grok-mini reasoning capabilities on a math word problem.

    Prefers "grok-3-mini-latest" (falls back to any "mini" model, then the
    provider default), requests high reasoning effort, and renders the model's
    thinking process and final solution as two stacked Rich panels.

    Args:
        tracker: Shared CostTracker that accumulates the cost of the call.
    """
    console.print(Rule("[bold cyan]⚡ Grok Reasoning Demonstration [/bold cyan]", style="bold blue"))
    logger.info("Demonstrating Grok-mini reasoning capabilities", emoji_key="start")

    # Create Gateway instance - this handles provider initialization
    gateway = Gateway("grok-demo", register_tools=False, provider_exclusions=[Provider.OPENROUTER.value])

    # Initialize providers
    logger.info("Initializing providers...", emoji_key="provider")
    await gateway._initialize_providers()

    provider_name = Provider.GROK
    try:
        # Get the provider from the gateway; bail out early if unavailable
        provider = gateway.providers.get(provider_name)
        if not provider:
            logger.error(f"Provider {provider_name} not available or initialized", emoji_key="error")
            return

        # Use a Grok mini model (ensure it's available)
        model = "grok-3-mini-latest"
        available_models = await provider.list_models()
        model_names = [m["id"] for m in available_models]

        if model not in model_names:
            # Find any mini model; the for-else falls back to the provider default
            # only when the loop finishes without a break (no "mini" model found)
            for m in model_names:
                if "mini" in m:
                    model = m
                    break
            else:
                logger.warning("No mini model available for reasoning demo. Using default model.", emoji_key="warning")
                model = provider.get_default_model()

        logger.info(f"Using model: {model}", emoji_key="model")

        # Problem requiring reasoning (leading whitespace stripped at display time)
        problem = """
        A cylindrical water tank has a radius of 3 meters and a height of 4 meters.
        If water flows in at a rate of 2 cubic meters per minute, how long will it take to fill the tank?
        Show your work step by step.
        """

        # Consistent panel styling for prompt
        console.print(Panel(
            escape(problem.strip()),
            title="[bold yellow]Math Problem[/bold yellow]",
            border_style="yellow", # Yellow for prompts
            expand=False,
            padding=(1, 2) # Standard padding
        ))

        with Progress(
            TextColumn("[bold blue]Status:"),
            BarColumn(complete_style="green", finished_style="green"),
            TaskProgressColumn(),
            TextColumn("[cyan]{task.description}"),
            TimeElapsedColumn(),
            console=console,
            expand=True
        ) as progress:
            # Single-step progress bar: 0 -> 1 when the completion returns
            task = progress.add_task("[cyan]Thinking...", total=1)

            logger.info("Generating solution with reasoning", emoji_key="processing")

            # NOTE(review): assumes the provider forwards reasoning_effort to the
            # Grok API — confirm against the provider's generate_completion signature
            result = await provider.generate_completion(
                prompt=problem,
                model=model,
                temperature=0.3,
                reasoning_effort="high", # Use high reasoning effort
                max_tokens=1000
            )

            # Track the cost
            tracker.add_call(result)

            progress.update(task, description="Complete!", completed=1)

        logger.success("Reasoning solution completed", emoji_key="success")

        # Extract reasoning content from result metadata, when the model provides it
        reasoning_content = None
        reasoning_tokens = None
        if result.metadata:
            reasoning_content = result.metadata.get("reasoning_content")
            reasoning_tokens = result.metadata.get("reasoning_tokens")

        # Create a more compact layout for reasoning demo
        if reasoning_content:
            reasoning_panel = Panel(
                escape(reasoning_content),
                title="[bold cyan]Thinking Process[/bold cyan]",
                subtitle=f"[dim]Reasoning Tokens: {reasoning_tokens}[/dim]",
                border_style="cyan", # Cyan for reasoning/tools
                expand=True,
                # height=len(reasoning_content.splitlines()) + 4, # Adjust height
                padding=(1, 2) # Standard padding
            )
        else:
            # Placeholder panel keeps the layout consistent when no reasoning returned
            reasoning_panel = Panel(
                "[italic]No explicit reasoning process available[/italic]",
                title="[bold cyan]Thinking Process[/bold cyan]",
                border_style="cyan", # Cyan for reasoning/tools
                expand=True,
                padding=(1, 2) # Standard padding
            )

        # Format the answer
        answer_panel = Panel(
            escape(result.text.strip()),
            title="[bold green]Final Solution[/bold green]",
            subtitle=f"[dim]Tokens: {result.input_tokens} in, {result.output_tokens} out | Cost: ${result.cost:.6f} | Time: {result.processing_time:.2f}s[/dim]",
            border_style="green", # Green for success/final result
            expand=True,
            # height=len(result.text.strip().splitlines()) + 4, # Adjust height
            padding=(1, 2) # Standard padding
        )

        # Use Group for better vertical spacing control than grid
        console.print(Group(reasoning_panel, answer_panel))

    except Exception as e:
        logger.error(f"Error in reasoning demonstration: {str(e)}", emoji_key="error", exc_info=True)
425 |
426 |
async def demonstrate_function_calling(tracker: CostTracker):
    """Demonstrate Grok function calling capabilities.

    Sends a travel-planning query together with two tool schemas (weather and
    flight lookup), then renders any tool calls the model emits alongside
    locally fabricated mock results. The mock results are display-only — they
    are never sent back to the model for a second turn.

    Args:
        tracker: Shared CostTracker that accumulates the cost of the call.
    """
    console.print(Rule("[bold cyan]⚡ Grok Function Calling Demonstration [/bold cyan]", style="bold blue"))
    logger.info("Demonstrating Grok function calling capabilities", emoji_key="start")

    # Create Gateway instance - this handles provider initialization
    gateway = Gateway("grok-demo", register_tools=False, provider_exclusions=[Provider.OPENROUTER.value])

    # Initialize providers
    logger.info("Initializing providers...", emoji_key="provider")
    await gateway._initialize_providers()

    provider_name = Provider.GROK
    try:
        # Get the provider from the gateway; bail out early if unavailable
        provider = gateway.providers.get(provider_name)
        if not provider:
            logger.error(f"Provider {provider_name} not available or initialized", emoji_key="error")
            return

        # Use default Grok model
        model = provider.get_default_model()
        logger.info(f"Using model: {model}", emoji_key="model")

        # Define tools for the model to use (OpenAI-style function schemas)
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get the current weather in a given location",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "location": {
                                "type": "string",
                                "description": "The city and state, e.g. San Francisco, CA"
                            },
                            "unit": {
                                "type": "string",
                                "enum": ["celsius", "fahrenheit"],
                                "description": "The unit of temperature to use"
                            }
                        },
                        "required": ["location"]
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "get_flight_info",
                    "description": "Get flight information between two cities",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "departure_city": {
                                "type": "string",
                                "description": "The departure city"
                            },
                            "arrival_city": {
                                "type": "string",
                                "description": "The arrival city"
                            },
                            "date": {
                                "type": "string",
                                "description": "The date of travel in YYYY-MM-DD format"
                            }
                        },
                        "required": ["departure_city", "arrival_city"]
                    }
                }
            }
        ]

        # Display tools in a Panel for consistency
        tools_table = Table(title="[bold cyan]Available Tools[/bold cyan]", box=box.MINIMAL, show_header=True, header_style="bold magenta")
        tools_table.add_column("Tool Name", style="cyan", no_wrap=True)
        tools_table.add_column("Description", style="white")
        tools_table.add_column("Parameters", style="green")

        for tool in tools:
            function = tool["function"]
            name = function["name"]
            description = function["description"]
            params = ", ".join([p for p in function["parameters"]["properties"]])
            tools_table.add_row(name, description, params)

        console.print(Panel(tools_table, border_style="cyan", padding=(1, 2))) # Cyan for tools

        # User query designed to trigger both tools in one response
        user_query = "I'm planning a trip from New York to Los Angeles next week. What's the weather like in LA, and can you help me find flight information?"

        # Consistent panel styling for prompt
        console.print(Panel(
            escape(user_query),
            title="[bold yellow]User Query[/bold yellow]",
            border_style="yellow", # Yellow for prompts
            expand=False,
            padding=(1, 2) # Standard padding
        ))

        with Progress(
            TextColumn("[bold blue]Status:"),
            BarColumn(complete_style="green", finished_style="green"),
            TaskProgressColumn(),
            TextColumn("[cyan]{task.description}"),
            TimeElapsedColumn(),
            console=console,
            expand=True
        ) as progress:
            # Single-step progress bar: 0 -> 1 when the completion returns
            task = progress.add_task("[cyan]Processing...", total=1)

            logger.info("Generating completion with function calling", emoji_key="processing")

            result = await provider.generate_completion(
                prompt=user_query,
                model=model,
                temperature=0.7,
                tools=tools,
                tool_choice="auto"
            )

            # Track the cost
            tracker.add_call(result)

            progress.update(task, description="Complete!", completed=1)

        logger.success("Function calling completed", emoji_key="success")

        # Check if there are tool calls in the response.
        # Reads the raw OpenAI-compatible response shape (choices[0].message);
        # hasattr guards SDK objects that omit the tool_calls attribute.
        tool_calls = None
        if hasattr(result.raw_response.choices[0].message, 'tool_calls') and \
           result.raw_response.choices[0].message.tool_calls:
            tool_calls = result.raw_response.choices[0].message.tool_calls

        if tool_calls:
            # Format the model response (may be empty when only tool calls returned)
            response_text = escape(result.text.strip()) if result.text else "[italic dim]No direct text response, only tool calls.[/italic dim]"
            response_info = f"[dim]Input Tokens: {result.input_tokens} | Output Tokens: {result.output_tokens} | Cost: ${result.cost:.6f}[/dim]"

            model_response_panel = Panel(
                response_text,
                title="[bold green]Model Response[/bold green]",
                subtitle=response_info,
                padding=(1, 2), # Standard padding
                border_style="green" # Green for success/results
            )

            # Prepare panels for tool calls
            tool_panels = []
            for tool_call in tool_calls:
                # Parse JSON arguments; fall back to raw string if the model
                # emitted malformed JSON
                try:
                    args = json.loads(tool_call.function.arguments)
                    args_formatted = f"[json]{escape(json.dumps(args, indent=2))}[/json]"
                except Exception:
                    args_formatted = escape(tool_call.function.arguments)

                # Create compact tool call display content
                call_content_lines = [
                    f"[bold cyan]Function:[/bold cyan] [magenta]{tool_call.function.name}[/magenta]",
                    f"[bold cyan]Arguments:[/bold cyan]\n{args_formatted}"
                ]

                # Add mock function result if available (display-only; hard-coded values)
                result_data = None
                if tool_call.function.name == "get_weather":
                    location = args.get("location", "Unknown")
                    unit = args.get("unit", "fahrenheit")
                    temp = 75 if unit == "fahrenheit" else 24
                    result_data = {
                        "location": location,
                        "temperature": temp,
                        "unit": unit,
                        "condition": "Sunny",
                        "humidity": 65
                    }
                elif tool_call.function.name == "get_flight_info":
                    departure = args.get("departure_city", "Unknown")
                    arrival = args.get("arrival_city", "Unknown")
                    date = args.get("date", "2025-04-20")  # noqa: F841
                    result_data = {
                        "flights": [
                            {
                                "airline": "Delta", "flight": "DL1234",
                                "departure": f"{departure} 08:30 AM", "arrival": f"{arrival} 11:45 AM",
                                "price": "$349.99"
                            },
                            {
                                "airline": "United", "flight": "UA567",
                                "departure": f"{departure} 10:15 AM", "arrival": f"{arrival} 1:30 PM",
                                "price": "$289.99"
                            }
                        ]
                    }

                if result_data:
                    result_formatted = f"[json]{escape(json.dumps(result_data, indent=2))}[/json]"
                    call_content_lines.append(f"\n[bold blue]Mock Result:[/bold blue]\n{result_formatted}")

                # Join content lines for the panel
                call_content = "\n".join(call_content_lines)

                tool_panel = Panel(
                    call_content,
                    title=f"[bold magenta]Tool Call: {tool_call.function.name}[/bold magenta]",
                    subtitle=f"[dim]ID: {tool_call.id}[/dim]",
                    border_style="magenta", # Magenta for specific tool calls
                    padding=(1, 2) # Standard padding
                )
                tool_panels.append(tool_panel)

            # Use Columns for horizontal layout if multiple tool calls
            if len(tool_panels) > 1:
                tool_call_display = Columns(tool_panels, equal=True, expand=True)
            elif tool_panels:
                tool_call_display = tool_panels[0]
            else: # Should not happen if tool_calls is true, but handle defensively
                tool_call_display = Text("No tool calls processed.", style="dim")

            # Create combined panel with response and tool calls
            combined_panel = Panel(
                Group(
                    model_response_panel,
                    tool_call_display
                ),
                title="[bold green]Function Calling Results[/bold green]",
                border_style="green", # Green for overall success
                padding=(1, 1) # Slightly reduced outer padding
            )

            console.print(combined_panel)
        else:
            # No tool calls, just display the response with consistent styling
            console.print(Panel(
                escape(result.text.strip()),
                title="[bold green]Model Response (No Tool Calls)[/bold green]",
                subtitle=f"[dim]Tokens: {result.input_tokens} in, {result.output_tokens} out | Cost: ${result.cost:.6f}[/dim]",
                border_style="green", # Green for success/result
                padding=(1, 2) # Standard padding
            ))

        console.print() # Keep spacing

    except Exception as e:
        logger.error(f"Error in function calling demonstration: {str(e)}", emoji_key="error", exc_info=True)
674 |
675 |
async def streaming_example(tracker: CostTracker):
    """Demonstrate Grok streaming capabilities.

    Streams a short-story completion from the default Grok model into a
    live-updating Rich panel, then prints rough throughput statistics.

    Args:
        tracker: Accepted for signature parity with the other demo sections;
            no cost sample is recorded here since the streamed chunks in this
            demo carry no aggregate cost metadata.
    """
    console.print(Rule("[bold cyan]⚡ Grok Streaming Demonstration [/bold cyan]", style="bold blue"))
    logger.info("Demonstrating Grok streaming capabilities", emoji_key="start")

    # The Gateway handles provider initialization; OpenRouter is excluded.
    gateway = Gateway("grok-demo", register_tools=False, provider_exclusions=[Provider.OPENROUTER.value])

    logger.info("Initializing providers...", emoji_key="provider")
    await gateway._initialize_providers()

    provider_name = Provider.GROK
    try:
        # NOTE(review): lookup uses the enum member itself, not .value —
        # presumably the providers dict is keyed accordingly; verify.
        provider = gateway.providers.get(provider_name)
        if not provider:
            logger.error(f"Provider {provider_name} not available or initialized", emoji_key="error")
            return

        # Fall back to whatever the provider considers its default model.
        model = provider.get_default_model()
        logger.info(f"Using model: {model}", emoji_key="model")

        prompt = "Write a short story about an AI that discovers emotions for the first time."

        # Yellow panel = prompt, consistent with the other demo sections.
        console.print(Panel(
            escape(prompt),
            title="[bold yellow]Streaming Prompt[/bold yellow]",
            border_style="yellow",
            expand=False,
            padding=(1, 2)
        ))

        # Green panel = results; its renderable/subtitle are mutated in place
        # while the stream is live.
        live_panel = Panel(
            "",
            title=f"[bold green]Streaming Output from {model}[/bold green]",
            subtitle="[dim]Live output...[/dim]",
            border_style="green",
            expand=True,
            height=15,
            padding=(1, 2)
        )

        logger.info("Starting stream", emoji_key="processing")
        stream = provider.generate_completion_stream(
            prompt=prompt,
            model=model,
            temperature=0.7,
            max_tokens=500
        )

        accumulated = ""
        chunks_received = 0
        stream_start = time.time()

        # Render each incoming chunk as it arrives via Rich's Live display.
        with Live(live_panel, console=console, refresh_per_second=10, vertical_overflow="visible") as live:
            async for content, _metadata in stream:
                chunks_received += 1
                accumulated += content
                # Text + escape keeps any markup in the model output literal.
                live_panel.renderable = Text(escape(accumulated))
                live_panel.subtitle = f"[dim]Received {chunks_received} chunks...[/dim]"
                live.update(live_panel)

        # Re-print once outside the Live context so the finished panel persists.
        live_panel.subtitle = f"[bold green]Stream Complete ({chunks_received} chunks)[/bold green]"
        console.print(live_panel)

        elapsed = time.time() - stream_start
        # Crude word-count-based token estimate — adequate for a demo readout.
        approx_tokens = len(accumulated.split()) * 1.3
        approx_rate = approx_tokens / elapsed if elapsed > 0 else 0

        stats = Table(title="[bold blue]Streaming Stats[/bold blue]", box=box.MINIMAL, padding=(0,1), show_header=False)
        stats.add_column("Metric", style="dim cyan")
        stats.add_column("Value", style="white")
        stats.add_row("Total Time", f"[yellow]{elapsed:.2f}s[/yellow]")
        stats.add_row("Chunks Received", f"[green]{chunks_received}[/green]")
        stats.add_row("Est. Output Tokens", f"[cyan]~{int(approx_tokens)}[/cyan]")
        stats.add_row("Est. Speed", f"[blue]{approx_rate:.1f} tok/s[/blue]")

        # Blue panel = informational stats.
        console.print(Panel(stats, border_style="blue", padding=(1, 2)))
        logger.success("Streaming completed", emoji_key="success")

    except Exception as e:
        logger.error(f"Error in streaming demonstration: {str(e)}", emoji_key="error", exc_info=True)
773 |
774 |
async def main():
    """Run Grok integration examples.

    Executes each demo section in order with a shared CostTracker, prints a
    cost summary at the end, and returns a process exit status (0 on success,
    1 on failure).
    """
    tracker = CostTracker()
    try:
        # Banner panel with vertical padding for the showcase title.
        banner = Text("⚡ Grok Integration Showcase ⚡", style="bold white on blue")
        banner.justify = "center"
        console.print(Panel(banner, box=box.DOUBLE_EDGE, padding=(1, 0)))

        debug_console.print("[dim]Starting Grok integration demo in debug mode[/dim]")

        # First three sections each get a trailing blank line as a separator;
        # the streaming section intentionally does not (summary follows it).
        for section in (compare_grok_models, demonstrate_reasoning, demonstrate_function_calling):
            await section(tracker)
            console.print()

        await streaming_example(tracker)

        # Aggregate cost report across all sections.
        tracker.display_summary(console)

    except Exception as e:
        logger.critical(f"Example failed: {str(e)}", emoji_key="critical", exc_info=True)
        debug_console.print_exception(show_locals=True)
        return 1

    logger.success("Grok Integration Demo Finished Successfully!", emoji_key="complete")
    return 0
815 |
816 |
if __name__ == "__main__":
    # Drive the async entry point and hand its status code to the shell.
    sys.exit(asyncio.run(main()))
```