This is page 12 of 45. Use http://codebase.md/dicklesworthstone/llm_gateway_mcp_server?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .cursorignore
├── .env.example
├── .envrc
├── .gitignore
├── additional_features.md
├── check_api_keys.py
├── completion_support.py
├── comprehensive_test.py
├── docker-compose.yml
├── Dockerfile
├── empirically_measured_model_speeds.json
├── error_handling.py
├── example_structured_tool.py
├── examples
│ ├── __init__.py
│ ├── advanced_agent_flows_using_unified_memory_system_demo.py
│ ├── advanced_extraction_demo.py
│ ├── advanced_unified_memory_system_demo.py
│ ├── advanced_vector_search_demo.py
│ ├── analytics_reporting_demo.py
│ ├── audio_transcription_demo.py
│ ├── basic_completion_demo.py
│ ├── cache_demo.py
│ ├── claude_integration_demo.py
│ ├── compare_synthesize_demo.py
│ ├── cost_optimization.py
│ ├── data
│ │ ├── sample_event.txt
│ │ ├── Steve_Jobs_Introducing_The_iPhone_compressed.md
│ │ └── Steve_Jobs_Introducing_The_iPhone_compressed.mp3
│ ├── docstring_refiner_demo.py
│ ├── document_conversion_and_processing_demo.py
│ ├── entity_relation_graph_demo.py
│ ├── filesystem_operations_demo.py
│ ├── grok_integration_demo.py
│ ├── local_text_tools_demo.py
│ ├── marqo_fused_search_demo.py
│ ├── measure_model_speeds.py
│ ├── meta_api_demo.py
│ ├── multi_provider_demo.py
│ ├── ollama_integration_demo.py
│ ├── prompt_templates_demo.py
│ ├── python_sandbox_demo.py
│ ├── rag_example.py
│ ├── research_workflow_demo.py
│ ├── sample
│ │ ├── article.txt
│ │ ├── backprop_paper.pdf
│ │ ├── buffett.pdf
│ │ ├── contract_link.txt
│ │ ├── legal_contract.txt
│ │ ├── medical_case.txt
│ │ ├── northwind.db
│ │ ├── research_paper.txt
│ │ ├── sample_data.json
│ │ └── text_classification_samples
│ │ ├── email_classification.txt
│ │ ├── news_samples.txt
│ │ ├── product_reviews.txt
│ │ └── support_tickets.txt
│ ├── sample_docs
│ │ └── downloaded
│ │ └── attention_is_all_you_need.pdf
│ ├── sentiment_analysis_demo.py
│ ├── simple_completion_demo.py
│ ├── single_shot_synthesis_demo.py
│ ├── smart_browser_demo.py
│ ├── sql_database_demo.py
│ ├── sse_client_demo.py
│ ├── test_code_extraction.py
│ ├── test_content_detection.py
│ ├── test_ollama.py
│ ├── text_classification_demo.py
│ ├── text_redline_demo.py
│ ├── tool_composition_examples.py
│ ├── tournament_code_demo.py
│ ├── tournament_text_demo.py
│ ├── unified_memory_system_demo.py
│ ├── vector_search_demo.py
│ ├── web_automation_instruction_packs.py
│ └── workflow_delegation_demo.py
├── LICENSE
├── list_models.py
├── marqo_index_config.json.example
├── mcp_protocol_schema_2025-03-25_version.json
├── mcp_python_lib_docs.md
├── mcp_tool_context_estimator.py
├── model_preferences.py
├── pyproject.toml
├── quick_test.py
├── README.md
├── resource_annotations.py
├── run_all_demo_scripts_and_check_for_errors.py
├── storage
│ └── smart_browser_internal
│ ├── locator_cache.db
│ ├── readability.js
│ └── storage_state.enc
├── test_client.py
├── test_connection.py
├── TEST_README.md
├── test_sse_client.py
├── test_stdio_client.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── integration
│ │ ├── __init__.py
│ │ └── test_server.py
│ ├── manual
│ │ ├── test_extraction_advanced.py
│ │ └── test_extraction.py
│ └── unit
│ ├── __init__.py
│ ├── test_cache.py
│ ├── test_providers.py
│ └── test_tools.py
├── TODO.md
├── tool_annotations.py
├── tools_list.json
├── ultimate_mcp_banner.webp
├── ultimate_mcp_logo.webp
├── ultimate_mcp_server
│ ├── __init__.py
│ ├── __main__.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── commands.py
│ │ ├── helpers.py
│ │ └── typer_cli.py
│ ├── clients
│ │ ├── __init__.py
│ │ ├── completion_client.py
│ │ └── rag_client.py
│ ├── config
│ │ └── examples
│ │ └── filesystem_config.yaml
│ ├── config.py
│ ├── constants.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── evaluation
│ │ │ ├── base.py
│ │ │ └── evaluators.py
│ │ ├── providers
│ │ │ ├── __init__.py
│ │ │ ├── anthropic.py
│ │ │ ├── base.py
│ │ │ ├── deepseek.py
│ │ │ ├── gemini.py
│ │ │ ├── grok.py
│ │ │ ├── ollama.py
│ │ │ ├── openai.py
│ │ │ └── openrouter.py
│ │ ├── server.py
│ │ ├── state_store.py
│ │ ├── tournaments
│ │ │ ├── manager.py
│ │ │ ├── tasks.py
│ │ │ └── utils.py
│ │ └── ums_api
│ │ ├── __init__.py
│ │ ├── ums_database.py
│ │ ├── ums_endpoints.py
│ │ ├── ums_models.py
│ │ └── ums_services.py
│ ├── exceptions.py
│ ├── graceful_shutdown.py
│ ├── services
│ │ ├── __init__.py
│ │ ├── analytics
│ │ │ ├── __init__.py
│ │ │ ├── metrics.py
│ │ │ └── reporting.py
│ │ ├── cache
│ │ │ ├── __init__.py
│ │ │ ├── cache_service.py
│ │ │ ├── persistence.py
│ │ │ ├── strategies.py
│ │ │ └── utils.py
│ │ ├── cache.py
│ │ ├── document.py
│ │ ├── knowledge_base
│ │ │ ├── __init__.py
│ │ │ ├── feedback.py
│ │ │ ├── manager.py
│ │ │ ├── rag_engine.py
│ │ │ ├── retriever.py
│ │ │ └── utils.py
│ │ ├── prompts
│ │ │ ├── __init__.py
│ │ │ ├── repository.py
│ │ │ └── templates.py
│ │ ├── prompts.py
│ │ └── vector
│ │ ├── __init__.py
│ │ ├── embeddings.py
│ │ └── vector_service.py
│ ├── tool_token_counter.py
│ ├── tools
│ │ ├── __init__.py
│ │ ├── audio_transcription.py
│ │ ├── base.py
│ │ ├── completion.py
│ │ ├── docstring_refiner.py
│ │ ├── document_conversion_and_processing.py
│ │ ├── enhanced-ums-lookbook.html
│ │ ├── entity_relation_graph.py
│ │ ├── excel_spreadsheet_automation.py
│ │ ├── extraction.py
│ │ ├── filesystem.py
│ │ ├── html_to_markdown.py
│ │ ├── local_text_tools.py
│ │ ├── marqo_fused_search.py
│ │ ├── meta_api_tool.py
│ │ ├── ocr_tools.py
│ │ ├── optimization.py
│ │ ├── provider.py
│ │ ├── pyodide_boot_template.html
│ │ ├── python_sandbox.py
│ │ ├── rag.py
│ │ ├── redline-compiled.css
│ │ ├── sentiment_analysis.py
│ │ ├── single_shot_synthesis.py
│ │ ├── smart_browser.py
│ │ ├── sql_databases.py
│ │ ├── text_classification.py
│ │ ├── text_redline_tools.py
│ │ ├── tournament.py
│ │ ├── ums_explorer.html
│ │ └── unified_memory_system.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── async_utils.py
│ │ ├── display.py
│ │ ├── logging
│ │ │ ├── __init__.py
│ │ │ ├── console.py
│ │ │ ├── emojis.py
│ │ │ ├── formatter.py
│ │ │ ├── logger.py
│ │ │ ├── panels.py
│ │ │ ├── progress.py
│ │ │ └── themes.py
│ │ ├── parse_yaml.py
│ │ ├── parsing.py
│ │ ├── security.py
│ │ └── text.py
│ └── working_memory_api.py
├── unified_memory_system_technical_analysis.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/ultimate_mcp_server/tools/html_to_markdown.py:
--------------------------------------------------------------------------------
```python
1 | """HTML to Markdown conversion tools for Ultimate MCP Server."""
2 | import re
3 | import time
4 | from typing import Any, Dict, List
5 |
6 | import html2text
7 | import readability
8 | import trafilatura
9 | from bs4 import BeautifulSoup
10 | from markdownify import markdownify as md
11 |
12 | from ultimate_mcp_server.exceptions import ToolInputError
13 | from ultimate_mcp_server.tools.base import with_error_handling, with_tool_metrics
14 | from ultimate_mcp_server.utils import get_logger
15 |
16 | logger = get_logger("ultimate_mcp_server.tools.html_to_markdown")
17 |
18 | # --- Helper Functions ---
19 |
20 | def _is_html_fragment(text: str) -> bool:
21 | """Detect if text is likely an HTML fragment.
22 |
23 | Args:
24 | text: Input text to check
25 |
26 | Returns:
27 | bool: True if the text appears to be HTML, False otherwise
28 | """
29 | # Simple heuristics to check if the text contains HTML
30 | html_patterns = [
31 | r"<\s*[a-zA-Z]+[^>]*>", # Basic HTML tag pattern
32 | r"<\s*/\s*[a-zA-Z]+\s*>", # Closing HTML tag
33 | r"&[a-zA-Z]+;", # HTML entities
34 | r"<!\s*DOCTYPE", # DOCTYPE declaration
35 | r"<!\s*--", # HTML comment
36 | r"style\s*=\s*['\"]", # style attribute
37 | r"class\s*=\s*['\"]", # class attribute
38 | r"id\s*=\s*['\"]", # id attribute
39 | r"href\s*=\s*['\"]", # href attribute
40 | r"src\s*=\s*['\"]", # src attribute
41 | ]
42 |
43 | # Check if the text matches any of the patterns
44 | for pattern in html_patterns:
45 | if re.search(pattern, text, re.IGNORECASE):
46 | return True
47 |
48 | return False
49 |
def _clean_html_with_beautifulsoup(html: str) -> str:
    """Strip scripts, styles and risky attributes from HTML via BeautifulSoup.

    Args:
        html: HTML content to clean

    Returns:
        Cleaned HTML string with unwanted elements removed
    """
    try:
        soup = BeautifulSoup(html, 'html.parser')

        # Drop elements that never contribute readable content.
        for node in soup(['script', 'style', 'svg', 'iframe', 'canvas', 'noscript']):
            node.decompose()

        # Prune attributes: inline data URLs in src, event handlers (on*),
        # inline styles and data-* attributes are all removed.
        for element in soup.find_all(True):
            for name in list(element.attrs):
                value = element.attrs[name]
                is_data_url = (
                    name == 'src'
                    and isinstance(value, str)
                    and 'data:' in value
                )
                if is_data_url:
                    del element.attrs[name]
                elif name.startswith('on') or name == 'style' or name.startswith('data-'):
                    del element.attrs[name]

        return str(soup)
    except Exception as e:
        logger.warning(f"Error cleaning HTML with BeautifulSoup: {str(e)}")
        # Degrade gracefully: hand back the untouched input when parsing fails.
        return html
81 |
def _html_to_markdown_with_html2text(html: str) -> str:
    """Convert HTML to Markdown via the html2text library.

    Args:
        html: HTML content to convert

    Returns:
        Markdown formatted text, or the original input if conversion fails
    """
    try:
        converter = html2text.HTML2Text()
        # Keep links, images and tables; prefer Unicode output; no wrapping.
        converter.ignore_links = False
        converter.ignore_images = False
        converter.ignore_tables = False
        converter.unicode_snob = True  # Use Unicode instead of ASCII
        converter.body_width = 0       # No wrapping
        return converter.handle(html)
    except Exception as e:
        logger.warning(f"Error converting HTML to Markdown with html2text: {str(e)}")
        # Degrade gracefully by returning the unconverted input.
        return html
104 |
def _html_to_markdown_with_markdownify(html: str) -> str:
    """Convert HTML to Markdown via the markdownify library.

    Args:
        html: HTML content to convert

    Returns:
        Markdown formatted text, or the original input if conversion fails
    """
    try:
        # ATX headings ("# Title") rather than Setext underlines.
        converted = md(html, heading_style="ATX")
        return converted
    except Exception as e:
        logger.warning(f"Error converting HTML to Markdown with markdownify: {str(e)}")
        # Degrade gracefully by returning the unconverted input.
        return html
120 |
def _extract_content_with_readability(html: str) -> str:
    """Pull the main article content out of HTML using readability.

    Args:
        html: HTML content to process

    Returns:
        HTML string containing only the main content, or the original
        input when extraction fails
    """
    try:
        return readability.Document(html).summary()
    except Exception as e:
        logger.warning(f"Error extracting content with readability: {str(e)}")
        # Degrade gracefully by returning the unprocessed input.
        return html
138 |
def _extract_content_with_trafilatura(html: str) -> str:
    """Pull the main content out of HTML using trafilatura.

    Tries plain-text extraction first; when that yields nothing, retries
    asking trafilatura for HTML output before giving up.

    Args:
        html: HTML content to process

    Returns:
        Extracted content, or the original input when extraction fails
    """
    try:
        as_text = trafilatura.extract(html, include_comments=False, include_tables=True)
        if as_text:
            return as_text
        # Text extraction came back empty; retry requesting HTML output.
        as_html = trafilatura.extract(
            html, output_format='html', include_comments=False, include_tables=True
        )
        return as_html or html
    except Exception as e:
        logger.warning(f"Error extracting content with trafilatura: {str(e)}")
        # Degrade gracefully by returning the unprocessed input.
        return html
159 |
160 | def _sanitize_markdown(markdown: str) -> str:
161 | """Clean up and format the markdown to be more readable.
162 |
163 | Args:
164 | markdown: Markdown text to sanitize
165 |
166 | Returns:
167 | Cleaned markdown text
168 | """
169 | # Fix excessive newlines (more than 2 consecutive)
170 | sanitized = re.sub(r'\n{3,}', '\n\n', markdown)
171 |
172 | # Fix list item spacing
173 | sanitized = re.sub(r'(\n[*-].*\n)(?!\n)', r'\1\n', sanitized)
174 |
175 | # Remove trailing whitespace from lines
176 | sanitized = re.sub(r' +$', '', sanitized, flags=re.MULTILINE)
177 |
178 | # Fix markdown heading formatting (ensure space after #)
179 | sanitized = re.sub(r'(^|\n)(#{1,6})([^#\s])', r'\1\2 \3', sanitized)
180 |
181 | # Fix code block formatting
182 | sanitized = re.sub(r'```\s*\n', '```\n', sanitized)
183 | sanitized = re.sub(r'\n\s*```', '\n```', sanitized)
184 |
185 | # Ensure proper code block syntax (start with language or leave empty)
186 | sanitized = re.sub(r'```([^a-zA-Z\s\n][^`\n]*)$', '```\n\\1', sanitized, flags=re.MULTILINE)
187 |
188 | # Normalize list indicators (consistent use of - or * for unordered lists)
189 | sanitized = re.sub(r'^[*+] ', '- ', sanitized, flags=re.MULTILINE)
190 |
191 | return sanitized
192 |
193 | def _improve_markdown_formatting(markdown: str) -> str:
194 | """Improve the formatting of the markdown to make it more readable.
195 |
196 | Args:
197 | markdown: Markdown text to improve
198 |
199 | Returns:
200 | Improved markdown text
201 | """
202 | # Ensure proper spacing for headings
203 | improved = re.sub(r'(\n#{1,6}[^\n]+)(\n[^\n#])', r'\1\n\2', markdown)
204 |
205 | # Ensure paragraphs have proper spacing
206 | improved = re.sub(r'(\n[^\s#>*-][^\n]+)(\n[^\s#>*-])', r'\1\n\2', improved)
207 |
208 | # Fix blockquote formatting
209 | improved = re.sub(r'(\n>[ ][^\n]+)(\n[^>\s])', r'\1\n\2', improved)
210 |
211 | # Fix nested list formatting
212 | improved = re.sub(r'(\n[ ]{2,}[*-][ ][^\n]+)(\n[^\s*-])', r'\1\n\2', improved)
213 |
214 | # Add horizontal rules for clear section breaks (if large content gaps exist)
215 | improved = re.sub(r'\n\n\n\n+', '\n\n---\n\n', improved)
216 |
217 | return improved
218 |
def _convert_html_tables_to_markdown(html: str) -> str:
    """Convert every <table> in the HTML into a markdown pipe table.

    The first row is treated as the header row (whether its cells are <th>
    or <td>); the remaining rows become data rows.

    Args:
        html: HTML content with tables to convert

    Returns:
        The document with each table replaced by its markdown equivalent.
        Non-table markup is re-serialized by BeautifulSoup, so minor
        normalization (attribute quoting, entity encoding) may occur.
        On any error the original input is returned unchanged.
    """
    try:
        soup = BeautifulSoup(html, 'html.parser')
        tables = soup.find_all('table')

        # If no tables, return original HTML untouched.
        if not tables:
            return html

        # Fix: replace within the re-serialized document, not the raw input.
        # str(table) is the parser's normalized form and may not appear
        # byte-for-byte in the original html, which made the previous
        # html.replace(...) silently miss and leave tables unconverted.
        result = str(soup)

        for table in tables:
            rows = table.find_all('tr')
            if not rows:
                continue

            markdown_lines = []

            # First row doubles as the header row.
            header_cells = rows[0].find_all(['th', 'td'])
            if header_cells:
                markdown_lines.append(
                    '| ' + ' | '.join(cell.get_text().strip() for cell in header_cells) + ' |'
                )
                # Mandatory separator between header and body.
                markdown_lines.append(
                    '| ' + ' | '.join('---' for _ in header_cells) + ' |'
                )

            # Remaining rows are data rows.
            for row in rows[1:]:
                cells = row.find_all('td')
                if cells:
                    markdown_lines.append(
                        '| ' + ' | '.join(cell.get_text().strip() for cell in cells) + ' |'
                    )

            # Swap the serialized table for its markdown equivalent.
            result = result.replace(str(table), '\n'.join(markdown_lines))

        return result

    except Exception as e:
        logger.warning(f"Error converting HTML tables to Markdown: {str(e)}")
        # If conversion fails, return the original HTML.
        return html
271 |
272 | # --- Main Tool Function ---
273 |
@with_tool_metrics
@with_error_handling
async def clean_and_format_text_as_markdown(
    text: str,
    force_markdown_conversion: bool = False,
    extraction_method: str = "auto",
    preserve_tables: bool = True,
    preserve_links: bool = True,
    preserve_images: bool = False,
    max_line_length: int = 0  # 0 means no wrapping
) -> Dict[str, Any]:
    """Converts plain text or HTML to clean, well-formatted markdown.

    Automatically detects if input is HTML, then cleans and converts it.
    For non-HTML text, it applies minimal formatting to create valid markdown.

    Args:
        text: The input text to clean and format (plain text or HTML).
        force_markdown_conversion: Whether to force markdown conversion even if the text doesn't
                                   look like HTML. Default is False.
        extraction_method: Method to extract content from HTML. Options:
                           - "auto": Automatically choose the best method
                           - "readability": Use Mozilla's Readability algorithm
                           - "trafilatura": Use trafilatura library
                           - "raw": Don't extract main content, convert the whole document
                           Default is "auto".
        preserve_tables: Whether to preserve and convert HTML tables to markdown tables.
                         Default is True.
        preserve_links: Whether to preserve and convert HTML links to markdown links.
                        Default is True.
        preserve_images: Whether to preserve and convert HTML images to markdown image syntax.
                         Default is False.
        max_line_length: Maximum line length for text wrapping. 0 means no wrapping.
                         Default is 0.

    Returns:
        Dictionary containing:
        {
            "markdown_text": "Cleaned and formatted markdown text",
            "was_html": true,  # Whether the input was detected as HTML
            "extraction_method_used": "readability",  # Which extraction method was used
            "processing_time": 0.35,  # Time taken in seconds
            "success": true
        }

    Raises:
        ToolInputError: If the input text is empty or not a string.
    """
    start_time = time.time()

    # Input validation
    if not text:
        raise ToolInputError("Input text cannot be empty")
    if not isinstance(text, str):
        raise ToolInputError("Input text must be a string")

    # Determine if input is HTML. force_markdown_conversion short-circuits
    # detection, routing even plain text through the HTML pipeline below.
    is_html = _is_html_fragment(text) or force_markdown_conversion

    # Process based on content type
    if is_html:
        logger.info("Input detected as HTML, processing for conversion to markdown")

        # Convert HTML tables to markdown before main processing, so the
        # extractors/cleaner below see pipe tables as plain text.
        if preserve_tables:
            text = _convert_html_tables_to_markdown(text)

        # Extract main content based on specified method
        extraction_method_used = extraction_method
        if extraction_method == "auto":
            # If the text is a small fragment, use raw conversion
            if len(text) < 1000:
                extraction_method_used = "raw"
            else:
                # Try trafilatura first, fallback to readability
                try:
                    extracted = _extract_content_with_trafilatura(text)
                    # Heuristic: accept only if at least 20% of the input
                    # survived extraction; otherwise assume over-stripping.
                    if extracted and len(extracted) > 0.2 * len(text):  # Ensure we got meaningful extraction
                        text = extracted
                        extraction_method_used = "trafilatura"
                    else:
                        text = _extract_content_with_readability(text)
                        extraction_method_used = "readability"
                except Exception:
                    text = _extract_content_with_readability(text)
                    extraction_method_used = "readability"
        elif extraction_method == "readability":
            text = _extract_content_with_readability(text)
        elif extraction_method == "trafilatura":
            text = _extract_content_with_trafilatura(text)
        # For "raw", we use the text as is

        # Clean HTML before conversion (drops scripts/styles and risky attrs)
        text = _clean_html_with_beautifulsoup(text)

        # Set up conversion options based on parameters
        h = html2text.HTML2Text()
        h.ignore_links = not preserve_links
        h.ignore_images = not preserve_images
        h.ignore_tables = not preserve_tables
        h.body_width = max_line_length
        h.unicode_snob = True

        # Try multiple conversion methods and use the best result
        try:
            markdown_text = h.handle(text)

            # Fallback to markdownify if html2text result looks problematic.
            # NOTE(review): `and` binds tighter than `or`, so this condition
            # reads as A or B or (C and D) -- any leftover '<' or '>' alone
            # triggers the fallback regardless of length. Confirm intended.
            if '<' in markdown_text or '>' in markdown_text or len(markdown_text.strip()) < 100 and len(text) > 500:
                try:
                    alternative = _html_to_markdown_with_markdownify(text)
                    # Prefer the alternative only when it produced more output.
                    if len(alternative.strip()) > len(markdown_text.strip()):
                        markdown_text = alternative
                except Exception:
                    pass
        except Exception as e:
            logger.warning(f"Primary markdown conversion failed: {str(e)}")
            try:
                markdown_text = _html_to_markdown_with_markdownify(text)
            except Exception:
                # Last resort: strip tags and return plain text
                markdown_text = re.sub(r'<[^>]*>', '', text)
    else:
        logger.info("Input detected as plain text, applying minimal markdown formatting")
        # For plain text, just clean it up a bit
        markdown_text = text
        extraction_method_used = "none"

    # Final cleanup and formatting of the markdown (applies to both paths)
    markdown_text = _sanitize_markdown(markdown_text)
    markdown_text = _improve_markdown_formatting(markdown_text)

    processing_time = time.time() - start_time
    logger.info(f"Text cleaned and formatted as markdown in {processing_time:.2f}s")

    return {
        "markdown_text": markdown_text,
        "was_html": is_html,
        "extraction_method_used": extraction_method_used,
        "processing_time": processing_time,
        "success": True
    }
416 |
417 | # --- Additional Tool Functions ---
418 |
@with_tool_metrics
@with_error_handling
async def detect_content_type(text: str) -> Dict[str, Any]:
    """Analyzes text to detect its type: HTML, markdown, code, or plain text.

    Scores the input against three weighted regex marker sets (HTML,
    markdown, code); plain text is the fallback when no marker set
    dominates.

    Args:
        text: The input text to analyze

    Returns:
        Dictionary containing:
        {
            "content_type": "html",  # One of: "html", "markdown", "code", "plain_text"
            "confidence": 0.85,      # Confidence score (0.0-1.0)
            "details": {
                "html_markers": 12,      # Count of HTML markers found
                "markdown_markers": 3,   # Count of markdown markers found
                "code_markers": 1,       # Count of code markers found
                "detected_language": "javascript"  # If code is detected
            },
            "success": true
        }

    Raises:
        ToolInputError: If the input text is empty or not a string.
    """
    if not text:
        raise ToolInputError("Input text cannot be empty")
    if not isinstance(text, str):
        raise ToolInputError("Input text must be a string")

    # Weighted HTML indicators.
    html_weighted_patterns = [
        (r"<\s*[a-zA-Z]+[^>]*>", 1),                            # HTML tag
        (r"<\s*/\s*[a-zA-Z]+\s*>", 1),                          # Closing HTML tag
        (r"&[a-zA-Z]+;", 0.5),                                  # HTML entity
        (r"<!\s*DOCTYPE", 2),                                   # DOCTYPE
        (r"<!\s*--", 1),                                        # HTML comment opener
        (r"<!--.*?-->", 1),                                     # Complete HTML comment
        (r"<(div|span|p|a|img|table|ul|ol|li|h[1-6])\b", 1.5),  # Common HTML tags
        (r"</(div|span|p|a|img|table|ul|ol|li|h[1-6])>", 1.5),  # Common closing tags
        (r"<(html|head|body|meta|link|script|style)\b", 2),     # Structure tags
        (r"</(html|head|body|script|style)>", 2),               # Structure closing tags
        (r"style\s*=\s*['\"]", 1),                              # style attribute
        (r"class\s*=\s*['\"]", 1),                              # class attribute
        (r"id\s*=\s*['\"]", 1),                                 # id attribute
        (r"href\s*=\s*['\"]", 1),                               # href attribute
        (r"src\s*=\s*['\"]", 1)                                 # src attribute
    ]

    # Weighted markdown indicators (evaluated per line).
    markdown_weighted_patterns = [
        (r"^#\s+.+$", 2),                # Heading level 1
        (r"^#{2,6}\s+.+$", 1.5),         # Headings levels 2-6
        (r"^\s*[*-]\s+.+$", 1),          # Unordered list
        (r"^\s*\d+\.\s+.+$", 1),         # Ordered list
        (r"^\s*>\s+.+$", 1.5),           # Blockquote
        (r"\[.+?\]\(.+?\)", 2),          # Link
        (r"!\[.+?\]\(.+?\)", 2),         # Image
        (r"`[^`\n]+`", 1),               # Inline code
        (r"^```\s*\w*$", 2),             # Code block start
        (r"^```$", 2),                   # Code block end
        (r"\*\*.+?\*\*", 1),             # Bold
        (r"\*.+?\*", 0.5),               # Italic
        (r"__(.+?)__", 1),               # Bold with underscore
        (r"_(.+?)_", 0.5),               # Italic with underscore
        (r"~~.+?~~", 1),                 # Strikethrough
        (r"^\s*[-*_]{3,}\s*$", 1.5),     # Horizontal rule
        (r"^\s*\|(.+\|)+\s*$", 2),       # Table row
        (r"^\s*\|([-:]+\|)+\s*$", 3)     # Table header/divider
    ]

    # Weighted code indicators spanning several languages.
    code_weighted_patterns = [
        (r"function\s+\w+\s*\(.*?\)\s*\{", 2),                      # Function declaration
        (r"(var|let|const)\s+\w+\s*=", 1.5),                        # JS variable declaration
        (r"if\s*\(.*?\)\s*\{", 1),                                  # If statement
        (r"for\s*\(.*?;.*?;.*?\)\s*\{", 2),                         # For loop
        (r"while\s*\(.*?\)\s*\{", 2),                               # While loop
        (r"class\s+\w+(\s+extends\s+\w+)?\s*\{", 2),                # Class declaration
        (r"import\s+.*?from\s+['\"].*?['\"]", 2),                   # ES6 import
        (r"def\s+\w+\s*\(.*?\):", 2),                               # Python function
        (r"class\s+\w+(\(\w+\))?:", 2),                             # Python class
        (r"import\s+\w+(\s+as\s+\w+)?", 1.5),                       # Python import
        (r"from\s+\w+(\.\w+)*\s+import", 1.5),                      # Python from import
        (r"public\s+(static\s+)?(void|int|String)\s+\w+\s*\(", 2),  # Java method
        (r"#include\s*<.*?>", 2),                                   # C/C++ include
        (r"^\s*package\s+[\w\.]+;", 2),                             # Java/Kotlin package
        (r"^\s*using\s+[\w\.]+;", 2),                               # C# using
        (r"^\s*(public|private|protected)\s+class", 2)              # Access modifier
    ]

    def _score(patterns, flags):
        # Each regex contributes (match count x weight) to the total.
        return sum(len(re.findall(p, text, flags)) * w for p, w in patterns)

    html_markers = _score(html_weighted_patterns, re.IGNORECASE)
    markdown_markers = _score(markdown_weighted_patterns, re.MULTILINE)
    code_markers = _score(code_weighted_patterns, re.MULTILINE)

    # Rough language identification, attempted only when the text scores
    # clearly code-like.
    detected_language = None
    if code_markers > 5:
        language_signatures = [
            (r"function\s+\w+|var\s+\w+|let\s+\w+|const\s+\w+|document\.|\$\(", "javascript"),
            (r"<\?php|\$[a-zA-Z_]", "php"),
            (r"def\s+\w+\s*\(.*?\):|import\s+\w+|from\s+\w+\s+import", "python"),
            (r"public\s+class\s+\w+|public\s+static\s+void\s+main", "java"),
            (r"#include\s*<.*?>|int\s+main\s*\(", "c/c++"),
            (r"^\s*using\s+System;|namespace\s+\w+|public\s+class\s+\w+\s*:", "c#"),
            (r"module\s+\w+|fn\s+\w+|let\s+\w+|impl", "rust"),
            (r"^\s*import\s+\w+\s+from\s+['\"]|export\s+(default\s+)?", "typescript"),
            (r"^package\s+main|func\s+\w+\(|import\s+\([^)]*\)", "go")
        ]
        detected_language = next(
            (
                lang
                for pattern, lang in language_signatures
                if re.search(pattern, text, re.MULTILINE | re.IGNORECASE)
            ),
            None,
        )

    # Normalize marker counts: HTML per ~100 characters, markdown and
    # code per line.
    line_count = max(len(text.split('\n')), 1)
    html_score = html_markers / max(len(text) / 100, 1)
    markdown_score = markdown_markers / line_count
    code_score = code_markers / line_count

    # Plain text is the confidence left over after the marker types claim
    # their (capped) shares.
    plain_text_score = 1.0 - max(
        min(html_score / 10, 1),
        min(markdown_score / 5, 1),
        min(code_score / 5, 1),
    )

    scores = {
        "html": html_score,
        "markdown": markdown_score,
        "code": code_score,
        "plain_text": plain_text_score
    }

    content_type = max(scores, key=scores.get)
    top_score = scores[content_type]

    # Confidence reflects how dominant the winning score is.
    total_score = sum(scores.values())
    confidence = top_score / total_score if total_score > 0 else 0.25

    # Downgrade confidence when almost no markers were found at all.
    if content_type != "plain_text" and (html_markers + markdown_markers + code_markers) < 3:
        confidence *= 0.7

    return {
        "content_type": content_type,
        "confidence": min(confidence, 1.0),
        "details": {
            "html_markers": html_markers,
            "markdown_markers": markdown_markers,
            "code_markers": code_markers,
            "detected_language": detected_language if content_type == "code" else None
        },
        "success": True
    }
592 |
@with_tool_metrics
@with_error_handling
async def batch_format_texts(
    texts: List[str],
    force_markdown_conversion: bool = False,
    extraction_method: str = "auto",
    max_concurrency: int = 5,
    preserve_tables: bool = True
) -> Dict[str, Any]:
    """Processes multiple text inputs in parallel, converting each to markdown.

    Runs clean_and_format_text_as_markdown over every input, limiting the
    number of in-flight conversions with a semaphore and returning results
    in the original submission order.

    Args:
        texts: List of text strings to clean and format.
        force_markdown_conversion: Whether to force markdown conversion for all inputs.
                                   Default is False.
        extraction_method: Method to extract content from HTML. Options:
                           - "auto": Automatically choose the best method
                           - "readability": Use Mozilla's Readability algorithm
                           - "trafilatura": Use trafilatura library
                           - "raw": Don't extract main content, convert the whole document
                           Default is "auto".
        max_concurrency: Maximum number of texts to process simultaneously.
                         Default is 5.
        preserve_tables: Whether to preserve and convert HTML tables to markdown tables.
                         Default is True.

    Returns:
        Dictionary containing:
        {
            "results": [per-input conversion result dicts, in input order],
            "total_processing_time": 2.45,  # Total time in seconds
            "success_count": 5,             # Number of successfully processed texts
            "failure_count": 0,             # Number of failed texts
            "success": true
        }

    Raises:
        ToolInputError: If the input list is empty or not a list of strings.
    """
    import asyncio

    start_time = time.time()

    # Input validation
    if not texts:
        raise ToolInputError("Input texts list cannot be empty")
    if not isinstance(texts, list):
        raise ToolInputError("Input must be a list of text strings")

    # Bound how many conversions run at once.
    gate = asyncio.Semaphore(max_concurrency)

    async def _convert_one(position, content):
        """Convert one text under the concurrency gate, tagging its position."""
        async with gate:
            try:
                outcome = await clean_and_format_text_as_markdown(
                    text=content,
                    force_markdown_conversion=force_markdown_conversion,
                    extraction_method=extraction_method,
                    preserve_tables=preserve_tables
                )
                # Tag with the original position so the batch can be re-ordered.
                outcome["index"] = position
                return outcome
            except Exception as e:
                logger.error(f"Error processing text at index {position}: {str(e)}")
                return {
                    "index": position,
                    "error": str(e),
                    "success": False
                }

    # Fan out all conversions concurrently.
    results = await asyncio.gather(
        *(_convert_one(i, item) for i, item in enumerate(texts))
    )

    # Restore submission order, then drop the bookkeeping index.
    ordered = sorted(results, key=lambda entry: entry.get("index", 0))
    for entry in ordered:
        entry.pop("index", None)

    # Tally outcomes.
    succeeded = sum(1 for entry in ordered if entry.get("success", False))

    return {
        "results": ordered,
        "total_processing_time": time.time() - start_time,
        "success_count": succeeded,
        "failure_count": len(ordered) - succeeded,
        "success": True
    }
699 |
@with_tool_metrics
@with_error_handling
async def optimize_markdown_formatting(
    markdown: str,
    normalize_headings: bool = False,
    fix_lists: bool = True,
    fix_links: bool = True,
    add_line_breaks: bool = True,
    compact_mode: bool = False,
    max_line_length: int = 0
) -> Dict[str, Any]:
    """Optimizes and improves the formatting of existing markdown text.

    Takes markdown text and enhances its formatting by fixing common issues
    and applying stylistic improvements.

    Args:
        markdown: The markdown text to optimize.
        normalize_headings: If True, ensures heading levels start at h1 and are sequential.
            Default is False.
        fix_lists: If True, fixes common issues with list formatting.
            Default is True.
        fix_links: If True, fixes common issues with link formatting.
            Default is True.
        add_line_breaks: If True, ensures proper paragraph breaks.
            Default is True.
        compact_mode: If True, reduces whitespace for a more compact presentation.
            Default is False.
        max_line_length: Maximum line length for wrapping. 0 means no wrapping.
            Default is 0.

    Returns:
        Dictionary containing:
        {
            "optimized_markdown": "Cleaned and formatted markdown text",
            "changes_made": {
                "headings_normalized": true,
                "lists_fixed": true,
                "links_fixed": true,
                "line_breaks_added": true
            },
            "processing_time": 0.15, # Time taken in seconds
            "success": true
        }

    Raises:
        ToolInputError: If the input markdown is empty or not a string.
    """
    import re

    start_time = time.time()

    # Input validation
    if not markdown:
        raise ToolInputError("Input markdown cannot be empty")
    if not isinstance(markdown, str):
        raise ToolInputError("Input markdown must be a string")

    # Track which categories of change were actually applied
    changes_made = {
        "headings_normalized": False,
        "lists_fixed": False,
        "links_fixed": False,
        "line_breaks_added": False,
        "whitespace_adjusted": False
    }

    optimized = markdown

    # Fix markdown heading formatting (ensure space after #)
    if "#" in optimized:
        original = optimized
        optimized = re.sub(r'(^|\n)(#{1,6})([^#\s])', r'\1\2 \3', optimized)
        changes_made["headings_normalized"] = original != optimized

    # Normalize heading levels if requested, so the smallest level used
    # becomes h1 and relative depth is preserved.
    # FIX: the previous implementation rebuilt the string via manual slice
    # arithmetic on match offsets; for a heading at the very start of the
    # document (where group(1) is empty) it duplicated the leading '#', and
    # because its pattern consumed the trailing newline it skipped every
    # second of two consecutive headings. A MULTILINE pattern with a
    # re.sub callback handles both cases correctly.
    if normalize_headings and "#" in optimized:
        heading_pattern = r'^(#{1,6})\s+(.*?)$'
        heading_levels = [
            len(m.group(1))
            for m in re.finditer(heading_pattern, optimized, flags=re.MULTILINE)
        ]

        if heading_levels:
            # Find the minimum heading level used
            min_level = min(heading_levels)

            # Only shift when the document doesn't already start at h1
            if min_level > 1:
                def _promote(match):
                    # Reduce depth so min_level maps to a single '#'
                    hashes = '#' * (len(match.group(1)) - min_level + 1)
                    return f"{hashes} {match.group(2)}"

                optimized = re.sub(
                    heading_pattern, _promote, optimized, flags=re.MULTILINE
                )
                changes_made["headings_normalized"] = True

    # Fix list formatting
    if fix_lists and any(c in optimized for c in ['-', '*', '+']):
        original = optimized

        # Ensure consistent list markers ('*' and '+' become '-')
        optimized = re.sub(r'^([*+]) ', r'- ', optimized, flags=re.MULTILINE)

        # Fix list item spacing (blank line between a list and following text)
        optimized = re.sub(r'(\n- .+)(\n[^-\s])', r'\1\n\2', optimized)

        # Fix indentation in nested lists
        optimized = re.sub(r'(\n- .+\n)(\s{1,3}- )', r'\1 \2', optimized)

        changes_made["lists_fixed"] = original != optimized

    # Fix link formatting
    if fix_links and "[" in optimized:
        original = optimized

        # Fix reference-style links (ensure consistent spacing)
        optimized = re.sub(r'\]\[', r'] [', optimized)

        # Fix malformed links with space between []()
        optimized = re.sub(r'\] \(', r'](', optimized)

        # Ensure proper spacing around links in sentences
        optimized = re.sub(r'([^\s])\[', r'\1 [', optimized)
        optimized = re.sub(r'\]([^\(\s])', r'] \1', optimized)

        changes_made["links_fixed"] = original != optimized

    # Add proper line breaks for readability
    if add_line_breaks:
        original = optimized

        # Ensure headings have a blank line before (except at start of document)
        optimized = re.sub(r'(?<!\n\n)(^|\n)#', r'\1\n#', optimized)

        # Ensure paragraphs have blank lines between them
        optimized = re.sub(r'(\n[^\s#>*-][^\n]+)(\n[^\s#>*-])', r'\1\n\2', optimized)

        # Clean up any excessive blank lines created
        optimized = re.sub(r'\n{3,}', r'\n\n', optimized)

        changes_made["line_breaks_added"] = original != optimized

    # Adjust whitespace based on compact_mode
    original = optimized
    if compact_mode:
        # Reduce blank lines to single blank lines
        optimized = re.sub(r'\n\s*\n', r'\n\n', optimized)

        # Remove trailing whitespace
        optimized = re.sub(r' +$', '', optimized, flags=re.MULTILINE)
    else:
        # Ensure consistent double line breaks for section transitions
        optimized = re.sub(r'(\n#{1,6}[^\n]+\n)(?!\n)', r'\1\n', optimized)

    changes_made["whitespace_adjusted"] = original != optimized

    # Apply line wrapping if specified
    if max_line_length > 0:
        import textwrap

        # Split into paragraphs, wrap each, then rejoin
        paragraphs = re.split(r'\n\s*\n', optimized)
        wrapped_paragraphs = []

        for p in paragraphs:
            # Skip wrapping for code blocks, lists, and headings
            if (p.strip().startswith("```") or
                    re.match(r'^\s*[*\-+]', p, re.MULTILINE) or
                    re.match(r'^#{1,6}\s', p.strip())):
                wrapped_paragraphs.append(p)
            else:
                # Wrap regular paragraphs line by line, leaving quoted,
                # heading, and list lines untouched
                lines = p.split('\n')
                wrapped_lines = []
                for line in lines:
                    if not line.strip().startswith(('>', '#', '-', '*', '+')):
                        wrapped = textwrap.fill(line, width=max_line_length)
                        wrapped_lines.append(wrapped)
                    else:
                        wrapped_lines.append(line)
                wrapped_paragraphs.append('\n'.join(wrapped_lines))

        optimized = '\n\n'.join(wrapped_paragraphs)

    processing_time = time.time() - start_time

    return {
        "optimized_markdown": optimized,
        "changes_made": changes_made,
        "processing_time": processing_time,
        "success": True
    }
```
--------------------------------------------------------------------------------
/examples/advanced_extraction_demo.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | """Demo of advanced extraction capabilities using Ultimate MCP Server."""
3 | import asyncio
4 | import json
5 | import os
6 | import re
7 | import sys
8 | import time
9 | from pathlib import Path
10 |
11 | # Add project root to path for imports when running as script
12 | sys.path.insert(0, str(Path(__file__).parent.parent))
13 |
14 | from rich.panel import Panel
15 | from rich.rule import Rule
16 | from rich.syntax import Syntax
17 | from rich.traceback import Traceback
18 |
19 | from ultimate_mcp_server.constants import Provider
20 | from ultimate_mcp_server.core.providers.base import get_provider
21 | from ultimate_mcp_server.utils import get_logger
22 | from ultimate_mcp_server.utils.display import CostTracker, parse_and_display_result
23 | from ultimate_mcp_server.utils.logging.console import console
24 | from ultimate_mcp_server.utils.parsing import extract_json_from_markdown
25 |
# --- Debug Flag ---
USE_DEBUG_LOGS = True  # Set to True to enable detailed logging
# ------------------

# Initialize logger for this demo module; level is forced to "debug" so the
# USE_DEBUG_LOGS trace messages in the demo functions are actually emitted.
logger = get_logger("example.advanced_extraction")
logger.set_level("debug")
33 |
# Configure the OpenAI client for direct extraction demos
async def setup_openai_provider():
    """Initialize and return an OpenAI provider for the extraction demos.

    Returns the provider instance on success, or None when the provider
    cannot be obtained (e.g. missing/invalid OPENAI_API_KEY in the
    environment/config) or an unexpected error occurs during setup.
    """
    try:
        logger.info("Initializing OpenAI for demonstration", emoji_key="start")

        # get_provider yields None when the key is missing or invalid in config
        provider = await get_provider(Provider.OPENAI.value)
        if provider:
            logger.success("OpenAI provider initialized successfully.")
            return provider

        logger.error("Failed to get OpenAI provider. Is the OPENAI_API_KEY configured correctly in your environment/config?")
        return None
    except Exception as exc:
        logger.error(f"Failed to initialize OpenAI provider: {exc}", emoji_key="error")
        return None
51 |
async def run_json_extraction_example(provider, tracker: CostTracker) -> None:
    """Demonstrate JSON extraction.

    Loads (or, on first run, creates) a sample event announcement, prompts the
    provider to extract it into a JSON object conforming to an event schema,
    then parses the model output and renders the structured result.

    Args:
        provider: Initialized LLM provider instance; the demo is skipped when
            this is falsy.
        tracker: CostTracker that accumulates token usage and cost for the
            completion call.
    """
    if USE_DEBUG_LOGS:
        logger.debug("Entering run_json_extraction_example.")
    if not provider:
        console.print("[yellow]Skipping JSON extraction demo - no provider available.[/yellow]")
        if USE_DEBUG_LOGS:
            logger.debug("Exiting run_json_extraction_example (no provider).")
        return

    console.print(Rule("[bold blue]1. JSON Extraction Example[/bold blue]"))

    # Load sample text
    sample_path = Path(__file__).parent / "data" / "sample_event.txt"
    if not sample_path.exists():
        # Create a sample text for demonstration
        sample_text = """
Tech Conference 2024
Location: San Francisco Convention Center, 123 Tech Blvd, San Francisco, CA 94103
Date: June 15-17, 2024
Time: 9:00 AM - 6:00 PM daily

Registration Fee: $599 (Early Bird: $499 until March 31)

Keynote Speakers:
- Dr. Sarah Johnson, AI Research Director at TechCorp
- Mark Williams, CTO of FutureTech Industries
- Prof. Emily Chen, MIT Computer Science Department

Special Events:
- Networking Reception: June 15, 7:00 PM - 10:00 PM
- Hackathon: June 16, 9:00 PM - 9:00 AM (overnight)
- Career Fair: June 17, 1:00 PM - 5:00 PM

For more information, contact [email protected] or call (555) 123-4567.
"""
        # Ensure the data directory exists
        os.makedirs(os.path.dirname(sample_path), exist_ok=True)
        # Write sample text to file so later runs reuse the same fixture
        with open(sample_path, "w") as f:
            f.write(sample_text)
    else:
        # Read existing sample text
        with open(sample_path, "r") as f:
            sample_text = f.read()

    # Display sample text
    console.print(Panel(sample_text, title="Sample Event Text", border_style="blue"))

    # Define JSON schema the model output must conform to
    event_schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string", "description": "Event name"},
            "location": {
                "type": "object",
                "properties": {
                    "venue": {"type": "string"},
                    "address": {"type": "string"},
                    "city": {"type": "string"},
                    "state": {"type": "string"},
                    "zip": {"type": "string"}
                }
            },
            "dates": {
                "type": "object",
                "properties": {
                    "start": {"type": "string", "format": "date"},
                    "end": {"type": "string", "format": "date"}
                }
            },
            "time": {"type": "string"},
            "registration": {
                "type": "object",
                "properties": {
                    "regular_fee": {"type": "number"},
                    "early_bird_fee": {"type": "number"},
                    "early_bird_deadline": {"type": "string", "format": "date"}
                }
            },
            "speakers": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "title": {"type": "string"},
                        "organization": {"type": "string"}
                    }
                }
            },
            "special_events": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "date": {"type": "string", "format": "date"},
                        "time": {"type": "string"}
                    }
                }
            },
            "contact": {
                "type": "object",
                "properties": {
                    "email": {"type": "string", "format": "email"},
                    "phone": {"type": "string"}
                }
            }
        }
    }

    # Display JSON schema
    schema_json = json.dumps(event_schema, indent=2)
    console.print(Panel(
        Syntax(schema_json, "json", theme="monokai", line_numbers=True),
        title="Event JSON Schema",
        border_style="green"
    ))

    # Extract JSON using direct provider call
    logger.info("Extracting structured JSON data from text...", emoji_key="processing")

    try:
        start_time = time.time()

        # Instead of using the tool, use direct completion for demo purposes
        prompt = f"""
Extract structured information from the following text into a JSON object.
Follow the provided JSON schema exactly.

TEXT:
{sample_text}

JSON SCHEMA:
{json.dumps(event_schema, indent=2)}

Provide only the valid JSON object as output, with no additional commentary.
"""

        if USE_DEBUG_LOGS:
            logger.debug(f"JSON Extraction Prompt:\n{prompt}")

        # Call the provider directly
        result = await provider.generate_completion(
            prompt=prompt,
            model="gpt-4.1-mini",  # Use an available OpenAI model
            temperature=0.2,  # Lower temperature for more deterministic output
            max_tokens=1500  # Enough tokens for a full response
        )

        # Track cost
        tracker.add_call(result)

        if USE_DEBUG_LOGS:
            logger.debug(f"Raw JSON Extraction Result Text:\n{result.text}")

        # Process the result to extract just the JSON
        try:
            # Try to parse the response as JSON; the model may wrap it in a
            # markdown code fence, so strip that first.
            raw_text = result.text.strip()
            text_to_parse = extract_json_from_markdown(raw_text)
            if USE_DEBUG_LOGS:
                logger.debug(f"Raw text received: {raw_text[:500]}...")
                logger.debug(f"Attempting to parse JSON after cleaning: {text_to_parse[:500]}...")
            json_result = json.loads(text_to_parse)
            if USE_DEBUG_LOGS:
                logger.debug(f"Successfully parsed JSON: {json.dumps(json_result, indent=2)}")

            # Create a dictionary with structured data and metadata for display
            structured_result_data = {
                "json": json_result,  # The actual parsed JSON
                "validated": True,  # Assuming validation happens elsewhere or is implied
                "model": result.model,
                "processing_time": time.time() - start_time,
                "tokens": {
                    "input": result.input_tokens,
                    "output": result.output_tokens,
                    "total": result.input_tokens + result.output_tokens
                },
                "cost": result.cost
            }

            # Display the results using the utility function
            parse_and_display_result(
                title="JSON Extraction Results",
                input_data={"text": sample_text, "schema": event_schema},
                result=structured_result_data,  # Pass the structured data
                console=console
            )

        except json.JSONDecodeError as e:
            # Log the error regardless of debug flag
            logger.error(f"JSONDecodeError occurred: {e}", exc_info=False)

            if USE_DEBUG_LOGS:
                # Log the string that caused the error (before cleaning)
                logger.debug(f"Raw string causing JSONDecodeError:\n{raw_text}")
                # Log the string that failed parsing (after cleaning)
                logger.debug(f"Cleaned string that failed JSON parsing:\n{text_to_parse}")
                # Print a rich traceback to the console
                console.print("[bold red]-- Traceback for JSONDecodeError --[/bold red]")
                console.print(Traceback())
                console.print("[bold red]-- End Traceback --[/bold red]")

            # If JSON parsing fails, show the raw response
            console.print(Panel(
                raw_text,  # Show the original raw text from the model
                title="[yellow]Raw Model Output (JSON parsing failed)[/yellow]",
                border_style="red"
            ))

    except Exception as e:
        logger.error(f"Error extracting JSON: {str(e)}", emoji_key="error", exc_info=True)

    console.print()
    if USE_DEBUG_LOGS:
        logger.debug("Exiting run_json_extraction_example.")
270 |
async def table_extraction_demo(provider, tracker: CostTracker) -> None:
    """Demonstrate table extraction capabilities.

    Sends a text passage containing a markdown table to the provider and asks
    for the table back in both JSON and Markdown form (plus table metadata),
    then parses and displays the structured response.

    Args:
        provider: Initialized LLM provider instance; the demo is skipped when
            this is falsy.
        tracker: CostTracker that accumulates token usage and cost for the
            completion call.
    """
    if USE_DEBUG_LOGS:
        logger.debug("Entering table_extraction_demo.")
    if not provider:
        console.print("[yellow]Skipping table extraction demo - no provider available.[/yellow]")
        if USE_DEBUG_LOGS:
            logger.debug("Exiting table_extraction_demo (no provider).")
        return

    logger.info("Starting table extraction demo", emoji_key="start")

    # Sample text with embedded table
    text = """
Financial Performance by Quarter (2023-2024)

| Quarter | Revenue ($M) | Expenses ($M) | Profit ($M) | Growth (%) |
|---------|-------------|---------------|-------------|------------|
| Q1 2023 | 42.5        | 32.1          | 10.4        | 3.2        |
| Q2 2023 | 45.7        | 33.8          | 11.9        | 6.5        |
| Q3 2023 | 50.2        | 35.6          | 14.6        | 9.8        |
| Q4 2023 | 58.3        | 38.2          | 20.1        | 15.2       |
| Q1 2024 | 60.1        | 39.5          | 20.6        | 3.1        |
| Q2 2024 | 65.4        | 41.2          | 24.2        | 8.8        |

Note: All figures are in millions of dollars and are unaudited.
Growth percentages are relative to the previous quarter.
"""

    # Log extraction attempt
    logger.info("Performing table extraction", emoji_key="processing")

    try:
        start_time = time.time()

        # Prompt for table extraction; double braces escape literal JSON
        # braces inside the f-string.
        prompt = f"""
Extract the table from the following text and format it as both JSON and Markdown.

TEXT:
{text}

For the JSON format, use this structure:
{{
  "headers": ["Header1", "Header2", ...],
  "rows": [
    {{"Header1": "value", "Header2": "value", ...}},
    ...
  ]
}}

For the Markdown format, output a well-formatted Markdown table.

Also extract any metadata about the table (title, notes, etc.).

Format your response as JSON with the following structure:
{{
  "json_table": {{...}},
  "markdown_table": "...",
  "metadata": {{
    "title": "...",
    "notes": [
      "..."
    ]
  }}
}}
"""

        if USE_DEBUG_LOGS:
            logger.debug(f"Table Extraction Prompt:\n{prompt}")

        # Call the provider directly
        result = await provider.generate_completion(
            prompt=prompt,
            model="gpt-4.1-mini",
            temperature=0.2,
            max_tokens=1500
        )

        # Track cost
        tracker.add_call(result)

        if USE_DEBUG_LOGS:
            logger.debug(f"Raw Table Extraction Result Text:\n{result.text}")

        try:
            # Try to parse the response as JSON
            raw_text = result.text.strip()  # Keep raw text separate
            text_to_parse = extract_json_from_markdown(raw_text)  # Clean it
            if USE_DEBUG_LOGS:
                # Log both raw and cleaned versions
                logger.debug(f"Raw text received (Table): {raw_text[:500]}...")
                logger.debug(f"Attempting to parse Table Extraction JSON after cleaning: {text_to_parse[:500]}...")
            json_result = json.loads(text_to_parse)  # Parse the cleaned version
            if USE_DEBUG_LOGS:
                logger.debug(f"Successfully parsed Table Extraction JSON: {json.dumps(json_result, indent=2)}")

            # Create structured data dictionary for display
            structured_result_data = {
                "formats": {
                    "json": json_result.get("json_table", {}),
                    "markdown": json_result.get("markdown_table", "")
                },
                "metadata": json_result.get("metadata", {}),
                "model": result.model,
                "processing_time": time.time() - start_time,
                "tokens": {
                    "input": result.input_tokens,
                    "output": result.output_tokens,
                    "total": result.input_tokens + result.output_tokens
                },
                "cost": result.cost
            }

            # Parse the result using the shared utility
            # NOTE(review): unlike the JSON demo, no explicit console is
            # passed here — presumably the utility falls back to a default.
            parse_and_display_result(
                "Table Extraction Demo",
                {"text": text},
                structured_result_data  # Pass the structured data
            )

        except json.JSONDecodeError as e:
            # Log the error regardless of debug flag
            logger.error(f"JSONDecodeError in Table Extraction occurred: {e}", exc_info=False)

            if USE_DEBUG_LOGS:
                # Log both raw and cleaned versions for debugging the failure
                logger.debug(f"Raw string causing JSONDecodeError in Table Extraction:\n{raw_text}")
                logger.debug(f"Cleaned string that failed JSON parsing in Table Extraction:\n{text_to_parse}")
                # Print a rich traceback to the console
                console.print("[bold red]-- Traceback for JSONDecodeError (Table Extraction) --[/bold red]")
                console.print(Traceback())
                console.print("[bold red]-- End Traceback --[/bold red]")

            # If JSON parsing fails, show the raw response using the original raw_text
            console.print(Panel(
                raw_text,
                title="[yellow]Raw Model Output (JSON parsing failed)[/yellow]",
                border_style="red"
            ))

    except Exception as e:
        logger.error(f"Error in table extraction: {str(e)}", emoji_key="error")
    # Add exit log
    if USE_DEBUG_LOGS:
        logger.debug("Exiting table_extraction_demo.")
417 |
async def semantic_schema_inference_demo(provider, tracker: CostTracker) -> None:
    """Demonstrate semantic schema inference.

    Feeds a sample patient record to the provider together with a target
    schema and asks it to extract information matching that structure, then
    parses and displays the structured response.

    Args:
        provider: Initialized LLM provider instance; the demo is skipped when
            this is falsy.
        tracker: CostTracker that accumulates token usage and cost for the
            completion call.
    """
    if USE_DEBUG_LOGS:
        logger.debug("Entering semantic_schema_inference_demo.")
    if not provider:
        console.print("[yellow]Skipping semantic schema inference demo - no provider available.[/yellow]")
        if USE_DEBUG_LOGS:
            logger.debug("Exiting semantic_schema_inference_demo (no provider).")
        return

    logger.info("Starting semantic schema inference demo", emoji_key="start")

    # Sample text for schema inference
    text = """
Patient Record: John Smith
Date of Birth: 05/12/1978
Patient ID: P-98765
Blood Type: O+
Height: 182 cm
Weight: 76 kg

Medications:
- Lisinopril 10mg, once daily
- Metformin 500mg, twice daily
- Atorvastatin 20mg, once daily at bedtime

Allergies:
- Penicillin (severe)
- Shellfish (mild)

Recent Vital Signs:
Date: 03/15/2024
Blood Pressure: 128/85 mmHg
Heart Rate: 72 bpm
Temperature: 98.6°F
Oxygen Saturation: 98%

Medical History:
- Type 2 Diabetes (diagnosed 2015)
- Hypertension (diagnosed 2017)
- Hyperlipidemia (diagnosed 2019)
- Appendectomy (2005)
"""

    # Define a schema template for the extraction
    patient_schema = {
        "type": "object",
        "properties": {
            "patient": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "dob": {"type": "string"},
                    "id": {"type": "string"},
                    "blood_type": {"type": "string"},
                    "height": {"type": "string"},
                    "weight": {"type": "string"}
                }
            },
            "medications": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "dosage": {"type": "string"},
                        "frequency": {"type": "string"}
                    }
                }
            },
            "allergies": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "allergen": {"type": "string"},
                        "severity": {"type": "string"}
                    }
                }
            },
            "vital_signs": {
                "type": "object",
                "properties": {
                    "date": {"type": "string"},
                    "blood_pressure": {"type": "string"},
                    "heart_rate": {"type": "string"},
                    "temperature": {"type": "string"},
                    "oxygen_saturation": {"type": "string"}
                }
            },
            "medical_history": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "condition": {"type": "string"},
                        "diagnosed": {"type": "string"}
                    }
                }
            }
        }
    }

    # Log schema inference attempt
    logger.info("Performing schema inference", emoji_key="processing")

    try:
        start_time = time.time()

        # Prompt for semantic schema extraction
        prompt = f"""
Extract structured information from the text according to the provided semantic schema.

TEXT:
{text}

SEMANTIC SCHEMA:
{json.dumps(patient_schema, indent=2)}

Analyze the text and extract information following the schema structure. Return a valid JSON object.
"""

        if USE_DEBUG_LOGS:
            logger.debug(f"Schema Inference Prompt:\n{prompt}")

        # Call the provider directly
        result = await provider.generate_completion(
            prompt=prompt,
            model="gpt-4.1-mini",
            temperature=0.2,
            max_tokens=1000
        )

        # Track cost
        tracker.add_call(result)

        if USE_DEBUG_LOGS:
            logger.debug(f"Raw Schema Inference Result Text:\n{result.text}")

        try:
            # Try to parse the response as JSON; strip any markdown code
            # fence around it first.
            raw_text = result.text.strip()
            text_to_parse = extract_json_from_markdown(raw_text)
            if USE_DEBUG_LOGS:
                logger.debug(f"Raw text received (Schema): {raw_text[:500]}...")
                logger.debug(f"Attempting to parse Schema Inference JSON after cleaning: {text_to_parse[:500]}...")
            json_result = json.loads(text_to_parse)
            if USE_DEBUG_LOGS:
                logger.debug(f"Successfully parsed Schema Inference JSON: {json.dumps(json_result, indent=2)}")

            # Create structured data dictionary for display
            structured_result_data = {
                "extracted_data": json_result,
                "model": result.model,
                "processing_time": time.time() - start_time,
                "tokens": {
                    "input": result.input_tokens,
                    "output": result.output_tokens,
                    "total": result.input_tokens + result.output_tokens
                },
                "cost": result.cost
            }

            # Parse the result using the shared utility
            parse_and_display_result(
                "Semantic Schema Inference Demo",
                {"text": text},
                structured_result_data  # Pass the structured data
            )

        except json.JSONDecodeError as e:
            # Log the error regardless of debug flag
            logger.error(f"JSONDecodeError in Schema Inference occurred: {e}", exc_info=False)

            if USE_DEBUG_LOGS:
                # Log both raw and cleaned versions
                logger.debug(f"Raw string causing JSONDecodeError in Schema Inference:\n{raw_text}")
                logger.debug(f"Cleaned string that failed JSON parsing in Schema Inference:\n{text_to_parse}")
                # Print a rich traceback to the console
                console.print("[bold red]-- Traceback for JSONDecodeError (Schema Inference) --[/bold red]")
                console.print(Traceback())
                console.print("[bold red]-- End Traceback --[/bold red]")

            # If JSON parsing fails, show the raw response
            console.print(Panel(
                raw_text,
                title="[yellow]Raw Model Output (JSON parsing failed)[/yellow]",
                border_style="red"
            ))

    except Exception as e:
        logger.error(f"Error in schema inference: {str(e)}", emoji_key="error")
    # Add exit log
    if USE_DEBUG_LOGS:
        logger.debug("Exiting semantic_schema_inference_demo.")
613 |
614 | async def entity_extraction_demo(provider, tracker: CostTracker):
615 | """Demonstrate entity extraction capabilities."""
616 | if USE_DEBUG_LOGS:
617 | logger.debug("Entering entity_extraction_demo.")
618 | if not provider:
619 | console.print("[yellow]Skipping entity extraction demo - no provider available.[/yellow]")
620 | if USE_DEBUG_LOGS:
621 | logger.debug("Exiting entity_extraction_demo (no provider).")
622 | return
623 |
624 | logger.info("Starting entity extraction demo", emoji_key="start")
625 |
626 | # Sample text for entity extraction
627 | text = """
628 | In a groundbreaking announcement on March 15, 2024, Tesla unveiled its latest solar energy
629 | technology in partnership with SolarCity. CEO Elon Musk presented the new PowerWall 4.0
630 | battery system at their headquarters in Austin, Texas. The system can store up to 20kWh of
631 | energy and costs approximately $6,500 per unit.
632 |
633 | According to Dr. Maria Chen, lead researcher at the National Renewable Energy Laboratory (NREL),
634 | this technology represents a significant advancement in residential energy storage. The new
635 | system integrates with the Tesla mobile app on both iOS and Android platforms, allowing users
636 | to monitor energy usage in real-time.
637 |
638 | Tesla stock (TSLA) rose 5.8% following the announcement, reaching $248.32 per share on the NASDAQ.
639 | The company plans to begin production at their Gigafactory Nevada location by June 2024, with
640 | initial deployments in California and Texas markets.
641 | """
642 |
643 | # Log entity extraction attempt
644 | logger.info("Performing entity extraction", emoji_key="processing")
645 |
646 | try:
647 | start_time = time.time()
648 |
649 | # Prompt for entity extraction
650 | prompt = f"""
651 | Extract key-value pairs and entities from the following text, categorized by type.
652 |
653 | TEXT:
654 | {text}
655 |
656 | Extract the following categories of information:
657 | - Organizations (companies, institutions, etc.)
658 | - People (names and titles)
659 | - Locations (cities, states, facilities, etc.)
660 | - Dates and Times
661 | - Products and Technologies
662 | - Numerical Values (monetary values, percentages, measurements, etc.)
663 |
664 | Format the output as a JSON object with these categories as keys, and each containing relevant entities found.
665 | Within each category, provide structured information when possible.
666 | """
667 |
668 | if USE_DEBUG_LOGS:
669 | logger.debug(f"Entity Extraction Prompt:\n{prompt}")
670 |
671 | # Call the provider directly
672 | result = await provider.generate_completion(
673 | prompt=prompt,
674 | model="gpt-4.1-mini",
675 | temperature=0.2,
676 | max_tokens=500
677 | )
678 |
679 | # Track cost
680 | tracker.add_call(result)
681 |
682 | if USE_DEBUG_LOGS:
683 | logger.debug(f"Raw Entity Extraction Result Text:\n{result.text}")
684 |
685 | try:
686 | # Try to parse the response as JSON
687 | raw_text = result.text.strip()
688 | text_to_parse = extract_json_from_markdown(raw_text)
689 | if USE_DEBUG_LOGS:
690 | logger.debug(f"Raw text received (Entity): {raw_text[:500]}...")
691 | logger.debug(f"Attempting to parse Entity Extraction JSON after cleaning: {text_to_parse[:500]}...")
692 | if USE_DEBUG_LOGS:
693 | logger.debug(f"EXACT STRING PASSED TO json.loads: >>>{text_to_parse}<<<")
694 |
695 | try:
696 | # First try standard parsing
697 | json_result = json.loads(text_to_parse)
698 | except json.JSONDecodeError as e:
699 | logger.warning(f"Standard JSON parsing failed: {e}. Attempting emergency repair.")
700 |
701 | # Emergency fallback for malformed JSON due to unterminated strings
702 | # 1. Look for the raw JSON structure with markdown removed
703 | text_no_markdown = text_to_parse
704 |
705 | # 2. Manually check for key entity categories, even if JSON is malformed
706 | # Create a structured result with categories we expect to find
707 | json_result = {
708 | "Organizations": [],
709 | "People": [],
710 | "Locations": [],
711 | "Dates and Times": [],
712 | "Products and Technologies": [],
713 | "Numerical Values": []
714 | }
715 |
716 | # Look for entity categories using regex
717 | org_matches = re.findall(r'"name"\s*:\s*"([^"]+)".*?"type"\s*:\s*"([^"]+)"', text_no_markdown)
718 | for name, entity_type in org_matches:
719 | # Determine which category this entity belongs to based on type
720 | if any(keyword in entity_type.lower() for keyword in ["company", "corporation", "institution", "exchange"]):
721 | json_result["Organizations"].append({"name": name, "type": entity_type})
722 | elif any(keyword in entity_type.lower() for keyword in ["city", "state", "facility"]):
723 | json_result["Locations"].append({"name": name, "type": entity_type})
724 | elif any(keyword in entity_type.lower() for keyword in ["battery", "app", "system", "technology"]):
725 | json_result["Products and Technologies"].append({"name": name, "type": entity_type})
726 |
727 | # Look for people - they usually have titles and organizations
728 | people_matches = re.findall(r'"name"\s*:\s*"([^"]+)".*?"title"\s*:\s*"([^"]+)".*?"organization"\s*:\s*"([^"]*)"', text_no_markdown)
729 | for name, title, org in people_matches:
730 | json_result["People"].append({"name": name, "title": title, "organization": org})
731 |
732 | # Dates and numerical values are harder to extract generically
733 | # but we can look for obvious patterns
734 | date_matches = re.findall(r'"date"\s*:\s*"([^"]+)".*?"event"\s*:\s*"([^"]+)"', text_no_markdown)
735 | for date, event in date_matches:
736 | json_result["Dates and Times"].append({"date": date, "event": event})
737 |
738 | # For numerical values, look for values with units
739 | value_matches = re.findall(r'"value"\s*:\s*([^,]+).*?"unit"\s*:\s*"([^"]+)"', text_no_markdown)
740 | for value, unit in value_matches:
741 | # Clean up the value
742 | clean_value = value.strip('" ')
743 | item = {"value": clean_value, "unit": unit}
744 |
745 | # Look for a description if available
746 | desc_match = re.search(r'"description"\s*:\s*"([^"]+)"', text_no_markdown)
747 | if desc_match:
748 | item["description"] = desc_match.group(1)
749 |
750 | json_result["Numerical Values"].append(item)
751 |
752 | # Add a note about emergency repair
753 | logger.warning("Used emergency JSON repair - results may be incomplete")
754 |
755 | if USE_DEBUG_LOGS:
756 | logger.debug(f"Successfully parsed Entity Extraction JSON: {json.dumps(json_result, indent=2)}")
757 |
758 | # Create structured data dictionary for display
759 | structured_result_data = {
760 | "extracted_data": json_result,
761 | "structured": True,
762 | "categorized": True,
763 | "model": result.model,
764 | "processing_time": time.time() - start_time,
765 | "tokens": {
766 | "input": result.input_tokens,
767 | "output": result.output_tokens,
768 | "total": result.input_tokens + result.output_tokens
769 | },
770 | "cost": result.cost
771 | }
772 |
773 | # Parse the result using the shared utility
774 | parse_and_display_result(
775 | "Entity Extraction Demo",
776 | {"text": text},
777 | structured_result_data # Pass the structured data
778 | )
779 |
780 | except json.JSONDecodeError as e:
781 | # Log the error regardless of debug flag
782 | logger.error(f"JSONDecodeError in Entity Extraction occurred: {e}", exc_info=False)
783 |
784 | if USE_DEBUG_LOGS:
785 | # Log both raw and cleaned versions
786 | logger.debug(f"Raw string causing JSONDecodeError in Entity Extraction:\n{raw_text}")
787 | logger.debug(f"Cleaned string that failed JSON parsing in Entity Extraction:\n{text_to_parse}")
788 | # Print a rich traceback to the console
789 | console.print("[bold red]-- Traceback for JSONDecodeError (Entity Extraction) --[/bold red]")
790 | console.print(Traceback())
791 | console.print("[bold red]-- End Traceback --[/bold red]")
792 |
793 | # If JSON parsing fails, show the raw response
794 | console.print(Panel(
795 | raw_text,
796 | title="[yellow]Raw Model Output (JSON parsing failed)[/yellow]",
797 | border_style="red"
798 | ))
799 |
800 | except Exception as e:
801 | logger.error(f"Error in entity extraction: {str(e)}", emoji_key="error")
802 | # Add exit log
803 | if USE_DEBUG_LOGS:
804 | logger.debug("Exiting entity_extraction_demo.")
805 |
async def main():
    """Run the advanced extraction demos sequentially and report total cost.

    Returns:
        int: Process exit code (0 on success), so the value returned by
        ``asyncio.run(main())`` can be passed straight to ``sys.exit``.
        Previously this function returned ``None``, silently relying on
        ``sys.exit(None)`` meaning "success".
    """
    tracker = CostTracker()  # Aggregates token usage and cost across all demos
    provider = await setup_openai_provider()

    if not provider:
        logger.warning("OpenAI provider not available. Demo sections requiring it will be skipped.", emoji_key="warning")

    console.print(Rule("[bold magenta]Advanced Extraction Demos Starting[/bold magenta]"))

    demos_to_run = [
        (run_json_extraction_example, "JSON Extraction"),
        (table_extraction_demo, "Table Extraction"),
        (semantic_schema_inference_demo, "Schema Inference"),
        (entity_extraction_demo, "Entity Extraction"),
    ]

    # Execute demos sequentially; one failing demo must not abort the rest.
    for demo_func, demo_name in demos_to_run:
        try:
            await demo_func(provider, tracker)  # Each demo records its calls on the tracker
        except Exception as e:
            logger.error(f"Error running {demo_name} demo: {e}", emoji_key="error", exc_info=True)

    # Display final cost summary
    tracker.display_summary(console)

    logger.success("Advanced Extraction Demo finished successfully!", emoji_key="complete")
    console.print(Rule("[bold magenta]Advanced Extraction Demos Complete[/bold magenta]"))
    return 0  # Explicit success code for sys.exit below

if __name__ == "__main__":
    # Run the demos and propagate the exit code to the shell.
    exit_code = asyncio.run(main())
    sys.exit(exit_code)
```
--------------------------------------------------------------------------------
/ultimate_mcp_server/core/providers/anthropic.py:
--------------------------------------------------------------------------------
```python
1 | # ultimate_mcp_server/providers/anthropic.py
2 | """Anthropic (Claude) provider implementation."""
3 |
4 | import json
5 | import re
6 | import time
7 | from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
8 |
9 | from anthropic import AsyncAnthropic
10 |
11 | from ultimate_mcp_server.constants import Provider, TaskType # Import TaskType for logging
12 | from ultimate_mcp_server.core.providers.base import (
13 | BaseProvider,
14 | ModelResponse,
15 | )
16 | from ultimate_mcp_server.utils import get_logger
17 |
18 | # Use the same naming scheme everywhere: logger at module level
19 | logger = get_logger("ultimate_mcp_server.providers.anthropic")
20 |
21 |
22 | class AnthropicProvider(BaseProvider):
23 | """Provider implementation for Anthropic (Claude) API."""
24 |
25 | provider_name = Provider.ANTHROPIC.value
26 |
27 | def __init__(self, api_key: Optional[str] = None, **kwargs):
28 | """Initialize the Anthropic provider.
29 |
30 | Args:
31 | api_key: Anthropic API key
32 | **kwargs: Additional options (e.g., base_url)
33 | """
34 | super().__init__(api_key=api_key, **kwargs)
35 | self.base_url = kwargs.get("base_url")
36 | self.models_cache = None
37 | self.client: Optional[AsyncAnthropic] = None # Initialize client attribute
38 |
39 | async def initialize(self) -> bool:
40 | """Initialize the Anthropic client.
41 |
42 | Returns:
43 | bool: True if initialization was successful
44 | """
45 | if not self.api_key:
46 | self.logger.error("Anthropic API key is not configured.", emoji_key="error")
47 | return False
48 |
49 | try:
50 | self.client = AsyncAnthropic(
51 | api_key=self.api_key,
52 | base_url=self.base_url,
53 | )
54 |
55 | # Skip API call if using a mock key (for tests)
56 | if "mock-" in self.api_key:
57 | self.logger.info(
58 | "Using mock Anthropic key - skipping API validation", emoji_key="mock"
59 | )
60 | # Assume mock initialization is always successful for testing purposes
61 | self.is_initialized = True
62 | return True
63 |
64 | # Optional: Add a quick check_api_key() call here if desired,
65 | # but initialize might succeed even if key is invalid later.
66 | # is_valid = await self.check_api_key() # This makes initialize slower
67 | # if not is_valid:
68 | # self.logger.error("Anthropic API key appears invalid.", emoji_key="error")
69 | # return False
70 |
71 | self.logger.success("Anthropic provider initialized successfully", emoji_key="provider")
72 | self.is_initialized = True # Mark as initialized
73 | return True
74 |
75 | except Exception as e:
76 | self.logger.error(
77 | f"Failed to initialize Anthropic provider: {str(e)}",
78 | emoji_key="error",
79 | exc_info=True, # Log traceback for debugging
80 | )
81 | self.is_initialized = False
82 | return False
83 |
84 | async def generate_completion(
85 | self,
86 | prompt: Optional[str] = None,
87 | messages: Optional[List[Dict[str, Any]]] = None,
88 | model: Optional[str] = None,
89 | max_tokens: Optional[int] = 1024, # Signature default
90 | temperature: float = 0.7,
91 | json_mode: bool = False,
92 | **kwargs,
93 | ) -> ModelResponse:
94 | """Generate a single non-chat completion using Anthropic Claude.
95 |
96 | Args:
97 | prompt: Text prompt to send to the model.
98 | messages: List of message dictionaries, alternative to prompt.
99 | model: Model name to use (e.g., "claude-3-opus-20240229").
100 | max_tokens: Maximum tokens to generate. Defaults to 1024.
101 | temperature: Temperature parameter (0.0-1.0).
102 | json_mode: If True, attempt to guide model towards JSON output (via prompting).
103 | **kwargs: Additional model-specific parameters (e.g., top_p, system).
104 |
105 | Returns:
106 | ModelResponse object.
107 | """
108 | if not self.client:
109 | if not await self.initialize():
110 | raise ConnectionError("Anthropic provider failed to initialize.")
111 |
112 | model = model or self.get_default_model()
113 | actual_model_name = self.strip_provider_prefix(model)
114 |
115 | # Original logic: Validate that either prompt or messages is provided
116 | if prompt is None and not messages:
117 | raise ValueError("Either 'prompt' or 'messages' must be provided")
118 |
119 | # Original logic: If messages are provided, use the chat_completion function
120 | if messages:
121 | # Ensure all necessary parameters are passed to generate_chat_completion
122 | # This includes system_prompt if it's in kwargs
123 | return await self.generate_chat_completion(
124 | messages=messages,
125 | model=model, # Pass original model ID
126 | max_tokens=max_tokens,
127 | temperature=temperature,
128 | json_mode=json_mode, # Pass json_mode
129 | **kwargs # Pass other kwargs like system, top_p etc.
130 | )
131 |
132 | # Original logic: Prepare message list for the API from prompt
133 | # This path is taken if only 'prompt' is provided (and not 'messages')
134 | current_api_messages = [{"role": "user", "content": prompt}]
135 |
136 | # Original logic: Handle system prompt if passed in kwargs for the simple prompt case
137 | system_prompt = kwargs.pop("system", None)
138 |
139 | # Original logic: Handle JSON mode for simple prompt case
140 | if json_mode:
141 | self.logger.debug(
142 | "json_mode=True requested for completion (simple prompt), modifying user message for Anthropic."
143 | )
144 | # Modify the user message content in current_api_messages
145 | user_message_idx = -1
146 | for i, msg in enumerate(current_api_messages):
147 | if msg["role"] == "user":
148 | user_message_idx = i
149 | break
150 |
151 | if user_message_idx != -1:
152 | original_content = current_api_messages[user_message_idx]["content"]
153 | if isinstance(original_content, str) and "Please respond with valid JSON" not in original_content:
154 | current_api_messages[user_message_idx]["content"] = (
155 | f"{original_content}\\nPlease respond ONLY with valid JSON matching the expected schema. Do not include explanations or markdown formatting."
156 | )
157 | else:
158 | # This case should ideally not happen if prompt is always user role.
159 | # If it could, one might append a new user message asking for JSON,
160 | # or include it in system prompt if system_prompt is being constructed here.
161 | self.logger.warning("Could not find user message to append JSON instruction for simple prompt case.")
162 |
163 | # Prepare API call parameters using max_tokens directly from signature
164 | api_params = {
165 | "messages": current_api_messages,
166 | "model": actual_model_name,
167 | "max_tokens": max_tokens, # Uses max_tokens from signature (which defaults to 1024 if not passed)
168 | "temperature": temperature,
169 | **kwargs, # Pass remaining kwargs (like top_p, etc.) that were not popped
170 | }
171 | if system_prompt: # Add system prompt if it was extracted
172 | api_params["system"] = system_prompt
173 |
174 | # Logging before API call (original style)
175 | self.logger.info(
176 | f"Generating completion with Anthropic model {actual_model_name}",
177 | emoji_key=TaskType.COMPLETION.value,
178 | prompt_length=len(prompt) if prompt else 0, # length of prompt if provided
179 | json_mode_requested=json_mode,
180 | )
181 |
182 | try:
183 | response, processing_time = await self.process_with_timer(
184 | self.client.messages.create, **api_params
185 | )
186 | except Exception as e:
187 | error_message = f"Anthropic API error during completion for model {actual_model_name}: {type(e).__name__}: {str(e)}"
188 | self.logger.error(error_message, exc_info=True)
189 | raise ConnectionError(error_message) from e
190 |
191 | if (
192 | not response.content
193 | or not isinstance(response.content, list)
194 | or not hasattr(response.content[0], "text")
195 | ):
196 | raise ValueError(f"Unexpected response format from Anthropic API: {response}")
197 | completion_text = response.content[0].text
198 |
199 | # Post-process if JSON mode was requested (for simple prompt case) - best effort extraction
200 | if json_mode: # This json_mode is the original parameter
201 | original_text_for_json_check = completion_text
202 | completion_text = self._extract_json_from_text(completion_text)
203 | if original_text_for_json_check != completion_text:
204 | self.logger.debug("Extracted JSON content from Anthropic response post-processing (simple prompt case).")
205 |
206 | result = ModelResponse(
207 | text=completion_text,
208 | model=f"{self.provider_name}/{actual_model_name}",
209 | provider=self.provider_name,
210 | input_tokens=response.usage.input_tokens,
211 | output_tokens=response.usage.output_tokens,
212 | processing_time=processing_time,
213 | raw_response=response.model_dump(),
214 | )
215 | result.message = {"role": "assistant", "content": completion_text}
216 |
217 | self.logger.success(
218 | "Anthropic completion successful",
219 | emoji_key="success",
220 | model=result.model,
221 | tokens={"input": result.input_tokens, "output": result.output_tokens},
222 | cost=result.cost,
223 | time=result.processing_time,
224 | )
225 | return result
226 |
227 | # --- NEW METHOD ---
    async def generate_chat_completion(
        self,
        messages: List[
            Dict[str, Any]
        ],  # Dict used for broad compatibility instead of the SDK's MessageParam type
        model: Optional[str] = None,
        max_tokens: Optional[int] = 1024,  # Provide a default
        temperature: float = 0.7,
        json_mode: bool = False,  # Emulated via prompting; Anthropic has no native JSON mode
        **kwargs,
    ) -> ModelResponse:
        """Generate a chat completion using Anthropic Claude.

        System messages embedded in ``messages`` are extracted (Anthropic
        requires the system prompt as a top-level API parameter, not a
        message); the first one found overrides any ``system`` passed via
        kwargs. Messages with roles other than user/assistant/system are
        dropped with a warning. ``json_mode=True`` is implemented purely by
        prompt engineering.

        Args:
            messages: A list of message dictionaries (e.g., [{"role": "user", "content": "..."}]).
                Should conform to Anthropic's expected format.
            model: Model name to use (e.g., "claude-3-opus-20240229").
            max_tokens: Maximum tokens to generate. Defaults to 1024.
            temperature: Temperature parameter (0.0-1.0).
            json_mode: If True, guide the model to generate JSON output (via prompt engineering).
            **kwargs: Additional model-specific parameters (e.g., top_p, system).

        Returns:
            ModelResponse object containing the assistant's message.

        Raises:
            ConnectionError: If provider initialization fails or the API call errors.
            ValueError: If the API response has an unexpected shape.
        """
        if not self.client:
            if not await self.initialize():
                raise ConnectionError("Anthropic provider failed to initialize.")

        model = model or self.get_default_model()
        actual_model_name = self.strip_provider_prefix(model)

        # Handle system prompt extraction (popped so it isn't sent twice via **kwargs)
        system_prompt = kwargs.pop("system", None)

        # Process the messages to extract system message and convert to Anthropic format
        processed_messages = []
        extracted_system = None

        for msg in messages:
            role = msg.get("role", "")
            content = msg.get("content", "")

            # Extract system message if present
            if role == "system":
                if extracted_system is None:  # Take the first system message
                    extracted_system = content
                # Don't add system messages to the processed_messages list
                continue
            elif role in ("user", "assistant"):
                # Keep user and assistant messages
                processed_messages.append({"role": role, "content": content})
            else:
                self.logger.warning(f"Ignoring unsupported message role: {role}")

        # If we found a system message, use it (overrides any system in kwargs)
        if extracted_system is not None:
            system_prompt = extracted_system

        # Process json_mode by modifying system prompt or last user message
        json_mode_requested = json_mode

        if json_mode_requested:
            self.logger.debug(
                "json_mode=True requested for chat completion, implementing via prompt engineering for Anthropic"
            )

            # If we have a system prompt, update it to include JSON instructions
            if system_prompt:
                system_prompt = f"{system_prompt}\n\nIMPORTANT: You must respond ONLY with valid JSON matching the expected schema. Do not include explanations or markdown formatting."
            # Otherwise, if there's at least one user message, modify the last one
            elif processed_messages and any(m.get("role") == "user" for m in processed_messages):
                # Find last user message (iterate backwards)
                for i in range(len(processed_messages) - 1, -1, -1):
                    if processed_messages[i].get("role") == "user":
                        user_content = processed_messages[i].get("content", "")
                        # Only add JSON instruction if not already present
                        # NOTE(review): assumes content is a str here; a content-block
                        # list would make the `in` checks element tests — confirm callers.
                        if "respond with JSON" not in user_content and "respond in JSON" not in user_content:
                            processed_messages[i]["content"] = f"{user_content}\n\nPlease respond ONLY with valid JSON. Do not include explanations or markdown formatting."
                        break
            # If neither system prompt nor user messages to modify, add a system prompt
            else:
                system_prompt = "You must respond ONLY with valid JSON. Do not include explanations or markdown formatting."

        # Prepare API call parameters
        api_params = {
            "messages": processed_messages,
            "model": actual_model_name,
            "max_tokens": max_tokens,
            "temperature": temperature,
            **kwargs,  # Pass remaining kwargs (like top_p, etc.)
        }
        if system_prompt:
            api_params["system"] = system_prompt

        self.logger.info(
            f"Generating chat completion with Anthropic model {actual_model_name}",
            emoji_key=TaskType.CHAT.value,  # Use enum value
            message_count=len(processed_messages),
            json_mode_requested=json_mode_requested,  # Log if it was requested
        )

        try:
            response, processing_time = await self.process_with_timer(
                self.client.messages.create, **api_params
            )
        except Exception as e:
            # Wrap any SDK/API failure in ConnectionError, preserving the cause chain
            error_message = f"Anthropic API error during chat completion for model {actual_model_name}: {type(e).__name__}: {str(e)}"
            self.logger.error(error_message, exc_info=True)
            raise ConnectionError(error_message) from e

        # Extract response content (API returns a list of content blocks; first must be text)
        if (
            not response.content
            or not isinstance(response.content, list)
            or not hasattr(response.content[0], "text")
        ):
            raise ValueError(f"Unexpected response format from Anthropic API: {response}")
        assistant_content = response.content[0].text

        # Create standardized response including the assistant message
        result = ModelResponse(
            text=assistant_content,  # Keep raw text accessible
            model=f"{self.provider_name}/{actual_model_name}",  # Return prefixed model ID
            provider=self.provider_name,
            input_tokens=response.usage.input_tokens,
            output_tokens=response.usage.output_tokens,
            processing_time=processing_time,
            raw_response=response.model_dump(),  # Use model_dump() if Pydantic
        )

        # Add message to result for chat_completion callers
        result.message = {"role": "assistant", "content": assistant_content}

        # Log success
        self.logger.success(
            "Anthropic chat completion successful",
            emoji_key="success",
            model=result.model,
            tokens={"input": result.input_tokens, "output": result.output_tokens},
            cost=result.cost,
            time=result.processing_time,
        )

        return result
373 |
374 | # --- END NEW METHOD ---
375 |
    async def generate_completion_stream(
        self,
        # Keep existing signature: accepts prompt primarily, but also messages/system in kwargs
        prompt: Optional[str] = None,  # Make prompt optional if messages are primary input
        messages: Optional[List[Dict[str, Any]]] = None,  # Allow messages directly
        model: Optional[str] = None,
        max_tokens: Optional[int] = 1024,  # Default max_tokens
        temperature: float = 0.7,
        json_mode: bool = False,  # Accept json_mode flag
        **kwargs,
    ) -> AsyncGenerator[Tuple[str, Dict[str, Any]], None]:
        """Generate a streaming completion using Anthropic Claude. Handles both prompt and message inputs.

        Yields text chunks as they arrive, followed by one final empty chunk
        whose metadata carries aggregated token counts, processing time, and
        finish reason. API errors during streaming are NOT re-raised: they are
        reported as a final chunk with ``finish_reason="error"`` and an
        ``error`` field, so callers must inspect the metadata.

        Args:
            prompt: (Optional) Text prompt (if messages not provided).
            messages: (Optional) List of message dictionaries. Takes precedence over prompt.
            model: Model name to use.
            max_tokens: Maximum tokens to generate. Defaults to 1024.
            temperature: Temperature parameter.
            json_mode: If True, guides model towards JSON (via prompting if using prompt input).
            **kwargs: Additional parameters (system, top_p, etc.).

        Yields:
            Tuple of (text_chunk, metadata).

        Raises:
            ConnectionError: If provider initialization fails.
            ValueError: If neither prompt nor messages are provided.
        """
        if not self.client:
            if not await self.initialize():
                raise ConnectionError("Anthropic provider failed to initialize.")

        model = model or self.get_default_model()
        actual_model_name = self.strip_provider_prefix(model)

        # Prepare system prompt if provided in kwargs (top-level API parameter for Anthropic)
        system_prompt = kwargs.pop("system", None)

        # Determine input messages: Use 'messages' if provided, otherwise construct from 'prompt'
        if messages:
            # Process the messages to extract system message and convert to Anthropic format
            processed_messages = []
            extracted_system = None

            for msg in messages:
                role = msg.get("role", "")
                content = msg.get("content", "")

                # Extract system message if present
                if role == "system":
                    if extracted_system is None:  # Take the first system message
                        extracted_system = content
                    # Don't add system messages to the processed_messages list
                    continue
                elif role in ("user", "assistant"):
                    # Keep user and assistant messages
                    processed_messages.append({"role": role, "content": content})
                else:
                    self.logger.warning(f"Ignoring unsupported message role in streaming: {role}")

            # If we found a system message, use it (overrides any system in kwargs)
            if extracted_system is not None:
                system_prompt = extracted_system

            input_desc = f"{len(processed_messages)} messages"
        elif prompt:
            # Construct messages from prompt
            processed_messages = [{"role": "user", "content": prompt}]
            input_desc = f"prompt ({len(prompt)} chars)"

            # Apply JSON mode prompt modification ONLY if using prompt input
            # (the messages path gets its JSON instruction via the system prompt below)
            if json_mode:
                self.logger.debug(
                    "json_mode=True requested for stream completion, modifying prompt for Anthropic."
                )
                user_message = processed_messages[-1]
                original_content = user_message["content"]
                if "Please respond with valid JSON" not in original_content:
                    user_message["content"] = (
                        f"{original_content}\nPlease respond ONLY with valid JSON matching the expected schema. Do not include explanations or markdown formatting."
                    )
        else:
            raise ValueError(
                "Either 'prompt' or 'messages' must be provided for generate_completion_stream"
            )

        # Apply JSON mode to system prompt if using messages input and json_mode is True
        # (also pops a stray 'json_mode' kwarg so it is not forwarded to the API)
        json_mode_requested = kwargs.pop("json_mode", json_mode)  # Keep track if it was requested
        if json_mode_requested and messages:
            if system_prompt:
                system_prompt = f"{system_prompt}\n\nIMPORTANT: You must respond ONLY with valid JSON matching the expected schema. Do not include explanations or markdown formatting."
            else:
                system_prompt = "You must respond ONLY with valid JSON matching the expected schema. Do not include explanations or markdown formatting."

        # Prepare API call parameters
        params = {
            "model": actual_model_name,
            "messages": processed_messages,
            "temperature": temperature,
            "max_tokens": max_tokens,  # Use the default or provided value
            **kwargs,  # Pass remaining kwargs
        }
        if system_prompt:
            params["system"] = system_prompt

        self.logger.info(
            f"Generating streaming completion with Anthropic model {actual_model_name}",
            emoji_key=self.provider_name,
            input_type=input_desc,
            json_mode_requested=json_mode_requested,
        )

        start_time = time.time()
        total_chunks = 0
        final_input_tokens = 0
        final_output_tokens = 0
        finish_reason = None  # Track finish reason

        try:
            async with self.client.messages.stream(**params) as stream:
                async for chunk in stream:
                    # Extract text delta
                    if chunk.type == "content_block_delta":
                        content = chunk.delta.text
                        total_chunks += 1
                        metadata = {
                            "model": f"{self.provider_name}/{actual_model_name}",
                            "provider": self.provider_name,
                            "chunk_index": total_chunks,
                            "finish_reason": None,  # Not final yet
                        }
                        yield content, metadata

                    # Don't attempt to capture usage from delta chunks - wait for final message

                # Important: Get final tokens from the final message state
                try:
                    final_message = await stream.get_final_message()
                    final_input_tokens = final_message.usage.input_tokens if hasattr(final_message, 'usage') else 0
                    final_output_tokens = final_message.usage.output_tokens if hasattr(final_message, 'usage') else 0
                    # Ensure finish_reason is captured from the final message
                    finish_reason = final_message.stop_reason if hasattr(final_message, 'stop_reason') else "unknown"
                except Exception as e:
                    # If we can't get the final message for any reason, log it but continue
                    self.logger.warning(f"Couldn't get final message stats: {e}")
                    # Estimate token counts based on total characters / avg chars per token
                    char_count = sum(len(m.get("content", "")) for m in processed_messages)
                    final_input_tokens = char_count // 4  # Rough estimate
                    final_output_tokens = total_chunks * 5  # Very rough estimate

            processing_time = time.time() - start_time
            self.logger.success(
                "Anthropic streaming completion successful",
                emoji_key="success",
                model=f"{self.provider_name}/{actual_model_name}",
                chunks=total_chunks,
                tokens={"input": final_input_tokens, "output": final_output_tokens},
                time=processing_time,
                finish_reason=finish_reason,
            )

            # Yield a final empty chunk with aggregated metadata
            final_metadata = {
                "model": f"{self.provider_name}/{actual_model_name}",
                "provider": self.provider_name,
                "chunk_index": total_chunks + 1,
                "input_tokens": final_input_tokens,
                "output_tokens": final_output_tokens,
                "total_tokens": final_input_tokens + final_output_tokens,
                "processing_time": processing_time,
                "finish_reason": finish_reason,
            }
            yield "", final_metadata

        except Exception as e:
            processing_time = time.time() - start_time
            self.logger.error(
                f"Anthropic streaming completion failed after {processing_time:.2f}s: {str(e)}",
                emoji_key="error",
                model=f"{self.provider_name}/{actual_model_name}",
                exc_info=True,
            )
            # Yield a final error chunk
            error_metadata = {
                "model": f"{self.provider_name}/{actual_model_name}",
                "provider": self.provider_name,
                "chunk_index": total_chunks + 1,
                "error": f"{type(e).__name__}: {str(e)}",
                "finish_reason": "error",
                "processing_time": processing_time,
            }
            yield "", error_metadata
            # Don't re-raise here, let the caller handle the error chunk
570 |
571 | async def list_models(self) -> List[Dict[str, Any]]:
572 | """List available Anthropic Claude models.
573 |
574 | Returns:
575 | List of model information dictionaries including the provider prefix.
576 | """
577 | # Anthropic doesn't have a list models endpoint, return static list WITH prefix
578 | # Based on the models defined in constants.py
579 | static_models = [
580 | # Define with the full ID including provider prefix
581 | {
582 | "id": f"{self.provider_name}/claude-3-7-sonnet-20250219",
583 | "name": "Claude 3.7 Sonnet",
584 | "context_window": 200000,
585 | "input_cost_pmt": 3.0,
586 | "output_cost_pmt": 15.0,
587 | "features": ["chat", "completion", "vision", "tool_use"],
588 | },
589 | {
590 | "id": f"{self.provider_name}/claude-3-5-haiku-20241022",
591 | "name": "Claude 3.5 Haiku",
592 | "context_window": 200000,
593 | "input_cost_pmt": 0.80,
594 | "output_cost_pmt": 4.0,
595 | "features": ["chat", "completion", "vision"],
596 | },
597 | {
598 | "id": f"{self.provider_name}/claude-3-opus-20240229",
599 | "name": "Claude 3 Opus",
600 | "context_window": 200000,
601 | "input_cost_pmt": 15.0,
602 | "output_cost_pmt": 75.0,
603 | "features": ["chat", "completion", "vision"],
604 | },
605 | ]
606 |
607 | # Simple caching (optional, as list is static)
608 | if not self.models_cache:
609 | self.models_cache = static_models
610 | return self.models_cache
611 |
612 | def get_default_model(self) -> str:
613 | """Get the default Anthropic model ID (including provider prefix).
614 |
615 | Returns:
616 | Default model ID string (e.g., "anthropic/claude-3-5-haiku-20241022").
617 | """
618 | # Try getting from config first
619 | from ultimate_mcp_server.config import get_config
620 |
621 | default_model_id = f"{self.provider_name}/claude-3-5-haiku-20241022" # Hardcoded default
622 |
623 | try:
624 | config = get_config()
625 | # Access nested provider config safely
626 | provider_config = config.providers.get(self.provider_name) if config.providers else None
627 | if provider_config and provider_config.default_model:
628 | # Ensure the configured default includes the prefix
629 | configured_default = provider_config.default_model
630 | if not configured_default.startswith(f"{self.provider_name}/"):
631 | self.logger.warning(
632 | f"Configured default model '{configured_default}' for Anthropic is missing the provider prefix. Using hardcoded default: {default_model_id}"
633 | )
634 | return default_model_id
635 | else:
636 | return configured_default
637 | except (ImportError, AttributeError, TypeError) as e:
638 | self.logger.debug(
639 | f"Could not retrieve default model from config ({e}), using hardcoded default."
640 | )
641 |
642 | return default_model_id
643 |
async def check_api_key(self) -> bool:
    """Validate the Anthropic API key by issuing a minimal messages request.

    Returns:
        bool: True if API key allows a basic request.
    """
    if not self.client:
        self.logger.warning("Cannot check API key: Anthropic client not initialized.")
        # Try a lazy initialization before giving up on the key.
        if not await self.initialize():
            return False  # Initialization failed; key likely invalid or other issue
        if not self.client:
            # Initialization succeeded without a real client (mock-key path):
            # treat the key as 'valid' for testing purposes.
            return True

    try:
        # The raw Anthropic SDK expects the model name *without* our provider prefix.
        model_name = self.strip_provider_prefix(self.get_default_model())
        await self.client.messages.create(
            model=model_name,
            messages=[{"role": "user", "content": "Test"}],
            max_tokens=1,
        )
    except Exception as exc:
        self.logger.warning(f"Anthropic API key validation failed: {type(exc).__name__}")
        return False

    self.logger.info("Anthropic API key validation successful.")
    return True
672 |
def strip_provider_prefix(self, model_id: str) -> str:
    """Remove this provider's prefix (e.g. 'anthropic/' or 'anthropic:') from a model ID.

    Returns the ID unchanged when no known prefix is present.
    """
    # '/' is the current separator; ':' is accepted for backward compatibility.
    for separator in ("/", ":"):
        candidate_prefix = f"{self.provider_name}{separator}"
        if model_id.startswith(candidate_prefix):
            return model_id[len(candidate_prefix):]
    return model_id
683 |
684 | def _extract_json_from_text(self, text: str) -> str:
685 | """Extract JSON content from text that might include markdown code blocks or explanatory text.
686 |
687 | Args:
688 | text: The raw text response that might contain JSON
689 |
690 | Returns:
691 | Cleaned JSON content
692 | """
693 |
694 | # First check if the text is already valid JSON
695 | try:
696 | json.loads(text)
697 | return text # Already valid JSON
698 | except json.JSONDecodeError:
699 | pass # Continue with extraction
700 |
701 | # Extract JSON from code blocks - most common Anthropic pattern
702 | code_block_match = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
703 | if code_block_match:
704 | code_content = code_block_match.group(1).strip()
705 | try:
706 | json.loads(code_content)
707 | return code_content
708 | except json.JSONDecodeError:
709 | # Try to fix common JSON syntax issues like trailing commas
710 | fixed_content = re.sub(r',\s*([}\]])', r'\1', code_content)
711 | try:
712 | json.loads(fixed_content)
713 | return fixed_content
714 | except json.JSONDecodeError:
715 | pass # Continue with other extraction methods
716 |
717 | # Look for JSON array or object patterns in the content
718 | # Find the first [ or { and the matching closing ] or }
719 | stripped = text.strip()
720 |
721 | # Try to extract array
722 | if '[' in stripped and ']' in stripped:
723 | start = stripped.find('[')
724 | # Find the matching closing bracket
725 | end = -1
726 | depth = 0
727 | for i in range(start, len(stripped)):
728 | if stripped[i] == '[':
729 | depth += 1
730 | elif stripped[i] == ']':
731 | depth -= 1
732 | if depth == 0:
733 | end = i + 1
734 | break
735 |
736 | if end > start:
737 | array_content = stripped[start:end]
738 | try:
739 | json.loads(array_content)
740 | return array_content
741 | except json.JSONDecodeError:
742 | pass # Try other methods
743 |
744 | # Try to extract object
745 | if '{' in stripped and '}' in stripped:
746 | start = stripped.find('{')
747 | # Find the matching closing bracket
748 | end = -1
749 | depth = 0
750 | for i in range(start, len(stripped)):
751 | if stripped[i] == '{':
752 | depth += 1
753 | elif stripped[i] == '}':
754 | depth -= 1
755 | if depth == 0:
756 | end = i + 1
757 | break
758 |
759 | if end > start:
760 | object_content = stripped[start:end]
761 | try:
762 | json.loads(object_content)
763 | return object_content
764 | except json.JSONDecodeError:
765 | pass # Try other methods
766 |
767 | # If all else fails, return the original text
768 | return text
769 |
async def process_with_timer(self, func, *args, **kwargs) -> Tuple[Any, float]:
    """Await *func* with the given arguments and measure its wall-clock duration.

    Returns:
        A ``(result, elapsed_seconds)`` tuple.
    """
    started = time.perf_counter()
    outcome = await func(*args, **kwargs)
    elapsed = time.perf_counter() - started
    return outcome, elapsed
776 |
777 |
```
--------------------------------------------------------------------------------
/examples/smart_browser_demo.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | """
3 | DETAILED Demonstration script for the Smart Browser Tools in Ultimate MCP Server,
4 | showcasing browsing, interaction, search, download, macro, and autopilot features.
5 | """
6 |
7 | import asyncio
8 | import logging
9 | import sys
10 | import time
11 | import traceback
12 | from datetime import datetime
13 | from pathlib import Path
14 | from typing import Any, Dict, Optional, Tuple
15 |
16 | # Add project root to path for imports when running as script
17 | # Adjust this relative path if your script structure is different
18 | _PROJECT_ROOT = Path(__file__).resolve().parent.parent
19 | if str(_PROJECT_ROOT) not in sys.path:
20 | sys.path.insert(0, str(_PROJECT_ROOT))
21 | print(f"INFO: Added {_PROJECT_ROOT} to sys.path")
22 |
23 | # Rich imports for enhanced terminal UI
24 | from rich import box, get_console # noqa: E402
25 | from rich.console import Group # noqa: E402
26 | from rich.markup import escape # noqa: E402
27 | from rich.panel import Panel # noqa: E402
28 | from rich.rule import Rule # noqa: E402
29 | from rich.table import Table # noqa: E402
30 | from rich.text import Text # noqa: E402
31 | from rich.traceback import install as install_rich_traceback # noqa: E402
32 |
33 | # Initialize Rich console
34 | console = get_console()
35 |
36 | # Define a fallback logger in case the import fails
def create_fallback_logger(name):
    """Create a basic stderr logger for use when the project logger import fails.

    Args:
        name: Logger name passed to ``logging.getLogger``.

    Returns:
        A configured ``logging.Logger`` at INFO level.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    # BUGFIX: logging.getLogger caches loggers by name, so attaching a new
    # handler on every call would duplicate output; only attach once.
    if not logger.handlers:
        handler = logging.StreamHandler()
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger
45 |
46 | # Import Gateway and MCP components
47 | from ultimate_mcp_server.core.server import Gateway # noqa: E402
48 | from ultimate_mcp_server.exceptions import ToolError, ToolInputError # noqa: E402
49 |
50 | # Import smart browser tools directly
51 | from ultimate_mcp_server.tools.smart_browser import ( # noqa: E402
52 | autopilot,
53 | browse,
54 | click,
55 | collect_documentation,
56 | download,
57 | download_site_pdfs,
58 | parallel,
59 | run_macro,
60 | search,
61 | shutdown,
62 | type_text,
63 | )
64 | from ultimate_mcp_server.utils import get_logger # noqa: E402
65 | from ultimate_mcp_server.utils.display import CostTracker # noqa: E402
66 |
67 | # Initialize logger
68 | logger = get_logger("demo.smart_browser")
69 |
70 | # Install rich tracebacks
71 | install_rich_traceback(show_locals=True, width=console.width, extra_lines=2)
72 |
73 | # --- Configuration ---
74 | # Base directory for Smart Browser outputs
75 | SMART_BROWSER_INTERNAL_BASE = "storage/smart_browser_internal" # Relative path used by the tool
76 | SMART_BROWSER_DOWNLOADS_BASE = "storage/smart_browser_downloads" # Default download relative path
77 | DEMO_OUTPUTS_DIR = Path(
78 | "./sb_demo_outputs"
79 | ) # Local dir for demo-specific outputs like the test HTML
80 |
81 | # Example URLs for demo
82 | URL_EXAMPLE = "http://example.com"
83 | URL_BOOKSTORE = "http://books.toscrape.com/"
84 | URL_QUOTES = "http://quotes.toscrape.com/"
85 | URL_PDF_SAMPLE = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
86 | URL_GITHUB = "https://github.com/features/copilot"
87 |
88 | # --- Demo Helper Functions (Unchanged from previous version) ---
89 |
90 |
def timestamp_str(short: bool = False) -> str:
    """Return the current time as a dim-styled Rich markup string."""
    current = datetime.fromtimestamp(time.time())  # Use time.time for consistency
    fmt = '%H:%M:%S' if short else '%Y-%m-%d %H:%M:%S'
    return f"[dim]{current.strftime(fmt)}[/]"
98 |
99 |
def truncate_text_by_lines(text: str, max_lines: int = 50) -> str:
    """Shorten *text* to roughly *max_lines* lines, keeping the head and tail."""
    if not text:
        return ""
    lines = text.splitlines()
    if len(lines) <= max_lines:
        return text
    # Degenerate budget: keep only the very first line plus a marker.
    if max_lines == 1:
        return lines[0] + "\n[...TRUNCATED...]"
    # Keep an equal share from each end (at least one line per side).
    keep = max(1, max_lines // 2)
    return "\n".join(lines[:keep] + ["[...TRUNCATED...]"] + lines[-keep:])
116 |
117 |
def format_value(key: str, value: Any, detail_level: int = 1) -> Any:
    """Format specific values for display, returning strings with markup.

    Args:
        key: Name of the field; some keys (``*time_seconds``, ``duration_ms``,
            ``size_bytes``) trigger special formatting.
        value: Value to render; lists/dicts are previewed recursively.
        detail_level: 0 = terse type-only previews, 1 = short item previews,
            2 = longer previews.

    Returns:
        A display string, possibly containing Rich markup.
    """
    if value is None:
        return "[dim]None[/]"  # Keep markup
    if isinstance(value, bool):
        return "[green]Yes[/]" if value else "[red]No[/]"  # Keep markup
    if isinstance(value, float):
        # NOTE(review): floats return here, so a float-valued "*time_seconds"
        # key never reaches the seconds branch below — confirm intended.
        return f"{value:.3f}"  # Return simple string
    if key.lower().endswith("time_seconds") or key.lower() == "duration_ms":
        try:
            # duration_ms values are converted to seconds before display.
            val_s = float(value) / 1000.0 if key.lower() == "duration_ms" else float(value)
            return f"[green]{val_s:.3f}s[/]"  # Keep markup
        except (ValueError, TypeError):
            return escape(str(value))  # Fallback for non-numeric time values
    if key.lower() == "size_bytes" and isinstance(value, int):
        # Human-readable byte sizes; negative means "unknown".
        if value < 0:
            return "[dim]N/A[/]"
        if value > 1024 * 1024:
            return f"{value / (1024 * 1024):.2f} MB"
        if value > 1024:
            return f"{value / 1024:.2f} KB"
        return f"{value} Bytes"  # Return simple string

    if isinstance(value, list):
        if not value:
            return "[dim]Empty List[/]"  # Keep markup
        list_len = len(value)
        preview_count = 3 if detail_level < 2 else 5
        suffix = (
            f" [dim]... ({list_len} items total)[/]" if list_len > preview_count else ""
        )  # Keep markup
        if detail_level >= 1:
            previews = [
                str(
                    format_value(f"{key}[{i}]", item, detail_level=0)
                )  # Recursive call returns string
                for i, item in enumerate(value[:preview_count])
            ]
            return f"[{', '.join(previews)}]{suffix}"  # Returns string with markup
        else:
            return f"[List with {list_len} items]"  # Keep markup

    if isinstance(value, dict):
        if not value:
            return "[dim]Empty Dict[/]"  # Keep markup
        dict_len = len(value)
        preview_count = 4 if detail_level < 2 else 8
        preview_keys = list(value.keys())[:preview_count]
        suffix = (
            f" [dim]... ({dict_len} keys total)[/]" if dict_len > preview_count else ""
        )  # Keep markup
        if detail_level >= 1:
            items_preview = [
                # Key repr for clarity, value formatted recursively
                f"{repr(k)}: {str(format_value(k, value[k], detail_level=0))}"
                for k in preview_keys
            ]
            return f"{{{'; '.join(items_preview)}}}{suffix}"  # Returns string with markup
        else:
            return f"[Dict with {dict_len} keys]"  # Keep markup

    if isinstance(value, str):
        value_truncated = truncate_text_by_lines(value, 30)  # Truncate by lines first
        preview_len = 300 if detail_level < 2 else 600
        suffix = ""
        # Check length after line truncation
        if len(value_truncated) > preview_len:
            value_display = value_truncated[:preview_len]
            suffix = "[dim]... (truncated)[/]"  # Keep markup
        else:
            value_display = value_truncated

        # Escape only if it doesn't look like it contains Rich markup
        if "[" in value_display and "]" in value_display and "/" in value_display:
            # Heuristic: Assume it might contain markup, don't escape
            return value_display + suffix
        else:
            # Safe to escape plain strings
            return escape(value_display) + suffix

    # Fallback: escape the string representation of other types
    return escape(str(value))
200 |
201 |
def display_page_state(state: Dict[str, Any], title: str = "Page State"):
    """Display the 'page_state' dictionary nicely.

    Renders the URL/title, a truncated main-text panel, and a table
    previewing up to 15 interactive elements (id, tag, role, text, bbox).

    Args:
        state: Page-state dict with optional ``url``, ``title``,
            ``main_text`` and ``elements`` keys.
        title: Title of the surrounding Rich panel.
    """
    panel_content = []
    url = state.get("url", "N/A")
    panel_content.append(
        Text.from_markup(f"[bold cyan]URL:[/bold cyan] [link={url}]{escape(url)}[/link]")
    )
    panel_content.append(
        Text.from_markup(f"[bold cyan]Title:[/bold cyan] {escape(state.get('title', 'N/A'))}")
    )

    main_text = state.get("main_text", "")
    if main_text:
        # Keep the summary short — 15 lines max.
        truncated_text = truncate_text_by_lines(main_text, 15)
        panel_content.append(Text.from_markup("\n[bold cyan]Main Text Summary:[/bold cyan]"))
        panel_content.append(Panel(escape(truncated_text), border_style="dim", padding=(0, 1)))

    elements = state.get("elements", [])
    if elements:
        elements_table = Table(
            title=Text.from_markup(f"Interactive Elements ({len(elements)} found)"),
            box=box.MINIMAL,
            show_header=True,
            padding=(0, 1),
            border_style="blue",
        )
        elements_table.add_column("ID", style="magenta", no_wrap=True)
        elements_table.add_column("Tag", style="cyan")
        elements_table.add_column("Role", style="yellow")
        elements_table.add_column("Text Preview", style="white", max_width=60)
        elements_table.add_column("BBox", style="dim")

        # Show at most 15 elements; a trailing row indicates how many were omitted.
        preview_count = 15
        for elem in elements[:preview_count]:
            elem_text_raw = elem.get("text", "")
            elem_text_preview = escape(
                elem_text_raw[:60] + ("..." if len(elem_text_raw) > 60 else "")
            )
            bbox = elem.get("bbox", [])
            # bbox is expected as [x, y, width, height]; anything else is flagged.
            if len(bbox) == 4:
                bbox_str = f"({bbox[0]}x{bbox[1]}, {bbox[2]}w{bbox[3]}h)"
            else:
                bbox_str = "[Invalid Bbox]"

            elements_table.add_row(
                str(elem.get("id", "?")),
                str(elem.get("tag", "?")),
                str(elem.get("role", "")),
                elem_text_preview,  # Pass escaped preview string
                bbox_str,
            )
        if len(elements) > preview_count:
            elements_table.add_row(
                "...",
                Text.from_markup(f"[dim]{len(elements) - preview_count} more...[/]"),
                "",
                "",
                "",
            )

        panel_content.append(Text.from_markup("\n[bold cyan]Elements:[/bold cyan]"))
        panel_content.append(elements_table)

    console.print(
        Panel(
            Group(*panel_content),
            title=Text.from_markup(title),
            border_style="blue",
            padding=(1, 2),
            expand=False,
        )
    )
274 |
275 |
def display_result(
    title: str, result: Dict[str, Any], display_options: Optional[Dict] = None
) -> None:
    """Display operation result with enhanced formatting using Rich.

    Renders, in order: a status panel (success/error), a summary table of
    top-level keys (minus hidden ones), and specialized sections for page
    state, search results, downloads, macro/autopilot steps, and collected
    documentation, depending on which keys are present in ``result``.

    Args:
        title: Heading shown in the rule above the output.
        result: Result dict returned by a Smart Browser tool call.
        display_options: Optional tweaks such as ``detail_level`` and
            ``hide_keys``; superseded by ``result["_display_options"]``
            when present.
    """
    display_options = display_options or {}
    console.print(
        Rule(
            Text.from_markup(f"[bold cyan]{escape(title)}[/] {timestamp_str(short=True)}"),
            style="cyan",
        )
    )

    success = result.get("success", False)
    detail_level = display_options.get("detail_level", 1)
    # Use _display_options from result if available, otherwise use passed options
    effective_display_options = result.get("_display_options", display_options)

    # Keys rendered by a dedicated section below are hidden from the summary table.
    hide_keys_set = set(
        effective_display_options.get(
            "hide_keys",
            [
                "success",
                "page_state",
                "results",
                "steps",
                "download",
                "final_page_state",
                "documentation",
                "raw_response",
                "raw_llm_response",
                "_display_options",  # Also hide internal options
            ],
        )
    )

    # --- Status Panel ---
    status_panel_content = Text.from_markup(
        f"Status: {'[bold green]Success[/]' if success else '[bold red]Failed[/]'}\n"
    )
    if not success:
        error_code = result.get("error_code", "N/A")
        error_msg = result.get("error", "Unknown error")
        status_panel_content.append(
            Text.from_markup(f"Error Code: [yellow]{escape(str(error_code))}[/]\n")
        )
        status_panel_content.append(
            Text.from_markup(f"Message: [red]{escape(str(error_msg))}[/]\n")
        )
        console.print(
            Panel(
                status_panel_content,
                title="Operation Status",
                border_style="red",
                padding=(1, 2),
                expand=False,
            )
        )
    else:
        console.print(
            Panel(
                status_panel_content,
                title="Operation Status",
                border_style="green",
                padding=(0, 1),
                expand=False,
            )
        )

    # --- Top Level Details ---
    details_table = Table(
        title="Result Summary", box=box.MINIMAL, show_header=False, padding=(0, 1)
    )
    details_table.add_column("Key", style="cyan", justify="right", no_wrap=True)
    details_table.add_column("Value", style="white")
    has_details = False
    for key, value in result.items():
        # Skip hidden keys and any internal (underscore-prefixed) keys.
        if key in hide_keys_set or key.startswith("_"):
            continue
        formatted_value = format_value(key, value, detail_level=detail_level)
        details_table.add_row(
            escape(str(key)), formatted_value
        )  # formatted_value is already string/markup
        has_details = True
    if has_details:
        console.print(details_table)

    # --- Special Section Displays ---

    # Page State (intermediate state takes precedence over final state)
    if "page_state" in result and isinstance(result["page_state"], dict):
        display_page_state(result["page_state"], title="Page State After Action")
    elif "final_page_state" in result and isinstance(result["final_page_state"], dict):
        display_page_state(result["final_page_state"], title="Final Page State")

    # Search Results ('results' list plus a 'query' key identifies a search result)
    if "results" in result and isinstance(result["results"], list) and "query" in result:
        search_results = result["results"]
        search_table = Table(
            title=Text.from_markup(
                f"Search Results for '{escape(result['query'])}' ({len(search_results)} found)"
            ),
            box=box.ROUNDED,
            show_header=True,
            padding=(0, 1),
        )
        search_table.add_column("#", style="dim")
        search_table.add_column("Title", style="cyan")
        search_table.add_column("URL", style="blue", no_wrap=False)
        search_table.add_column("Snippet", style="white", no_wrap=False)
        for i, item in enumerate(search_results, 1):
            title = truncate_text_by_lines(item.get("title", ""), 3)
            snippet = truncate_text_by_lines(item.get("snippet", ""), 5)
            url = item.get("url", "")
            search_table.add_row(
                str(i), escape(title), f"[link={url}]{escape(url)}[/link]", escape(snippet)
            )
        console.print(search_table)

    # Download Result
    if "download" in result and isinstance(result["download"], dict):
        dl_info = result["download"]
        dl_table = Table(
            title="Download Details", box=box.MINIMAL, show_header=False, padding=(0, 1)
        )
        dl_table.add_column("Metric", style="cyan", justify="right")
        dl_table.add_column("Value", style="white")
        dl_table.add_row("File Path", escape(dl_info.get("file_path", "N/A")))
        dl_table.add_row("File Name", escape(dl_info.get("file_name", "N/A")))
        dl_table.add_row("SHA256", escape(dl_info.get("sha256", "N/A")))
        # -1 renders as "N/A" via format_value's size_bytes handling.
        dl_table.add_row("Size", format_value("size_bytes", dl_info.get("size_bytes", -1)))
        dl_table.add_row("Source URL", escape(dl_info.get("url", "N/A")))
        dl_table.add_row(
            "Tables Extracted",
            format_value("tables_extracted", dl_info.get("tables_extracted", False)),
        )
        if dl_info.get("tables"):
            # format_value handles potential markup in table preview string
            dl_table.add_row("Table Preview", format_value("tables", dl_info.get("tables")))
        console.print(
            Panel(dl_table, title="Download Result", border_style="green", padding=(1, 2))
        )

    # Macro/Autopilot Steps
    if "steps" in result and isinstance(result["steps"], list):
        steps = result["steps"]
        steps_table = Table(
            title=Text.from_markup(f"Macro/Autopilot Steps ({len(steps)} executed)"),
            box=box.ROUNDED,
            show_header=True,
            padding=(0, 1),
        )
        steps_table.add_column("#", style="dim")
        steps_table.add_column("Action/Tool", style="cyan")
        steps_table.add_column("Arguments/Hint", style="white", no_wrap=False)
        steps_table.add_column("Status", style="yellow")
        steps_table.add_column("Result/Error", style="white", no_wrap=False)

        for i, step in enumerate(steps, 1):
            action = step.get("action", step.get("tool", "?"))
            args = step.get("args")  # Check if 'args' exists
            if args is None:  # If no 'args', use the step itself excluding status keys
                args = {
                    k: v
                    for k, v in step.items()
                    if k
                    not in ["action", "tool", "success", "result", "error", "step", "duration_ms"]
                }

            args_preview = format_value("args", args, detail_level=0)  # format_value handles markup
            success_step = step.get("success", False)
            status = "[green]OK[/]" if success_step else "[red]FAIL[/]"  # Markup string
            outcome = step.get("result", step.get("error", ""))
            outcome_preview = format_value(
                "outcome", outcome, detail_level=0
            )  # format_value handles markup
            steps_table.add_row(str(i), escape(action), args_preview, status, outcome_preview)
        console.print(steps_table)

    # Documentation (assuming it's stored under 'file_path' key now)
    if (
        "file_path" in result and result.get("pages_collected") is not None
    ):  # Check for doc collection result structure
        doc_file_path = result.get("file_path")
        pages_collected = result.get("pages_collected")
        if doc_file_path and pages_collected > 0:
            content_to_display: Any = f"[dim]Documentation saved to: {escape(doc_file_path)}[/]"
            try:
                with open(doc_file_path, "r", encoding="utf-8") as f:
                    content = f.read(1500)  # Read preview
                content_to_display += f"\n\n[bold]File Preview ({len(content)} chars):[/]\n"
                content_to_display += escape(content) + "\n[dim]...[/]"
            except Exception as e:
                # Best-effort preview: a read failure is reported, not raised.
                content_to_display += f"\n[yellow]Could not read file preview: {escape(str(e))}[/]"

            console.print(
                Panel(
                    Text.from_markup(content_to_display),
                    title=f"Collected Documentation ({pages_collected} pages)",
                    border_style="magenta",
                    padding=(1, 2),
                )
            )

    console.print()  # Add spacing
480 |
481 |
async def safe_tool_call(
    operation_name: str, tool_func: callable, *args, tracker: Optional[CostTracker] = None, **kwargs
) -> Tuple[bool, Dict[str, Any]]:
    """Safely call a tool function, handling exceptions and logging.

    Returns a ``(success, result_dict)`` pair; failures are converted into
    error dictionaries rather than raised.
    """
    console.print(
        f"\n[cyan]Calling Tool:[/][bold] {escape(operation_name)}[/] {timestamp_str(short=True)}"
    )
    display_options = kwargs.pop("display_options", {})

    MAX_ARG_LEN = 100

    def _summarize(v: Any) -> str:
        # Compact repr for debug logging: large strings/bytes and big
        # collections are reduced to their type and length.
        try:
            if isinstance(v, (str, bytes)) and len(v) > MAX_ARG_LEN:
                return f"{type(v).__name__}(len={len(v)})"
            if isinstance(v, (list, dict)) and len(v) > 10:
                return f"{type(v).__name__}(len={len(v)})"
            return repr(v)
        except Exception:  # Handle potential errors during repr()
            return f"<{type(v).__name__} repr_error>"

    log_args_repr = {k: _summarize(v) for k, v in kwargs.items()}
    logger.debug(f"Executing {operation_name} with args: {args}, kwargs: {log_args_repr}")

    try:
        # Call the tool function directly
        result = await tool_func(*args, **kwargs)
    except ToolInputError as e:
        logger.warning(f"Input error for {operation_name}: {e}")
        return False, {
            "success": False,
            "error": str(e),
            "error_code": getattr(e, "error_code", "INPUT_ERROR"),
            "_display_options": display_options,
        }
    except ToolError as e:
        logger.error(f"Tool error during {operation_name}: {e}", exc_info=True)
        return False, {
            "success": False,
            "error": str(e),
            "error_code": getattr(e, "error_code", "TOOL_ERROR"),
            "_display_options": display_options,
        }
    except Exception as e:
        logger.error(f"Unexpected error during {operation_name}: {e}", exc_info=True)
        tb_str = traceback.format_exc(limit=1)
        return False, {
            "success": False,
            "error": f"{type(e).__name__}: {e}\n{tb_str}",
            "error_type": type(e).__name__,
            "error_code": "UNEXPECTED_ERROR",
            "_display_options": display_options,
        }

    if not isinstance(result, dict):
        logger.error(f"Tool '{operation_name}' returned non-dict type: {type(result)}")
        return False, {
            "success": False,
            "error": f"Tool returned unexpected type: {type(result).__name__}",
            "error_code": "INTERNAL_ERROR",
            "_display_options": display_options,
        }

    # Store display options within the result for the display function
    result["_display_options"] = display_options
    logger.debug(f"Tool '{operation_name}' completed.")
    # Add success=True if missing and no error key present (should usually be set by tool)
    if "success" not in result and "error" not in result:
        result["success"] = True
    return result.get("success", False), result
551 |
552 |
553 | # --- Demo Sections ---
554 |
async def demo_section_1_browse(gateway, tracker: CostTracker) -> None:
    """Demonstrate basic browsing: a plain load and a selector-gated load."""
    console.print(Rule("[bold green]Demo 1: Basic Browsing[/]", style="green"))
    logger.info("Starting Demo Section 1: Basic Browsing")

    # 1a: Browse Example.com
    _, browse_result = await safe_tool_call(
        "Browse Example.com", browse, url=URL_EXAMPLE, tracker=tracker
    )
    display_result("Browse Example.com", browse_result)

    # 1b: Browse the bookstore, waiting for a specific element before returning
    _, bookstore_result = await safe_tool_call(
        "Browse Bookstore (wait for footer)",
        browse,
        url=URL_BOOKSTORE,
        wait_for_selector="footer.footer",
        tracker=tracker,
    )
    display_result("Browse Bookstore (Wait)", bookstore_result)
574 |
575 |
async def demo_section_2_interaction(gateway, tracker: CostTracker) -> None:
    """Demonstrate form filling and clicking via task hints on the bookstore site."""
    console.print(Rule("[bold green]Demo 2: Page Interaction[/]", style="green"))
    logger.info("Starting Demo Section 2: Page Interaction")

    # 2a: Search on Bookstore
    console.print(f"--- Scenario: Search for 'Science' on {URL_BOOKSTORE} ---")
    loaded_ok, initial_state_res = await safe_tool_call(
        "Load Bookstore Search Page",
        browse,
        url=URL_BOOKSTORE,
        tracker=tracker,
    )
    if not loaded_ok:
        # Nothing else in this section makes sense without the page.
        console.print("[red]Cannot proceed with interaction demo, failed to load page.[/]")
        return
    display_result("Bookstore Initial State", initial_state_res)

    # Fill the search form using task hints
    search_fields = [
        {"task_hint": "The search input field", "text": "Science", "enter": False},
    ]
    typed_ok, fill_res = await safe_tool_call(
        "Type into Bookstore Search Form",
        type_text,
        url=URL_BOOKSTORE,
        fields=search_fields,
        submit_hint="The search button",
        wait_after_submit_ms=1500,
        tracker=tracker,
    )
    display_result("Type into Bookstore Search Form", fill_res)

    if not typed_ok:
        return

    # 2b: Click the first search result (only reached if typing succeeded)
    console.print("--- Scenario: Click the first search result ---")
    current_url = fill_res.get("page_state", {}).get("url", URL_BOOKSTORE)

    _, click_res = await safe_tool_call(
        "Click First Book Result",
        click,
        url=current_url,
        task_hint="The link for the first book shown in the results list",
        wait_ms=1000,
        tracker=tracker,
    )
    display_result("Click First Book Result", click_res)
622 |
623 |
async def demo_section_3_search(gateway, tracker: CostTracker) -> None:
    """Run the same query against multiple web search engines and show results."""
    console.print(Rule("[bold green]Demo 3: Web Search[/]", style="green"))
    logger.info("Starting Demo Section 3: Web Search")

    search_query = "latest advancements in large language models"

    # Identical query, one call per engine.
    for engine_name, call_label in (
        ("bing", "Search Bing"),
        ("duckduckgo", "Search DuckDuckGo"),
    ):
        _, search_result = await safe_tool_call(
            call_label,
            search,
            query=search_query,
            engine=engine_name,
            max_results=5,
            tracker=tracker,
        )
        display_result(f"{call_label}: '{search_query}'", search_result)
651 |
652 |
async def demo_section_4_download(gateway, tracker: CostTracker) -> None:
    """Demonstrate site-wide PDF crawling and click-based file download."""
    console.print(Rule("[bold green]Demo 4: File Download[/]", style="green"))
    logger.info("Starting Demo Section 4: File Download")

    # Resolve the local demo output dir to an absolute path and ensure it exists.
    demo_outputs_abs = DEMO_OUTPUTS_DIR.resolve(strict=False)
    demo_outputs_abs.mkdir(parents=True, exist_ok=True)

    # Pre-create the directory the PDF crawler writes into.
    pdf_parent_dir = "storage/smart_browser_site_pdfs"
    console.print(f"[cyan]Creating parent directory for PDFs: {pdf_parent_dir}[/cyan]")
    from ultimate_mcp_server.tools.filesystem import create_directory
    mkdir_result = await create_directory(path=pdf_parent_dir)
    if mkdir_result.get("success", False):
        console.print(f"[green]Successfully created parent directory: {pdf_parent_dir}[/green]")
    else:
        console.print(f"[yellow]Warning: Could not create parent directory: {mkdir_result.get('error', 'Unknown error')}[/yellow]")

    # 4a: Crawl example.com for PDFs (none expected; exercises the tool)
    console.print("--- Scenario: Find and Download PDFs from Example.com ---")
    _, crawl_result = await safe_tool_call(
        "Download PDFs from Example.com",
        download_site_pdfs,
        start_url=URL_EXAMPLE,
        max_depth=1,
        max_pdfs=5,
        dest_subfolder="example_com_pdfs",
        tracker=tracker,
    )
    display_result("Download PDFs from Example.com", crawl_result)
    if crawl_result.get("pdf_count", 0) == 0:
        console.print("[yellow]Note: No PDFs found on example.com as expected.[/]")

    # 4b: Click-based download from a locally generated page
    download_page_content = f"""
<!DOCTYPE html>
<html><head><title>Download Test</title></head>
<body><h1>Download Page</h1>
<p>Click the link to download a dummy PDF.</p>
<a href="{URL_PDF_SAMPLE}" id="downloadLink">Download Dummy PDF Now</a>
<p>Another paragraph.</p>
</body></html>
"""
    download_page_path = demo_outputs_abs / "download_test.html"
    try:
        download_page_path.write_text(download_page_content, encoding="utf-8")
        local_url = download_page_path.as_uri()

        console.print("\n--- Scenario: Click a link to download a file ---")
        _, click_dl_result = await safe_tool_call(
            "Click to Download PDF",
            download,
            url=local_url,
            task_hint="The 'Download Dummy PDF Now' link",
            dest_dir="storage/sb_demo_outputs/clicked_downloads",  # Adjusted path
            tracker=tracker,
        )
        display_result("Click to Download PDF", click_dl_result)
    except Exception as e:
        console.print(f"[red]Error setting up or running click-download demo: {e}[/]")
    finally:
        # Always remove the temporary test page, even if the demo failed.
        if download_page_path.exists():
            try:
                download_page_path.unlink()
            except OSError:
                pass
719 |
720 |
async def demo_section_5_macro(gateway, tracker: CostTracker) -> None:
    """Demonstrate the natural-language macro runner on the bookstore site."""
    console.print(Rule("[bold green]Demo 5: Execute Macro[/]", style="green"))
    logger.info("Starting Demo Section 5: Execute Macro")

    macro_task = f"Go to {URL_BOOKSTORE}, search for 'History', find the book 'Sapiens: A Brief History of Humankind', and click its link."
    console.print("--- Scenario: Execute Macro ---")
    console.print(f"[italic]Task:[/italic] {escape(macro_task)}")

    _, macro_result = await safe_tool_call(
        "Execute Bookstore Search Macro",
        run_macro,
        url=URL_BOOKSTORE,
        task=macro_task,
        max_rounds=5,
        tracker=tracker,
    )
    display_result("Execute Bookstore Search Macro", macro_result)
738 |
739 |
async def demo_section_6_autopilot(gateway, tracker: CostTracker) -> None:
    """Demonstrate the multi-step `autopilot` tool on a web-research task."""
    console.print(Rule("[bold green]Demo 6: Autopilot[/]", style="green"))
    logger.info("Starting Demo Section 6: Autopilot")

    # High-level goal; autopilot plans and executes the individual steps itself.
    task_description = (
        "Search the web for the official documentation URL of the 'httpx' Python "
        "library, then browse that URL and summarize the main page content."
    )
    console.print("--- Scenario: Autopilot ---")
    console.print(f"[italic]Task:[/italic] {escape(task_description)}")

    call_label = "Run Autopilot: Find httpx Docs"
    _ok, outcome = await safe_tool_call(
        call_label,
        autopilot,
        task=task_description,
        max_steps=8,
        scratch_subdir="autopilot_demo",
        tracker=tracker,
    )
    display_result(call_label, outcome)

    # Surface the on-disk run log location when the tool reports one.
    run_log_path = outcome.get("run_log")
    if run_log_path:
        console.print(f"[dim]Autopilot run log saved to: {run_log_path}[/]")
759 |
760 |
async def demo_section_7_parallel(gateway, tracker: CostTracker) -> None:
    """Demonstrate the `parallel` tool: fetch page state for several URLs at once."""
    console.print(Rule("[bold green]Demo 7: Parallel Processing[/]", style="green"))
    logger.info("Starting Demo Section 7: Parallel Processing")

    target_urls = [
        URL_EXAMPLE,
        URL_BOOKSTORE,
        URL_QUOTES,
        "http://httpbin.org/delay/1",
        "https://webscraper.io/test-sites/e-commerce/static",
    ]
    console.print("--- Scenario: Get Page State for Multiple URLs in Parallel ---")
    console.print(f"[dim]URLs:[/dim] {target_urls}")

    ok, outcome = await safe_tool_call(
        "Parallel Get Page State",
        parallel,
        urls=target_urls,
        action="get_state",  # 'get_state' is the only supported action currently
        # max_tabs=3 could override the default tab limit if needed
        tracker=tracker,
    )

    # Custom per-URL rendering instead of the generic display helper.
    console.print(Rule("[bold cyan]Parallel Processing Results[/]", style="cyan"))
    if not ok:
        console.print(
            Panel(
                f"[red]Parallel processing tool call failed:[/red]\n{escape(outcome.get('error', '?'))}",
                border_style="red",
            )
        )
        console.print()
        return

    console.print(f"Total URLs Processed: {outcome.get('processed_count', 0)}")
    console.print(f"Successful: {outcome.get('successful_count', 0)}")
    console.print("-" * 20)
    for idx, entry in enumerate(outcome.get("results", []), start=1):
        entry_url = entry.get("url", f"URL {idx}")
        entry_ok = entry.get("success", False)
        if entry_ok:
            state = entry.get("page_state", {})
            body = (
                f"Title: {escape(state.get('title', 'N/A'))}\n"
                f"Elements Found: {len(state.get('elements', []))}"
            )
        else:
            body = f"[red]Error:[/red] {escape(entry.get('error', 'Unknown'))}"
        console.print(
            Panel(
                body,
                title=f"Result for: {escape(entry_url)}",
                border_style="green" if entry_ok else "red",
                padding=(0, 1),
                expand=False,
            )
        )
    console.print()
812 |
813 |
async def demo_section_8_docs(gateway, tracker: CostTracker) -> None:
    """Demonstrate the `collect_documentation` tool for a PyPI package."""
    console.print(Rule("[bold green]Demo 8: Documentation Collection[/]", style="green"))
    logger.info("Starting Demo Section 8: Documentation Collection")

    target_package = "fastapi"  # deliberately different from packages used elsewhere
    console.print(f"--- Scenario: Collect Documentation for '{target_package}' ---")

    call_label = f"Collect Docs: {target_package}"
    _ok, outcome = await safe_tool_call(
        call_label,
        collect_documentation,
        package=target_package,
        max_pages=15,
        rate_limit_rps=2.0,
        tracker=tracker,
    )
    # display_result knows how to surface file_path / pages_collected fields.
    display_result(call_label, outcome)
831 |
832 |
833 | # --- Main Function ---
async def main() -> int:
    """Run the SmartBrowser tools demo end to end.

    Initializes the MCP Gateway and its LLM providers, runs demo sections 1-8
    in order, and always attempts a Smart Browser shutdown in the finally block.

    Returns:
        int: Process exit code — 0 on success, 1 if a critical error aborted the demo.
    """
    console.print(Rule("[bold magenta]Smart Browser Tools Demo[/bold magenta]"))

    exit_code = 0
    gateway = None

    # Ensure local demo output directory exists before any section writes to it
    DEMO_OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
    console.print(f"[dim]Demo-specific outputs will be saved in: {DEMO_OUTPUTS_DIR}[/]")

    try:
        # --- Initialize Gateway for providers only ---
        console.print("[cyan]Initializing MCP Gateway...[/]")
        gateway = Gateway("smart-browser-demo")
        console.print("[cyan]Initializing Providers (for LLM tools)...[/]")
        # NOTE: uses the Gateway's private initializer; this demo drives it directly
        await gateway._initialize_providers()

        # --- Initialize Smart Browser module ---
        # The explicit initialize() call is disabled; presumably the tools
        # lazy-initialize on first use — confirm against the smart_browser module.
        console.print("[cyan]Initializing Smart Browser tool...[/]")
        # await initialize()

        # Initialize CostTracker shared by all sections to accumulate LLM spend
        tracker = CostTracker()

        # Run Demo Sections in fixed order (each takes gateway and tracker)
        await demo_section_1_browse(gateway, tracker)
        await demo_section_2_interaction(gateway, tracker)
        await demo_section_3_search(gateway, tracker)
        await demo_section_4_download(gateway, tracker)
        await demo_section_5_macro(gateway, tracker)
        await demo_section_6_autopilot(gateway, tracker) # Can be intensive; comment out to skip
        # console.print(
        #     "[yellow]Skipping Autopilot demo section (can be intensive). Uncomment to run.[/]"
        # )
        await demo_section_7_parallel(gateway, tracker)
        await demo_section_8_docs(gateway, tracker)

        console.print(Rule("[bold magenta]Demo Complete[/bold magenta]"))

    except Exception as e:
        # Any uncaught section failure is treated as fatal for the whole demo
        logger.critical(f"Demo failed with critical error: {e}", exc_info=True)
        console.print("[bold red]CRITICAL ERROR DURING DEMO:[/]")
        console.print_exception(show_locals=True)
        exit_code = 1
    finally:
        # Always attempt Smart Browser shutdown, even after a failure,
        # but never let a shutdown error mask the demo's exit code.
        console.print("[cyan]Shutting down Smart Browser tool...[/]")
        try:
            await shutdown()
        except Exception as e:
            logger.error(f"Error during Smart Browser shutdown: {e}")

    return exit_code
888 |
889 |
if __name__ == "__main__":
    # Script entry point: drive the async main() via asyncio.run
    try:
        exit_code = asyncio.run(main())
        sys.exit(exit_code)
    except KeyboardInterrupt:
        console.print("\n[yellow]Demo interrupted by user. Shutting down...[/]")
        # Best-effort emergency cleanup: main()'s finally may not have completed,
        # so run shutdown() in a fresh event loop (the interrupted one is closed).
        try:
            asyncio.run(shutdown())
        except Exception as e:
            # Plain print — the rich console may be unusable mid-interrupt
            print(f"Error during emergency shutdown: {e}")
        sys.exit(1)
903 |
```
--------------------------------------------------------------------------------
/examples/grok_integration_demo.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | """Grok integration demonstration using Ultimate MCP Server."""
3 | import asyncio
4 | import json
5 | import sys
6 | import time
7 | from pathlib import Path
8 |
9 | # Add project root to path for imports when running as script
10 | sys.path.insert(0, str(Path(__file__).parent.parent))
11 |
12 | # Third-party imports
13 | from rich import box
14 | from rich.align import Align
15 | from rich.columns import Columns
16 | from rich.console import Console, Group
17 | from rich.live import Live
18 | from rich.markup import escape
19 | from rich.panel import Panel
20 | from rich.progress import (
21 | BarColumn,
22 | Progress,
23 | TaskProgressColumn,
24 | TextColumn,
25 | TimeElapsedColumn,
26 | )
27 | from rich.rule import Rule
28 | from rich.table import Table
29 | from rich.text import Text
30 | from rich.tree import Tree
31 |
32 | # Project imports
33 | from ultimate_mcp_server.constants import Provider
34 | from ultimate_mcp_server.core.server import Gateway
35 | from ultimate_mcp_server.utils import get_logger
36 | from ultimate_mcp_server.utils.display import CostTracker
37 | from ultimate_mcp_server.utils.logging.console import console
38 |
39 | # Initialize logger
40 | logger = get_logger("example.grok_integration")
41 |
42 | # Create a separate console for detailed debugging output
43 | debug_console = Console(stderr=True, highlight=False)
44 |
45 |
async def compare_grok_models(tracker: CostTracker):
    """Compare different Grok models on a single fixed prompt.

    Lists the provider's grok-3 family models, runs the same completion on up
    to two of them, records cost into *tracker*, and renders per-model Rich
    panels with token/timing/cost stats (plus reasoning output for mini models).

    Args:
        tracker: Shared CostTracker that accumulates the cost of each call.
    """
    console.print(Rule("[bold cyan]⚡ Grok Model Comparison [/bold cyan]", style="bold blue"))
    logger.info("Starting Grok models comparison", emoji_key="start")

    # Create Gateway instance - this handles provider initialization.
    # OpenRouter is excluded so only the native Grok provider is exercised.
    gateway = Gateway("grok-demo", register_tools=False, provider_exclusions=[Provider.OPENROUTER.value])

    # Initialize providers (uses the Gateway's private initializer directly)
    logger.info("Initializing providers...", emoji_key="provider")
    await gateway._initialize_providers()

    provider_name = Provider.GROK
    try:
        # Get the provider from the gateway; bail out early if unavailable
        provider = gateway.providers.get(provider_name)
        if not provider:
            logger.error(f"Provider {provider_name} not available or initialized", emoji_key="error")
            return

        logger.info(f"Using provider: {provider_name}", emoji_key="provider")

        models = await provider.list_models()
        model_names = [m["id"] for m in models]  # Extract names from model dictionaries

        # Display available models in a tree structure with consistent padding
        model_tree = Tree("[bold cyan]Available Grok Models[/bold cyan]")
        for model in model_names:
            # Only display grok-3 models
            if not model.startswith("grok-3"):
                continue

            # Color-code by naming convention: "fast" = speed, "mini" = reasoning
            if "fast" in model:
                model_tree.add(f"[bold yellow]{model}[/bold yellow] [dim](optimized for speed)[/dim]")
            elif "mini" in model:
                model_tree.add(f"[bold green]{model}[/bold green] [dim](optimized for reasoning)[/dim]")
            else:
                model_tree.add(f"[bold magenta]{model}[/bold magenta] [dim](general purpose)[/dim]")

        # Add padding around the tree
        console.print(Panel(model_tree, border_style="dim cyan", padding=(1, 2)))

        # Select specific models to compare (preferred pair)
        grok_models = [
            "grok-3-latest",
            "grok-3-mini-latest"
        ]

        # Filter based on available models; fall back to any two grok-3 models
        models_to_compare = [m for m in grok_models if m in model_names]
        if not models_to_compare:
            # Only use grok-3 models
            models_to_compare = [m for m in model_names if m.startswith("grok-3")][:2]

        if not models_to_compare:
            logger.warning("No grok-3 models available for comparison.", emoji_key="warning")
            return

        # Consistent panel styling
        console.print(Panel(
            f"Comparing models: [yellow]{escape(', '.join(models_to_compare))}[/yellow]",
            title="[bold]Comparison Setup[/bold]",
            border_style="blue", # Use blue for info
            padding=(1, 2)
        ))

        prompt = """
        Explain the concept of quantum entanglement in a way that a high school student would understand.
        Keep your response brief and accessible.
        """

        # Consistent panel styling for prompt
        console.print(Panel(
            escape(prompt.strip()),
            title="[bold]Test Prompt[/bold]",
            border_style="yellow", # Yellow for prompts
            expand=False,
            padding=(1, 2)
        ))

        # Per-model result dicts; an entry carries either full stats or "error"
        results_data = []

        # Create progress display with TaskProgressColumn
        with Progress(
            TextColumn("[bold blue]{task.description}"),
            BarColumn(complete_style="green", finished_style="green"),
            TaskProgressColumn(),
            TextColumn("[green]{task.completed} of {task.total}"),
            TimeElapsedColumn(),
            console=console,
            expand=True
        ) as progress:
            task_id = progress.add_task("[cyan]Testing models...", total=len(models_to_compare))

            for model_name in models_to_compare:
                progress.update(task_id, description=f"[cyan]Testing model: [bold]{model_name}[/bold]")

                try:
                    logger.info(f"Testing model: {model_name}", emoji_key="model")
                    # Wall-clock timing around the whole call (includes network latency)
                    start_time = time.time()
                    result = await provider.generate_completion(
                        prompt=prompt,
                        model=model_name,
                        temperature=0.3,
                        max_tokens=300
                    )
                    processing_time = time.time() - start_time

                    # Track the cost
                    tracker.add_call(result)

                    # Log detailed timing info to debug console (stderr)
                    debug_console.print(f"[dim]Model {model_name} processing details:[/dim]")
                    debug_console.print(f"[dim]Time: {processing_time:.2f}s | Tokens: {result.total_tokens}[/dim]")

                    # Check if model is a mini model with reasoning output.
                    # NOTE(review): assumes mini models expose "reasoning_content" /
                    # "reasoning_tokens" in result.metadata — confirm against provider impl.
                    reasoning_content = None
                    reasoning_tokens = None
                    if "mini" in model_name and result.metadata:
                        reasoning_content = result.metadata.get("reasoning_content")
                        reasoning_tokens = result.metadata.get("reasoning_tokens")

                    results_data.append({
                        "model": model_name,
                        "text": result.text,
                        "tokens": {
                            "input": result.input_tokens,
                            "output": result.output_tokens,
                            "total": result.total_tokens
                        },
                        "reasoning_content": reasoning_content,
                        "reasoning_tokens": reasoning_tokens,
                        "cost": result.cost,
                        "time": processing_time
                    })

                    logger.success(
                        f"Completion for {model_name} successful",
                        emoji_key="success",
                    )

                except Exception as e:
                    # Record the failure so it still shows up in the results display
                    logger.error(f"Error testing model {model_name}: {str(e)}", emoji_key="error", exc_info=True)
                    debug_console.print_exception()
                    results_data.append({
                        "model": model_name,
                        "error": str(e)
                    })

                progress.advance(task_id)

        # Display comparison results using Rich
        if results_data:
            # Bolder rule style
            console.print(Rule("[bold green]⚡ Comparison Results [/bold green]", style="bold green"))

            # Store panels for potential column layout
            comparison_panels = []

            for result_item in results_data:
                model = result_item["model"]

                if "error" in result_item:
                    # Handle error case with consistent styling
                    error_panel = Panel(
                        f"[red]{escape(result_item['error'])}[/red]",
                        title=f"[bold red]{escape(model)} - ERROR[/bold red]",
                        border_style="red", # Red for errors
                        expand=False,
                        padding=(1, 2)
                    )
                    comparison_panels.append(error_panel)
                    continue

                time_s = result_item["time"]
                tokens = result_item.get("tokens", {})
                input_tokens = tokens.get("input", 0)
                output_tokens = tokens.get("output", 0)
                total_tokens = tokens.get("total", 0)

                # Speed metric counts input+output tokens over total wall time
                tokens_per_second = total_tokens / time_s if time_s > 0 else 0
                cost = result_item.get("cost", 0.0)
                text = result_item.get("text", "[red]Error generating response[/red]").strip()

                # Determine border color based on model type (consistent scheme)
                border_style = "magenta" # Magenta for general models
                if "mini" in model:
                    border_style = "green" # Green for reasoning
                elif "fast" in model:
                    border_style = "yellow" # Yellow for speed

                # Create the panel for this model's output
                model_panel = Panel(
                    escape(text),
                    title=f"[bold {border_style}]{escape(model)}[/bold {border_style}]", # Use border color in title
                    subtitle="[dim]Response Text[/dim]",
                    border_style=border_style,
                    expand=True,
                    # height=len(text.splitlines()) + 4, # Adjust height dynamically based on padding
                    padding=(1, 2) # Standard padding
                )

                # Create beautiful stats table with a slightly different box
                stats_table = Table(box=box.MINIMAL, show_header=False, expand=True, padding=0)
                stats_table.add_column("Metric", style="dim cyan", width=15)
                stats_table.add_column("Value", style="white")
                stats_table.add_row("Input Tokens", f"[yellow]{input_tokens}[/yellow]")
                stats_table.add_row("Output Tokens", f"[green]{output_tokens}[/green]")
                stats_table.add_row("Total Tokens", f"[bold cyan]{total_tokens}[/bold cyan]")
                stats_table.add_row("Time", f"[yellow]{time_s:.2f}s[/yellow]")
                stats_table.add_row("Speed", f"[blue]{tokens_per_second:.1f} tok/s[/blue]")
                stats_table.add_row("Cost", f"[green]${cost:.6f}[/green]")

                # Combine as a single compact panel
                combined_panel = Panel(
                    Group(
                        model_panel,
                        Align.center(stats_table)
                    ),
                    border_style=border_style,
                    padding=(1, 1), # Reduced padding for combined view
                    title=f"[bold]Response from {escape(model)}[/bold]"
                )

                # If there's reasoning content, show it directly
                reasoning_content = result_item.get("reasoning_content")
                reasoning_tokens = result_item.get("reasoning_tokens")

                if reasoning_content:
                    reasoning_panel = Panel(
                        escape(reasoning_content),
                        title="[bold cyan]Reasoning Process[/bold cyan]",
                        subtitle=f"[dim]Reasoning Tokens: {reasoning_tokens}[/dim]",
                        border_style="cyan", # Cyan for reasoning/tools
                        expand=True,
                        # height=len(reasoning_content.splitlines()) + 4, # Adjust height
                        padding=(1, 2) # Standard padding
                    )
                    # Group main result and reasoning
                    comparison_panels.append(Group(combined_panel, reasoning_panel))
                else:
                    comparison_panels.append(combined_panel)

        # Use Columns layout if exactly two results (and no errors resulted in fewer panels)
        if len(comparison_panels) == 2 and len(comparison_panels) == len(results_data):
            console.print(Columns(comparison_panels, equal=True, expand=True))
        else:
            # Otherwise, print panels sequentially
            for panel in comparison_panels:
                console.print(panel)

    except Exception as e:
        logger.error(f"Error in model comparison: {str(e)}", emoji_key="error", exc_info=True)
299 |
300 |
async def demonstrate_reasoning(tracker: CostTracker):
    """Demonstrate Grok-mini reasoning capabilities on a math word problem.

    Prefers "grok-3-mini-latest" (falls back to any "mini" model, then the
    provider default), requests high reasoning effort, and renders the model's
    thinking process and final solution as two stacked Rich panels.

    Args:
        tracker: Shared CostTracker that accumulates the cost of the call.
    """
    console.print(Rule("[bold cyan]⚡ Grok Reasoning Demonstration [/bold cyan]", style="bold blue"))
    logger.info("Demonstrating Grok-mini reasoning capabilities", emoji_key="start")

    # Create Gateway instance - this handles provider initialization
    gateway = Gateway("grok-demo", register_tools=False, provider_exclusions=[Provider.OPENROUTER.value])

    # Initialize providers
    logger.info("Initializing providers...", emoji_key="provider")
    await gateway._initialize_providers()

    provider_name = Provider.GROK
    try:
        # Get the provider from the gateway; bail out early if unavailable
        provider = gateway.providers.get(provider_name)
        if not provider:
            logger.error(f"Provider {provider_name} not available or initialized", emoji_key="error")
            return

        # Use a Grok mini model (ensure it's available)
        model = "grok-3-mini-latest"
        available_models = await provider.list_models()
        model_names = [m["id"] for m in available_models]

        if model not in model_names:
            # Find any mini model; the for-else falls back to the provider default
            # only when the loop finishes without a break (no "mini" model found)
            for m in model_names:
                if "mini" in m:
                    model = m
                    break
            else:
                logger.warning("No mini model available for reasoning demo. Using default model.", emoji_key="warning")
                model = provider.get_default_model()

        logger.info(f"Using model: {model}", emoji_key="model")

        # Problem requiring reasoning (leading whitespace stripped at display time)
        problem = """
        A cylindrical water tank has a radius of 3 meters and a height of 4 meters.
        If water flows in at a rate of 2 cubic meters per minute, how long will it take to fill the tank?
        Show your work step by step.
        """

        # Consistent panel styling for prompt
        console.print(Panel(
            escape(problem.strip()),
            title="[bold yellow]Math Problem[/bold yellow]",
            border_style="yellow", # Yellow for prompts
            expand=False,
            padding=(1, 2) # Standard padding
        ))

        with Progress(
            TextColumn("[bold blue]Status:"),
            BarColumn(complete_style="green", finished_style="green"),
            TaskProgressColumn(),
            TextColumn("[cyan]{task.description}"),
            TimeElapsedColumn(),
            console=console,
            expand=True
        ) as progress:
            # Single-step progress bar: 0 -> 1 when the completion returns
            task = progress.add_task("[cyan]Thinking...", total=1)

            logger.info("Generating solution with reasoning", emoji_key="processing")

            # NOTE(review): assumes the provider forwards reasoning_effort to the
            # Grok API — confirm against the provider's generate_completion signature
            result = await provider.generate_completion(
                prompt=problem,
                model=model,
                temperature=0.3,
                reasoning_effort="high", # Use high reasoning effort
                max_tokens=1000
            )

            # Track the cost
            tracker.add_call(result)

            progress.update(task, description="Complete!", completed=1)

        logger.success("Reasoning solution completed", emoji_key="success")

        # Extract reasoning content from result metadata, when the model provides it
        reasoning_content = None
        reasoning_tokens = None
        if result.metadata:
            reasoning_content = result.metadata.get("reasoning_content")
            reasoning_tokens = result.metadata.get("reasoning_tokens")

        # Create a more compact layout for reasoning demo
        if reasoning_content:
            reasoning_panel = Panel(
                escape(reasoning_content),
                title="[bold cyan]Thinking Process[/bold cyan]",
                subtitle=f"[dim]Reasoning Tokens: {reasoning_tokens}[/dim]",
                border_style="cyan", # Cyan for reasoning/tools
                expand=True,
                # height=len(reasoning_content.splitlines()) + 4, # Adjust height
                padding=(1, 2) # Standard padding
            )
        else:
            # Placeholder panel keeps the layout consistent when no reasoning returned
            reasoning_panel = Panel(
                "[italic]No explicit reasoning process available[/italic]",
                title="[bold cyan]Thinking Process[/bold cyan]",
                border_style="cyan", # Cyan for reasoning/tools
                expand=True,
                padding=(1, 2) # Standard padding
            )

        # Format the answer
        answer_panel = Panel(
            escape(result.text.strip()),
            title="[bold green]Final Solution[/bold green]",
            subtitle=f"[dim]Tokens: {result.input_tokens} in, {result.output_tokens} out | Cost: ${result.cost:.6f} | Time: {result.processing_time:.2f}s[/dim]",
            border_style="green", # Green for success/final result
            expand=True,
            # height=len(result.text.strip().splitlines()) + 4, # Adjust height
            padding=(1, 2) # Standard padding
        )

        # Use Group for better vertical spacing control than grid
        console.print(Group(reasoning_panel, answer_panel))

    except Exception as e:
        logger.error(f"Error in reasoning demonstration: {str(e)}", emoji_key="error", exc_info=True)
425 |
426 |
async def demonstrate_function_calling(tracker: CostTracker):
    """Demonstrate Grok function calling capabilities.

    Sends a travel-planning query together with two tool schemas (weather and
    flight lookup), then renders any tool calls the model emits alongside
    locally fabricated mock results. The mock results are display-only — they
    are never sent back to the model for a second turn.

    Args:
        tracker: Shared CostTracker that accumulates the cost of the call.
    """
    console.print(Rule("[bold cyan]⚡ Grok Function Calling Demonstration [/bold cyan]", style="bold blue"))
    logger.info("Demonstrating Grok function calling capabilities", emoji_key="start")

    # Create Gateway instance - this handles provider initialization
    gateway = Gateway("grok-demo", register_tools=False, provider_exclusions=[Provider.OPENROUTER.value])

    # Initialize providers
    logger.info("Initializing providers...", emoji_key="provider")
    await gateway._initialize_providers()

    provider_name = Provider.GROK
    try:
        # Get the provider from the gateway; bail out early if unavailable
        provider = gateway.providers.get(provider_name)
        if not provider:
            logger.error(f"Provider {provider_name} not available or initialized", emoji_key="error")
            return

        # Use default Grok model
        model = provider.get_default_model()
        logger.info(f"Using model: {model}", emoji_key="model")

        # Define tools for the model to use (OpenAI-style function schemas)
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get the current weather in a given location",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "location": {
                                "type": "string",
                                "description": "The city and state, e.g. San Francisco, CA"
                            },
                            "unit": {
                                "type": "string",
                                "enum": ["celsius", "fahrenheit"],
                                "description": "The unit of temperature to use"
                            }
                        },
                        "required": ["location"]
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "get_flight_info",
                    "description": "Get flight information between two cities",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "departure_city": {
                                "type": "string",
                                "description": "The departure city"
                            },
                            "arrival_city": {
                                "type": "string",
                                "description": "The arrival city"
                            },
                            "date": {
                                "type": "string",
                                "description": "The date of travel in YYYY-MM-DD format"
                            }
                        },
                        "required": ["departure_city", "arrival_city"]
                    }
                }
            }
        ]

        # Display tools in a Panel for consistency
        tools_table = Table(title="[bold cyan]Available Tools[/bold cyan]", box=box.MINIMAL, show_header=True, header_style="bold magenta")
        tools_table.add_column("Tool Name", style="cyan", no_wrap=True)
        tools_table.add_column("Description", style="white")
        tools_table.add_column("Parameters", style="green")

        for tool in tools:
            function = tool["function"]
            name = function["name"]
            description = function["description"]
            params = ", ".join([p for p in function["parameters"]["properties"]])
            tools_table.add_row(name, description, params)

        console.print(Panel(tools_table, border_style="cyan", padding=(1, 2))) # Cyan for tools

        # User query designed to trigger both tools in one response
        user_query = "I'm planning a trip from New York to Los Angeles next week. What's the weather like in LA, and can you help me find flight information?"

        # Consistent panel styling for prompt
        console.print(Panel(
            escape(user_query),
            title="[bold yellow]User Query[/bold yellow]",
            border_style="yellow", # Yellow for prompts
            expand=False,
            padding=(1, 2) # Standard padding
        ))

        with Progress(
            TextColumn("[bold blue]Status:"),
            BarColumn(complete_style="green", finished_style="green"),
            TaskProgressColumn(),
            TextColumn("[cyan]{task.description}"),
            TimeElapsedColumn(),
            console=console,
            expand=True
        ) as progress:
            # Single-step progress bar: 0 -> 1 when the completion returns
            task = progress.add_task("[cyan]Processing...", total=1)

            logger.info("Generating completion with function calling", emoji_key="processing")

            result = await provider.generate_completion(
                prompt=user_query,
                model=model,
                temperature=0.7,
                tools=tools,
                tool_choice="auto"
            )

            # Track the cost
            tracker.add_call(result)

            progress.update(task, description="Complete!", completed=1)

        logger.success("Function calling completed", emoji_key="success")

        # Check if there are tool calls in the response.
        # Reads the raw OpenAI-compatible response shape (choices[0].message);
        # hasattr guards SDK objects that omit the tool_calls attribute.
        tool_calls = None
        if hasattr(result.raw_response.choices[0].message, 'tool_calls') and \
           result.raw_response.choices[0].message.tool_calls:
            tool_calls = result.raw_response.choices[0].message.tool_calls

        if tool_calls:
            # Format the model response (may be empty when only tool calls returned)
            response_text = escape(result.text.strip()) if result.text else "[italic dim]No direct text response, only tool calls.[/italic dim]"
            response_info = f"[dim]Input Tokens: {result.input_tokens} | Output Tokens: {result.output_tokens} | Cost: ${result.cost:.6f}[/dim]"

            model_response_panel = Panel(
                response_text,
                title="[bold green]Model Response[/bold green]",
                subtitle=response_info,
                padding=(1, 2), # Standard padding
                border_style="green" # Green for success/results
            )

            # Prepare panels for tool calls
            tool_panels = []
            for tool_call in tool_calls:
                # Parse JSON arguments; fall back to raw string if the model
                # emitted malformed JSON
                try:
                    args = json.loads(tool_call.function.arguments)
                    args_formatted = f"[json]{escape(json.dumps(args, indent=2))}[/json]"
                except Exception:
                    args_formatted = escape(tool_call.function.arguments)

                # Create compact tool call display content
                call_content_lines = [
                    f"[bold cyan]Function:[/bold cyan] [magenta]{tool_call.function.name}[/magenta]",
                    f"[bold cyan]Arguments:[/bold cyan]\n{args_formatted}"
                ]

                # Add mock function result if available (display-only; hard-coded values)
                result_data = None
                if tool_call.function.name == "get_weather":
                    location = args.get("location", "Unknown")
                    unit = args.get("unit", "fahrenheit")
                    temp = 75 if unit == "fahrenheit" else 24
                    result_data = {
                        "location": location,
                        "temperature": temp,
                        "unit": unit,
                        "condition": "Sunny",
                        "humidity": 65
                    }
                elif tool_call.function.name == "get_flight_info":
                    departure = args.get("departure_city", "Unknown")
                    arrival = args.get("arrival_city", "Unknown")
                    date = args.get("date", "2025-04-20")  # noqa: F841
                    result_data = {
                        "flights": [
                            {
                                "airline": "Delta", "flight": "DL1234",
                                "departure": f"{departure} 08:30 AM", "arrival": f"{arrival} 11:45 AM",
                                "price": "$349.99"
                            },
                            {
                                "airline": "United", "flight": "UA567",
                                "departure": f"{departure} 10:15 AM", "arrival": f"{arrival} 1:30 PM",
                                "price": "$289.99"
                            }
                        ]
                    }

                if result_data:
                    result_formatted = f"[json]{escape(json.dumps(result_data, indent=2))}[/json]"
                    call_content_lines.append(f"\n[bold blue]Mock Result:[/bold blue]\n{result_formatted}")

                # Join content lines for the panel
                call_content = "\n".join(call_content_lines)

                tool_panel = Panel(
                    call_content,
                    title=f"[bold magenta]Tool Call: {tool_call.function.name}[/bold magenta]",
                    subtitle=f"[dim]ID: {tool_call.id}[/dim]",
                    border_style="magenta", # Magenta for specific tool calls
                    padding=(1, 2) # Standard padding
                )
                tool_panels.append(tool_panel)

            # Use Columns for horizontal layout if multiple tool calls
            if len(tool_panels) > 1:
                tool_call_display = Columns(tool_panels, equal=True, expand=True)
            elif tool_panels:
                tool_call_display = tool_panels[0]
            else: # Should not happen if tool_calls is true, but handle defensively
                tool_call_display = Text("No tool calls processed.", style="dim")

            # Create combined panel with response and tool calls
            combined_panel = Panel(
                Group(
                    model_response_panel,
                    tool_call_display
                ),
                title="[bold green]Function Calling Results[/bold green]",
                border_style="green", # Green for overall success
                padding=(1, 1) # Slightly reduced outer padding
            )

            console.print(combined_panel)
        else:
            # No tool calls, just display the response with consistent styling
            console.print(Panel(
                escape(result.text.strip()),
                title="[bold green]Model Response (No Tool Calls)[/bold green]",
                subtitle=f"[dim]Tokens: {result.input_tokens} in, {result.output_tokens} out | Cost: ${result.cost:.6f}[/dim]",
                border_style="green", # Green for success/result
                padding=(1, 2) # Standard padding
            ))

        console.print() # Keep spacing

    except Exception as e:
        logger.error(f"Error in function calling demonstration: {str(e)}", emoji_key="error", exc_info=True)
674 |
675 |
async def streaming_example(tracker: CostTracker):
    """Demonstrate Grok streaming capabilities.

    Streams a short-story completion from the default Grok model into a
    live-updating Rich panel, then prints rough throughput statistics.

    Args:
        tracker: Accepted for signature parity with the other demo sections;
            no cost sample is recorded here since the streamed chunks in this
            demo carry no aggregate cost metadata.
    """
    console.print(Rule("[bold cyan]⚡ Grok Streaming Demonstration [/bold cyan]", style="bold blue"))
    logger.info("Demonstrating Grok streaming capabilities", emoji_key="start")

    # The Gateway handles provider initialization; OpenRouter is excluded.
    gateway = Gateway("grok-demo", register_tools=False, provider_exclusions=[Provider.OPENROUTER.value])

    logger.info("Initializing providers...", emoji_key="provider")
    await gateway._initialize_providers()

    provider_name = Provider.GROK
    try:
        # NOTE(review): lookup uses the enum member itself, not .value —
        # presumably the providers dict is keyed accordingly; verify.
        provider = gateway.providers.get(provider_name)
        if not provider:
            logger.error(f"Provider {provider_name} not available or initialized", emoji_key="error")
            return

        # Fall back to whatever the provider considers its default model.
        model = provider.get_default_model()
        logger.info(f"Using model: {model}", emoji_key="model")

        prompt = "Write a short story about an AI that discovers emotions for the first time."

        # Yellow panel = prompt, consistent with the other demo sections.
        console.print(Panel(
            escape(prompt),
            title="[bold yellow]Streaming Prompt[/bold yellow]",
            border_style="yellow",
            expand=False,
            padding=(1, 2)
        ))

        # Green panel = results; its renderable/subtitle are mutated in place
        # while the stream is live.
        live_panel = Panel(
            "",
            title=f"[bold green]Streaming Output from {model}[/bold green]",
            subtitle="[dim]Live output...[/dim]",
            border_style="green",
            expand=True,
            height=15,
            padding=(1, 2)
        )

        logger.info("Starting stream", emoji_key="processing")
        stream = provider.generate_completion_stream(
            prompt=prompt,
            model=model,
            temperature=0.7,
            max_tokens=500
        )

        accumulated = ""
        chunks_received = 0
        stream_start = time.time()

        # Render each incoming chunk as it arrives via Rich's Live display.
        with Live(live_panel, console=console, refresh_per_second=10, vertical_overflow="visible") as live:
            async for content, _metadata in stream:
                chunks_received += 1
                accumulated += content
                # Text + escape keeps any markup in the model output literal.
                live_panel.renderable = Text(escape(accumulated))
                live_panel.subtitle = f"[dim]Received {chunks_received} chunks...[/dim]"
                live.update(live_panel)

        # Re-print once outside the Live context so the finished panel persists.
        live_panel.subtitle = f"[bold green]Stream Complete ({chunks_received} chunks)[/bold green]"
        console.print(live_panel)

        elapsed = time.time() - stream_start
        # Crude word-count-based token estimate — adequate for a demo readout.
        approx_tokens = len(accumulated.split()) * 1.3
        approx_rate = approx_tokens / elapsed if elapsed > 0 else 0

        stats = Table(title="[bold blue]Streaming Stats[/bold blue]", box=box.MINIMAL, padding=(0,1), show_header=False)
        stats.add_column("Metric", style="dim cyan")
        stats.add_column("Value", style="white")
        stats.add_row("Total Time", f"[yellow]{elapsed:.2f}s[/yellow]")
        stats.add_row("Chunks Received", f"[green]{chunks_received}[/green]")
        stats.add_row("Est. Output Tokens", f"[cyan]~{int(approx_tokens)}[/cyan]")
        stats.add_row("Est. Speed", f"[blue]{approx_rate:.1f} tok/s[/blue]")

        # Blue panel = informational stats.
        console.print(Panel(stats, border_style="blue", padding=(1, 2)))
        logger.success("Streaming completed", emoji_key="success")

    except Exception as e:
        logger.error(f"Error in streaming demonstration: {str(e)}", emoji_key="error", exc_info=True)
773 |
774 |
async def main():
    """Run Grok integration examples.

    Executes each demo section in order with a shared CostTracker, prints a
    cost summary at the end, and returns a process exit status (0 on success,
    1 on failure).
    """
    tracker = CostTracker()
    try:
        # Banner panel with vertical padding for the showcase title.
        banner = Text("⚡ Grok Integration Showcase ⚡", style="bold white on blue")
        banner.justify = "center"
        console.print(Panel(banner, box=box.DOUBLE_EDGE, padding=(1, 0)))

        debug_console.print("[dim]Starting Grok integration demo in debug mode[/dim]")

        # First three sections each get a trailing blank line as a separator;
        # the streaming section intentionally does not (summary follows it).
        for section in (compare_grok_models, demonstrate_reasoning, demonstrate_function_calling):
            await section(tracker)
            console.print()

        await streaming_example(tracker)

        # Aggregate cost report across all sections.
        tracker.display_summary(console)

    except Exception as e:
        logger.critical(f"Example failed: {str(e)}", emoji_key="critical", exc_info=True)
        debug_console.print_exception(show_locals=True)
        return 1

    logger.success("Grok Integration Demo Finished Successfully!", emoji_key="complete")
    return 0
815 |
816 |
if __name__ == "__main__":
    # Drive the async entry point and hand its status code to the shell.
    sys.exit(asyncio.run(main()))
```