This is page 4 of 4. Use http://codebase.md/hanlulong/stata-mcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .github
│ ├── .gitattributes
│ ├── CLI_USAGE.md
│ └── CONTRIBUTING.md
├── .gitignore
├── .vscodeignore
├── CHANGELOG.md
├── docs
│ ├── examples
│ │ ├── auto_report.pdf
│ │ └── jupyter.ipynb
│ ├── incidents
│ │ ├── CLAUDE_CLIENTS_STREAMING_COMPARISON.md
│ │ ├── CLAUDE_CODE_NOTIFICATION_DIAGNOSIS.md
│ │ ├── CLAUDE_CODE_NOTIFICATION_ISSUE.md
│ │ ├── DUAL_TRANSPORT.md
│ │ ├── FINAL_DIAGNOSIS.md
│ │ ├── FINAL_STATUS_REPORT.md
│ │ ├── FINAL_TIMEOUT_TEST_RESULTS.md
│ │ ├── KEEP_ALIVE_IMPLEMENTATION.md
│ │ ├── LONG_EXECUTION_ISSUE.md
│ │ ├── MCP_CLIENT_VERIFICATION_SUCCESS.md
│ │ ├── MCP_ERROR_FIX.md
│ │ ├── MCP_TIMEOUT_SOLUTION.md
│ │ ├── MCP_TRANSPORT_FIX.md
│ │ ├── NOTIFICATION_FIX_COMPLETE.md
│ │ ├── NOTIFICATION_FIX_VERIFIED.md
│ │ ├── NOTIFICATION_ROUTING_BUG.md
│ │ ├── PROGRESSIVE_OUTPUT_APPROACH.md
│ │ ├── README.md
│ │ ├── SESSION_ACCESS_SOLUTION.md
│ │ ├── SSE_STREAMING_IMPLEMENTATION.md
│ │ ├── STREAMING_DIAGNOSIS.md
│ │ ├── STREAMING_IMPLEMENTATION_GUIDE.md
│ │ ├── STREAMING_SOLUTION.md
│ │ ├── STREAMING_STATUS.md
│ │ ├── STREAMING_TEST_GUIDE.md
│ │ ├── TIMEOUT_FIX_SUMMARY.md
│ │ └── TIMEOUT_TEST_REPORT.md
│ ├── jupyter-stata.md
│ ├── jupyter-stata.zh-CN.md
│ ├── release_notes.md
│ ├── release_notes.zh-CN.md
│ ├── releases
│ │ └── INSTALL_v0.3.4.md
│ └── REPO_STRUCTURE.md
├── images
│ ├── demo_2x.gif
│ ├── demo.mp4
│ ├── jupyterlab.png
│ ├── JupyterLabExample.png
│ ├── logo.png
│ ├── pystata.png
│ ├── Stata_MCP_logo_144x144.png
│ └── Stata_MCP_logo_400x400.png
├── LICENSE
├── package.json
├── README.md
├── README.zh-CN.md
├── src
│ ├── check-python.js
│ ├── devtools
│ │ ├── prepare-npm-package.js
│ │ └── restore-vscode-package.js
│ ├── extension.js
│ ├── language-configuration.json
│ ├── requirements.txt
│ ├── start-server.js
│ ├── stata_mcp_server.py
│ └── syntaxes
│ └── stata.tmLanguage.json
└── tests
├── README.md
├── simple_mcp_test.py
├── test_gr_list_issue.do
├── test_graph_issue.do
├── test_graph_name_param.do
├── test_keepalive.do
├── test_log_location.do
├── test_notifications.py
├── test_stata.do
├── test_streaming_http.py
├── test_streaming.do
├── test_timeout_direct.py
├── test_timeout.do
└── test_understanding.do
```
# Files
--------------------------------------------------------------------------------
/src/stata_mcp_server.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Stata MCP Server - Exposes Stata functionality to AI models via MCP protocol
5 | Using fastapi-mcp for clean implementation
6 | """
7 |
8 | import os
9 | import tempfile
10 | import json
11 | import sys
12 | import time
13 | import argparse
14 | import logging
15 | import platform
16 | import signal
17 | import subprocess
18 | import traceback
19 | import socket
20 | import asyncio
21 | from typing import Dict, Any, Optional
22 | import warnings
23 | import re
24 |
25 | # Fix encoding issues on Windows for Unicode characters
26 | if platform.system() == 'Windows':
27 | # Force UTF-8 encoding for stdout and stderr on Windows
28 | import io
29 | if sys.stdout.encoding != 'utf-8':
30 | sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace', line_buffering=True)
31 | if sys.stderr.encoding != 'utf-8':
32 | sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace', line_buffering=True)
33 | # Set environment variable for Python to use UTF-8
34 | os.environ['PYTHONIOENCODING'] = 'utf-8'
35 |
36 | # Hide Python process from Mac Dock (server should be background process)
37 | if platform.system() == 'Darwin':
38 | try:
39 | from AppKit import NSApplication
40 | # Set activation policy to accessory - hides dock icon but allows functionality
41 | # This must be called early, before any GUI operations (like Stata's JVM graphics)
42 | app = NSApplication.sharedApplication()
43 | # NSApplicationActivationPolicyAccessory = 1 (hidden from dock, can show windows)
44 | # NSApplicationActivationPolicyProhibited = 2 (completely hidden)
45 | app.setActivationPolicy_(1) # Use Accessory to allow Stata's GUI operations
46 | except Exception:
47 | # Silently ignore if AppKit not available or fails
48 | # This is just a UI improvement, not critical for functionality
49 | pass
50 |
51 | # Check if running as a module (using -m flag)
52 | is_running_as_module = __name__ == "__main__" and not sys.argv[0].endswith('stata_mcp_server.py')
53 | if is_running_as_module:
54 | print(f"Running as a module, using modified command-line handling")
55 |
56 | # Check Python version on Windows but don't exit immediately to allow logging
57 | if platform.system() == 'Windows':
58 | required_version = (3, 11)
59 | current_version = (sys.version_info.major, sys.version_info.minor)
60 | if current_version < required_version:
61 | print(f"WARNING: Python 3.11 or higher is recommended on Windows. Current version: {sys.version}")
62 | print("Please install Python 3.11 from python.org for best compatibility.")
63 | # Log this but don't exit immediately so logs can be written
64 |
65 | try:
66 | from fastapi import FastAPI, Request, Response, Query
67 | from fastapi.responses import StreamingResponse
68 | from fastapi_mcp import FastApiMCP
69 | from pydantic import BaseModel, Field
70 | from contextlib import asynccontextmanager
71 | import httpx
72 | except ImportError as e:
73 | print(f"ERROR: Required Python packages not found: {str(e)}")
74 | print("Please install the required packages:")
75 | print("pip install fastapi uvicorn fastapi-mcp pydantic")
76 |
77 | # On Windows, provide more guidance
78 | if platform.system() == 'Windows':
79 | print("\nOn Windows, you can install required packages by running:")
80 | print("py -3.11 -m pip install fastapi uvicorn fastapi-mcp pydantic")
81 | print("\nIf you need to install Python 3.11, download it from: https://www.python.org/downloads/")
82 |
83 | # Exit with error
84 | sys.exit(1)
85 |
86 | # Configure logging - will be updated in main() with proper log file
87 | # Start with basic console logging
88 | logging.basicConfig(
89 | level=logging.INFO, # Changed from DEBUG to INFO to reduce verbosity
90 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
91 | stream=sys.stdout # Default to stdout until log file is configured
92 | )
93 |
94 | # Create console handler for debugging
95 | console_handler = logging.StreamHandler(sys.stdout)
96 | console_handler.setLevel(logging.WARNING) # Only show WARNING level and above to keep console output clean
97 | formatter = logging.Formatter('%(levelname)s: %(message)s')
98 | console_handler.setFormatter(formatter)
99 | logging.getLogger().addHandler(console_handler)
100 |
101 | # Silence uvicorn access logs but allow warnings
102 | logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
103 | logging.getLogger("uvicorn.error").setLevel(logging.WARNING)
104 |
105 | # Server info
106 | SERVER_NAME = "Stata MCP Server"
107 | SERVER_VERSION = "1.0.0"
108 |
109 | # Flag for Stata availability
110 | stata_available = False
111 | has_stata = False
112 | stata = None # Module-level reference to stata module
113 | STATA_PATH = None
114 | # Add a flag to track if we've already displayed the Stata banner
115 | stata_banner_displayed = False
116 | # Add a flag to track if MCP server is fully initialized
117 | mcp_initialized = False
118 | # Add a storage for continuous command history
119 | command_history = []
120 | # Store the current Stata edition
121 | stata_edition = 'mp' # Default to MP edition
122 | # Store log file settings
123 | log_file_location = 'extension' # Default to extension directory
124 | custom_log_directory = '' # Custom log directory
125 | extension_path = None # Path to the extension directory
126 |
127 | # Try to import pandas
128 | try:
129 | import pandas as pd
130 | has_pandas = True
131 | logging.info("pandas module loaded successfully")
132 | except ImportError:
133 | has_pandas = False
134 | logging.warning("pandas not available, data transfer functionality will be limited")
135 | warnings.warn("pandas not available, data transfer functionality will be limited")
136 |
137 | # Try to initialize Stata with the given path
138 | def try_init_stata(stata_path):
139 | """Try to initialize Stata with the given path"""
140 | global stata_available, has_stata, stata, STATA_PATH, stata_banner_displayed, stata_edition
141 |
142 | # If Stata is already available, don't re-initialize
143 | if stata_available and has_stata and stata is not None:
144 | logging.debug("Stata already initialized, skipping re-initialization")
145 | return True
146 |
147 | # Clean the path (remove quotes if present)
148 | if stata_path:
149 | # Remove any quotes that might have been added
150 | stata_path = stata_path.strip('"\'')
151 | STATA_PATH = stata_path
152 | logging.info(f"Using Stata path: {stata_path}")
153 |
154 | logging.info(f"Initializing Stata from path: {stata_path}")
155 |
156 | try:
157 | # Add environment variables to help with library loading
158 | if stata_path:
159 | if not os.path.exists(stata_path):
160 | error_msg = f"Stata path does not exist: {stata_path}"
161 | logging.error(error_msg)
162 | print(f"ERROR: {error_msg}")
163 | return False
164 |
165 | os.environ['SYSDIR_STATA'] = stata_path
166 |
167 | stata_utilities_path = os.path.join(os.environ.get('SYSDIR_STATA', ''), 'utilities')
168 | if os.path.exists(stata_utilities_path):
169 | sys.path.insert(0, stata_utilities_path)
170 | logging.debug(f"Added Stata utilities path to sys.path: {stata_utilities_path}")
171 | else:
172 | warning_msg = f"Stata utilities path not found: {stata_utilities_path}"
173 | logging.warning(warning_msg)
174 |
175 | # Try to import pystata or stata-sfi
176 | try:
177 | # First try pystata
178 | from pystata import config
179 | logging.debug("Successfully imported pystata")
180 |
181 | # Try to initialize Stata
182 | try:
183 | # Only show banner once (suppress if we've shown it before)
184 | if not stata_banner_displayed and platform.system() == 'Windows':
185 | # On Windows, the banner appears even if we try to suppress it
186 | # At least mark that we've displayed it
187 | stata_banner_displayed = True
188 | logging.debug("Stata banner will be displayed (first time)")
189 | else:
190 | # On subsequent initializations, try to suppress the banner
191 | # This doesn't always work on Windows, but at least we're trying
192 | logging.debug("Attempting to suppress Stata banner on re-initialization")
193 | os.environ['STATA_QUIETLY'] = '1' # Add this environment variable
194 |
195 | # Set Java headless mode to prevent Dock icon on Mac (must be before config.init)
196 | # When Stata's embedded JVM initializes for graphics, it normally creates a Dock icon
197 | # Setting headless=true prevents this GUI behavior
198 | if platform.system() == 'Darwin':
199 | os.environ['JAVA_TOOL_OPTIONS'] = '-Djava.awt.headless=true'
200 | logging.debug("Set Java headless mode to prevent Dock icon")
201 |
202 | # Initialize with the specified Stata edition
203 | config.init(stata_edition)
204 | logging.info(f"Stata initialized successfully with {stata_edition.upper()} edition")
205 |
206 | # Fix encoding for PyStata output on Windows
207 | if platform.system() == 'Windows':
208 | import io
209 | # Replace PyStata's output file handle with UTF-8 encoded version
210 | config.stoutputf = io.TextIOWrapper(
211 | sys.stdout.buffer,
212 | encoding='utf-8',
213 | errors='replace',
214 | line_buffering=True
215 | )
216 | logging.debug("Configured PyStata output with UTF-8 encoding for Windows")
217 |
218 | # Now import stata after initialization
219 | from pystata import stata as stata_module
220 | # Set module-level stata reference
221 | globals()['stata'] = stata_module
222 |
223 | # Successfully initialized Stata
224 | has_stata = True
225 | stata_available = True
226 |
227 | # Initialize PNG export capability to prevent JVM crash in daemon threads (Mac-specific)
228 | #
229 | # Root cause: On Mac, Stata's graphics use embedded JVM. When PNG export is first
230 | # called from a daemon thread, the JVM initialization fails with SIGBUS error in
231 | # CodeHeap::allocate(). This is Mac-specific due to different JVM/threading model
232 | # in libstata-mp.dylib compared to Windows stata-mp-64.dll.
233 | #
234 | # Solution: Initialize JVM in main thread by doing one PNG export at startup.
235 | # All subsequent daemon thread PNG exports will reuse the initialized JVM.
236 | #
237 | # See: tests/MAC_SPECIFIC_ANALYSIS.md for detailed technical analysis
238 | try:
239 | from pystata.config import stlib, get_encode_str
240 | import tempfile
241 |
242 | # Create minimal dataset and graph (2 obs, 1 var)
243 | stlib.StataSO_Execute(get_encode_str("qui clear"), False)
244 | stlib.StataSO_Execute(get_encode_str("qui set obs 2"), False)
245 | stlib.StataSO_Execute(get_encode_str("qui gen x=1"), False)
246 | stlib.StataSO_Execute(get_encode_str("qui twoway scatter x x, name(_init, replace)"), False)
247 |
248 | # Export tiny PNG (10x10px) to initialize JVM in main thread
249 | # This prevents SIGBUS crash when daemon threads later export PNG
250 | png_init = os.path.join(tempfile.gettempdir(), "_stata_png_init.png")
251 | stlib.StataSO_Execute(get_encode_str(f'qui graph export "{png_init}", name(_init) replace width(10) height(10)'), False)
252 | stlib.StataSO_Execute(get_encode_str("qui graph drop _init"), False)
253 |
254 | # Cleanup temporary files
255 | if os.path.exists(png_init):
256 | os.unlink(png_init)
257 |
258 | logging.debug("PNG export initialized successfully (Mac JVM fix)")
259 | except Exception as png_init_error:
260 | # Non-fatal: log but continue - PNG may still work on some platforms
261 | logging.warning(f"PNG initialization failed (non-fatal): {str(png_init_error)}")
262 |
263 | return True
264 | except Exception as init_error:
265 | error_msg = f"Failed to initialize Stata: {str(init_error)}"
266 | logging.error(error_msg)
267 | print(f"ERROR: {error_msg}")
268 | print("Will attempt to continue without full Stata integration")
269 | print("Check if Stata is already running in another instance, or if your Stata license is valid")
270 |
271 | # Some features will still work without full initialization
272 | has_stata = False
273 | stata_available = False
274 |
275 | return False
276 | except ImportError as config_error:
277 | # Try stata-sfi as fallback
278 | try:
279 | import stata_setup
280 |
281 | # Only show banner once
282 | if not stata_banner_displayed and platform.system() == 'Windows':
283 | stata_banner_displayed = True
284 | logging.debug("Stata banner will be displayed (first time)")
285 | else:
286 | # On subsequent initializations, try to suppress the banner
287 | logging.debug("Attempting to suppress Stata banner on re-initialization")
288 | os.environ['STATA_QUIETLY'] = '1'
289 |
290 | stata_setup.config(stata_path, stata_edition)
291 | logging.debug("Successfully configured stata_setup")
292 |
293 | try:
294 | import sfi
295 | # Set module-level stata reference for compatibility
296 | globals()['stata'] = sfi
297 |
298 | has_stata = True
299 | stata_available = True
300 | logging.info("Stata initialized successfully using sfi")
301 |
302 | return True
303 | except ImportError as sfi_error:
304 | error_msg = f"Could not import sfi: {str(sfi_error)}"
305 | logging.error(error_msg)
306 | print(f"ERROR: {error_msg}")
307 | has_stata = False
308 | stata_available = False
309 | return False
310 | except Exception as setup_error:
311 | error_msg = f"Could not import pystata or sfi: {str(setup_error)}"
312 | logging.error(error_msg)
313 | print(f"ERROR: {error_msg}")
314 | print("Stata commands will not be available")
315 | has_stata = False
316 | stata_available = False
317 |
318 | return False
319 | except Exception as e:
320 | error_msg = f"General error setting up Stata environment: {str(e)}"
321 | logging.error(error_msg)
322 | print(f"ERROR: {error_msg}")
323 | print("Stata commands will not be available")
324 | print(f"Check if the Stata path is correct: {stata_path}")
325 | print("And ensure Stata is properly licensed and not running in another process")
326 | has_stata = False
327 | stata_available = False
328 |
329 | return False
330 |
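# Usage sketch (illustrative only; the install paths below are hypothetical
# examples, not detected values). try_init_stata() is typically called once at
# startup with the configured Stata installation directory; it returns a bool
# and sets the module globals stata_available, has_stata and stata:
#
#   try_init_stata("/Applications/Stata")         # macOS install directory
#   try_init_stata(r"C:\Program Files\Stata18")   # Windows install directory
#   try_init_stata("/usr/local/stata18")          # Linux install directory
#
# A False return does not stop the server; Stata-backed commands simply report
# that Stata is unavailable.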
331 | # Lock file mechanism removed - VS Code/Cursor handles extension instances properly
332 | # If there are port conflicts, the server will fail to start cleanly
333 |
334 | def get_log_file_path(do_file_path, do_file_base):
335 | """Get the appropriate log file path based on user settings
336 |
337 | Returns an absolute path to ensure log files are saved to the correct location
338 | regardless of Stata's working directory.
339 | """
340 | global log_file_location, custom_log_directory, extension_path
341 |
342 | if log_file_location == 'extension':
343 | # Use logs folder in extension directory
344 | if extension_path:
345 | logs_dir = os.path.join(extension_path, 'logs')
346 | # Create logs directory if it doesn't exist
347 | os.makedirs(logs_dir, exist_ok=True)
348 | log_path = os.path.join(logs_dir, f"{do_file_base}_mcp.log")
349 | return os.path.abspath(log_path)
350 | else:
351 | # Fallback to workspace if extension path is not available
352 | do_file_dir = os.path.dirname(do_file_path)
353 | log_path = os.path.join(do_file_dir, f"{do_file_base}_mcp.log")
354 | return os.path.abspath(log_path)
355 | elif log_file_location == 'custom':
356 | # Use custom directory
357 | if custom_log_directory and os.path.exists(custom_log_directory):
358 | log_path = os.path.join(custom_log_directory, f"{do_file_base}_mcp.log")
359 | return os.path.abspath(log_path)
360 | else:
361 | # Fallback to workspace if custom directory is invalid
362 | logging.warning(f"Custom log directory not valid: {custom_log_directory}, falling back to workspace")
363 | do_file_dir = os.path.dirname(do_file_path)
364 | log_path = os.path.join(do_file_dir, f"{do_file_base}_mcp.log")
365 | return os.path.abspath(log_path)
366 | else: # workspace
367 | # Use same directory as .do file (original behavior)
368 | do_file_dir = os.path.dirname(do_file_path)
369 | log_path = os.path.join(do_file_dir, f"{do_file_base}_mcp.log")
370 | return os.path.abspath(log_path)
371 |
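# Resolution sketch (illustrative only; the do-file path below is a
# hypothetical example). For /projects/demo/analysis.do the three
# log_file_location settings resolve to:
#
#   'extension' -> <extension_path>/logs/analysis_mcp.log
#   'custom'    -> <custom_log_directory>/analysis_mcp.log
#                  (falls back to the .do file's directory if the custom
#                   directory is missing or invalid)
#   'workspace' -> /projects/demo/analysis_mcp.log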
372 | def resolve_do_file_path(file_path: str) -> tuple[Optional[str], list[str]]:
373 | """Resolve a .do file path to an absolute location, mirroring run_stata_file logic.
374 |
375 | Returns:
376 | A tuple of (resolved_path, tried_paths). resolved_path is None if the file
377 | could not be located. tried_paths contains the normalized paths that were examined.
378 | """
379 | original_path = file_path
380 | normalized_path = os.path.normpath(file_path)
381 |
382 | # Normalize Windows paths to use backslashes for consistency
383 | if platform.system() == "Windows" and '/' in normalized_path:
384 | normalized_path = normalized_path.replace('/', '\\')
385 | logging.info(f"Converted path for Windows: {normalized_path}")
386 |
387 | candidates: list[str] = []
388 | tried_paths: list[str] = []
389 |
390 | if not os.path.isabs(normalized_path):
391 | cwd = os.getcwd()
392 | logging.info(f"File path is not absolute. Current working directory: {cwd}")
393 |
394 | candidates.extend([
395 | normalized_path,
396 | os.path.join(cwd, normalized_path),
397 | os.path.join(cwd, os.path.basename(normalized_path)),
398 | ])
399 |
400 | if platform.system() == "Windows":
401 | if '/' in original_path:
402 | win_path = original_path.replace('/', '\\')
403 | candidates.append(win_path)
404 | candidates.append(os.path.join(cwd, win_path))
405 | elif '\\' in original_path:
406 | unix_path = original_path.replace('\\', '/')
407 | candidates.append(unix_path)
408 | candidates.append(os.path.join(cwd, unix_path))
409 |
410 | # Search subdirectories up to two levels deep for the file
411 | for root, dirs, files in os.walk(cwd, topdown=True, followlinks=False):
412 | if os.path.basename(normalized_path) in files and root != cwd:
413 | subdir_path = os.path.join(root, os.path.basename(normalized_path))
414 | candidates.append(subdir_path)
415 |
416 | # Limit depth to two levels
417 | if root.replace(cwd, '').count(os.sep) >= 2:
418 | dirs[:] = []
419 | else:
420 | candidates.append(normalized_path)
421 |
422 | # Deduplicate while preserving order
423 | seen = set()
424 | unique_candidates = []
425 | for candidate in candidates:
426 | normalized_candidate = os.path.normpath(candidate)
427 | if normalized_candidate not in seen:
428 | seen.add(normalized_candidate)
429 | unique_candidates.append(normalized_candidate)
430 |
431 | for candidate in unique_candidates:
432 | tried_paths.append(candidate)
433 | if os.path.isfile(candidate) and candidate.lower().endswith('.do'):
434 | resolved = os.path.abspath(candidate)
435 | logging.info(f"Found file at: {resolved}")
436 | return resolved, tried_paths
437 |
438 | return None, tried_paths
439 |
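# Usage sketch (illustrative only; the file name is a hypothetical example):
#
#   resolved, tried = resolve_do_file_path("analysis.do")
#   if resolved is None:
#       print("Not found; tried:", ", ".join(tried))
#   else:
#       print("Running", resolved)
#
# Relative paths are checked against the current working directory and its
# subdirectories (up to two levels deep); absolute paths are checked as-is.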
440 | def get_stata_path():
441 | """Get the Stata executable path based on the platform and configured path"""
442 | global STATA_PATH
443 |
444 | if not STATA_PATH:
445 | return None
446 |
447 | # Build the actual executable path based on the platform
448 | if platform.system() == "Windows":
449 | # On Windows, executable is StataMP.exe or similar
450 | # Try different executable names
451 | for exe_name in ["StataMP-64.exe", "StataMP.exe", "StataSE-64.exe", "StataSE.exe", "Stata-64.exe", "Stata.exe"]:
452 | exe_path = os.path.join(STATA_PATH, exe_name)
453 | if os.path.exists(exe_path):
454 | return exe_path
455 |
456 | # If no specific executable found, use the default path with StataMP.exe
457 | return os.path.join(STATA_PATH, "StataMP.exe")
458 | else:
459 | # On macOS, the executable is StataMP inside the app bundle
460 | if platform.system() == "Darwin": # macOS
461 | # Check if STATA_PATH is the app bundle path
462 | if STATA_PATH.endswith(".app"):
463 | # App bundle format like /Applications/Stata/StataMP.app
464 | exe_path = os.path.join(STATA_PATH, "Contents", "MacOS", "StataMP")
465 | if os.path.exists(exe_path):
466 | return exe_path
467 |
468 | # Try other Stata variants
469 | for variant in ["StataSE", "Stata"]:
470 | exe_path = os.path.join(STATA_PATH, "Contents", "MacOS", variant)
471 | if os.path.exists(exe_path):
472 | return exe_path
473 | else:
474 | # Direct path like /Applications/Stata
475 | for variant in ["StataMP", "StataSE", "Stata"]:
476 | # Check if there's an app bundle inside the directory
477 | app_path = os.path.join(STATA_PATH, f"{variant}.app")
478 | if os.path.exists(app_path):
479 | exe_path = os.path.join(app_path, "Contents", "MacOS", variant)
480 | if os.path.exists(exe_path):
481 | return exe_path
482 |
483 | # Also check for direct executable
484 | exe_path = os.path.join(STATA_PATH, variant)
485 | if os.path.exists(exe_path):
486 | return exe_path
487 | else:
488 | # Linux - executable should be inside the path directly
489 | for variant in ["stata-mp", "stata-se", "stata"]:
490 | exe_path = os.path.join(STATA_PATH, variant)
491 | if os.path.exists(exe_path):
492 | return exe_path
493 |
494 | # If we get here, we couldn't find the executable
495 | logging.error(f"Could not find Stata executable in {STATA_PATH}")
496 | return STATA_PATH # Return the base path as fallback
497 |
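# Resolution sketch (illustrative only; the install locations below are
# common examples, not guaranteed paths):
#
#   Windows: C:\Program Files\Stata18 -> C:\Program Files\Stata18\StataMP-64.exe
#   macOS:   /Applications/Stata      -> /Applications/Stata/StataMP.app/Contents/MacOS/StataMP
#   Linux:   /usr/local/stata18       -> /usr/local/stata18/stata-mp
#
# If no known executable is found, Windows falls back to <STATA_PATH>\StataMP.exe
# and macOS/Linux fall back to returning STATA_PATH itself.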
498 | def check_stata_installed():
499 | """Check if Stata is installed and available"""
500 | global stata_available
501 |
502 | # First check if we have working Python integration
503 | if stata_available and 'stata' in globals():
504 | return True
505 |
506 | # Otherwise check for executable
507 | stata_path = get_stata_path()
508 | if not stata_path:
509 | return False
510 |
511 | # Check if the file exists and is executable
512 | if not os.path.exists(stata_path):
513 | return False
514 |
515 | # On non-Windows, check if it's executable
516 | if platform.system() != "Windows" and not os.access(stata_path, os.X_OK):
517 | return False
518 |
519 | return True
520 |
521 | # Function to run a Stata command
522 | def run_stata_command(command: str, clear_history=False, auto_detect_graphs=False):
523 | """Run a Stata command
524 |
525 | Args:
526 | command: The Stata command to run
527 | clear_history: Whether to clear command history
528 | auto_detect_graphs: Whether to detect and export graphs after execution (default: False for MCP/LLM calls)
529 |
530 | Note: This function manually enables _gr_list on before execution and detects graphs after.
531 | We do NOT use inline=True because it calls _gr_list off at the end, clearing our graph list!
532 | This function is only called from /v1/tools endpoint which is excluded from MCP.
533 | """
534 | global stata_available, has_stata, command_history
535 |
536 | # Only log at debug level instead of info to reduce verbosity
537 | logging.debug(f"Running Stata command: {command}")
538 |
539 | # Clear history if requested
540 | if clear_history:
541 | logging.info(f"Clearing command history (had {len(command_history)} items)")
542 | command_history = []
543 | # If it's just a clear request with no command, return empty
544 | if not command or command.strip() == '':
545 | logging.info("Clear history request completed")
546 | return ''
547 |
548 | # For multi-line commands, don't add semicolons - just clean up whitespace
549 | if "\n" in command:
550 | # Clean up the commands to ensure proper formatting without adding semicolons
551 | command = "\n".join(line.strip() for line in command.splitlines() if line.strip())
552 | logging.debug(f"Processed multiline command: {command}")
553 |
554 | # Special handling for 'do' commands with file paths
555 | if command.lower().startswith('do '):
556 | # Extract the file path part
557 | parts = command.split(' ', 1)
558 | if len(parts) > 1:
559 | file_path = parts[1].strip()
560 |
561 | # Remove any existing quotes
562 | if (file_path.startswith('"') and file_path.endswith('"')) or \
563 | (file_path.startswith("'") and file_path.endswith("'")):
564 | file_path = file_path[1:-1]
565 |
566 | # Normalize path for OS
567 | file_path = os.path.normpath(file_path)
568 |
569 | # On Windows, make sure backslashes are used
570 | if platform.system() == "Windows" and '/' in file_path:
571 | file_path = file_path.replace('/', '\\')
572 | logging.debug(f"Converted path for Windows: {file_path}")
573 |
574 | # For Stata's do command, ALWAYS use double quotes regardless of platform
575 | # This is the most reliable approach to handle spaces and special characters
576 | file_path = f'"{file_path}"'
577 |
578 | # Reconstruct the command with the properly formatted path
579 | command = f"do {file_path}"
580 | logging.debug(f"Reformatted 'do' command: {command}")
581 |
582 | # Check if pystata is available
583 | if has_stata and stata_available:
584 | # Run the command via pystata
585 | try:
586 | # Enable graph listing for this command using low-level API
587 | try:
588 | from pystata.config import stlib, get_encode_str
589 | logging.debug("Enabling graph listing with _gr_list on...")
590 | stlib.StataSO_Execute(get_encode_str("qui _gr_list on"), False)
591 | logging.debug("Successfully enabled graph listing")
592 | except Exception as e:
593 | logging.warning(f"Could not enable graph listing: {str(e)}")
594 | logging.debug(f"Graph listing enable error: {traceback.format_exc()}")
595 |
596 | # Initialize graphs list (will be populated if graphs are found)
597 | graphs_from_interactive = []
598 |
599 | # Create a temp file to capture output
600 | with tempfile.NamedTemporaryFile(
601 |
602 | suffix='.do', delete=False, mode='w', encoding='utf-8'
603 |
604 | ) as f:
605 | # Write the command to the file
606 | f.write(f"capture log close _all\n")
607 | f.write(f"log using \"{f.name}.log\", replace text\n")
608 |
609 | # Process command line by line to comment out cls commands
610 | cls_commands_found = 0
611 | processed_command = ""
612 | for line in command.splitlines():
613 | # Ensure line is a string (defensive programming)
614 | line = str(line) if line is not None else ""
615 |
616 | # Check if this is a cls command
617 | if re.match(r'^\s*cls\s*$', line, re.IGNORECASE):
618 | processed_command += f"* COMMENTED OUT BY MCP: {line}\n"
619 | cls_commands_found += 1
620 | else:
621 | processed_command += f"{line}\n"
622 |
623 | if cls_commands_found > 0:
624 | logging.info(f"Found and commented out {cls_commands_found} cls commands in the selection")
625 |
626 | # Special handling for 'do' commands to ensure proper quoting
627 | if command.lower().startswith('do '):
628 | # For do commands, we need to make sure the file path is properly handled
629 | # The command already has the file in quotes from the code above
630 | f.write(f"{processed_command}")
631 | else:
632 | # Normal commands don't need special treatment
633 | f.write(f"{processed_command}")
634 |
635 | f.write(f"capture log close\n")
636 | do_file = f.name
637 |
638 | # Execute the do file with echo=False to completely silence Stata output to console
639 | try:
640 | # Redirect stdout temporarily to silence Stata output
641 | original_stdout = sys.stdout
642 | sys.stdout = open(os.devnull, 'w')
643 |
644 | try:
645 | # Always use double quotes for the do file path for PyStata
646 | run_cmd = f"do \"{do_file}\""
647 | # Use inline=False because inline=True calls _gr_list off at the end!
648 | globals()['stata'].run(run_cmd, echo=False, inline=False)
649 | logging.debug(f"Command executed successfully via pystata: {run_cmd}")
650 | except Exception as e:
651 | # If command fails, try to reinitialize Stata once
652 | logging.warning(f"Stata command failed, attempting to reinitialize: {str(e)}")
653 |
654 | # Try to reinitialize Stata with the global path
655 | if STATA_PATH:
656 | if try_init_stata(STATA_PATH):
657 | # Retry the command if reinitialization succeeded
658 | try:
659 | globals()['stata'].run(f"do \"{do_file}\"", echo=False, inline=False)
660 | logging.info(f"Command succeeded after Stata reinitialization")
661 | except Exception as retry_error:
662 | logging.error(f"Command still failed after reinitializing Stata: {str(retry_error)}")
663 | raise retry_error
664 | else:
665 | logging.error(f"Failed to reinitialize Stata")
666 | raise e
667 | else:
668 | logging.error(f"No Stata path available for reinitialization")
669 | raise e
670 | finally:
671 | # Restore stdout
672 | sys.stdout.close()
673 | sys.stdout = original_stdout
674 |
675 | # Only detect and export graphs if enabled (not from LLM/MCP)
676 | if auto_detect_graphs:
677 | # Immediately check for graphs while they're still in memory
678 | # This happens right after stata.run() completes, before any cleanup
679 | try:
680 | logging.debug("Checking for graphs immediately after execution (interactive mode)...")
681 | graphs_from_interactive = display_graphs_interactive(graph_format='png', width=800, height=600)
682 | if graphs_from_interactive:
683 | logging.info(f"Captured {len(graphs_from_interactive)} graphs in interactive mode")
684 | except Exception as graph_err:
685 | logging.warning(f"Could not capture graphs in interactive mode: {str(graph_err)}")
686 |
687 | except Exception as exec_error:
688 | error_msg = f"Error running command: {str(exec_error)}"
689 | logging.error(error_msg)
690 | return error_msg
691 |
692 | # Read the log file
693 | log_file = f"{do_file}.log"
694 | logging.debug(f"Reading log file: {log_file}")
695 |
696 | # Wait for the log file to be written
697 | max_attempts = 10
698 | attempts = 0
699 | while not os.path.exists(log_file) and attempts < max_attempts:
700 | time.sleep(0.3)
701 | attempts += 1
702 |
703 | if not os.path.exists(log_file):
704 | logging.error(f"Log file not created: {log_file}")
705 | return "Command executed but no output was captured"
706 |
707 | # Wait a moment for file writing to complete
708 | time.sleep(0.5)
709 |
710 | try:
711 | with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
712 | log_content = f.read()
713 |
714 | # MUCH SIMPLER APPROACH: Just filter beginning and end of log file
715 | lines = log_content.strip().split('\n')
716 |
717 | # Find the first actual command (first line that starts with a dot that's not log related)
718 | start_index = 0
719 | for i, line in enumerate(lines):
720 | if line.strip().startswith('.') and 'log ' not in line and 'capture log close' not in line:
721 | # Found the first actual command, so output starts right after this
722 | start_index = i + 1
723 | break
724 |
725 | # Find end of output (the "capture log close" or "end of do-file" at the end)
726 | end_index = len(lines)
727 | for i in range(len(lines)-1, 0, -1):
728 | if 'capture log close' in lines[i] or 'end of do-file' in lines[i]:
729 | end_index = i
730 | break
731 |
732 | # Extract just the middle part (the actual output)
733 | result_lines = []
734 | for i in range(start_index, end_index):
735 | line = lines[i].rstrip() # Remove trailing whitespace
736 |
737 | # Skip empty lines at beginning or end
738 | if not line.strip():
739 | continue
740 |
741 | # Keep command lines (don't filter out lines starting with '.')
742 |
743 | # Remove consecutive blank lines (keep just one)
744 | if (not line.strip() and result_lines and not result_lines[-1].strip()):
745 | continue
746 |
747 | result_lines.append(line)
748 |
749 | # Clean up temporary files
750 | try:
751 | os.unlink(do_file)
752 | os.unlink(log_file)
753 | except Exception as e:
754 | logging.warning(f"Could not delete temporary files: {str(e)}")
755 |
756 | # Add timestamp to the result
757 | timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
758 | command_entry = f"[{timestamp}] {command}"
759 |
760 | # Return properly formatted output
761 | if not result_lines:
762 | result = "Command executed successfully (no output)"
763 | else:
764 | result = "\n".join(result_lines)
765 |
766 | # Use graphs captured in interactive mode (if any)
767 | # These were already captured right after execution while still in memory
768 | if graphs_from_interactive:
769 | graph_info = "\n\n" + "="*60 + "\n"
770 | graph_info += f"GRAPHS DETECTED: {len(graphs_from_interactive)} graph(s) created\n"
771 | graph_info += "="*60 + "\n"
772 | for graph in graphs_from_interactive:
773 | # Include command if available, using special format for JavaScript parsing
774 | if 'command' in graph and graph['command']:
775 | graph_info += f" • {graph['name']}: {graph['path']} [CMD: {graph['command']}]\n"
776 | else:
777 | graph_info += f" • {graph['name']}: {graph['path']}\n"
778 | result += graph_info
779 | logging.info(f"Added {len(graphs_from_interactive)} graphs to output (from interactive mode)")
780 | else:
781 | logging.debug("No graphs were captured in interactive mode")
782 |
783 | # Disable graph listing after detection
784 | try:
785 | from pystata.config import stlib, get_encode_str
786 | stlib.StataSO_Execute(get_encode_str("qui _gr_list off"), False)
787 | logging.debug("Disabled graph listing")
788 | except Exception as e:
789 | logging.warning(f"Could not disable graph listing: {str(e)}")
790 |
791 | # For interactive window, just return the current result
792 | # The client will handle displaying history
793 | return result
794 |
795 | except Exception as e:
796 | error_msg = f"Error reading log file: {str(e)}"
797 | logging.error(error_msg)
798 | return error_msg
799 |
800 | except Exception as e:
801 | error_msg = f"Error executing Stata command: {str(e)}"
802 | logging.error(error_msg)
803 | # Add to command history
804 | timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
805 | command_entry = f"[{timestamp}] {command}"
806 | command_history.append({"command": command_entry, "result": error_msg})
807 | return error_msg
808 |
809 | else:
810 | error_msg = "Stata is not available. Please check if Stata is installed and configured correctly."
811 | logging.error(error_msg)
812 | # Add to command history
813 | timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
814 | command_entry = f"[{timestamp}] {command}"
815 | command_history.append({"command": command_entry, "result": error_msg})
816 | return error_msg
817 |
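# Output sketch (illustrative only; graph name, path and command are
# hypothetical). When graphs are captured, run_stata_command() appends a block
# like the following to the returned text; the "[CMD: ...]" suffix is the
# special format parsed on the JavaScript side and appears only when a
# 'command' field is present:
#
#   ============================================================
#   GRAPHS DETECTED: 1 graph(s) created
#   ============================================================
#     • graph1: <graphs_dir>/graph1.png [CMD: scatter y x]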
818 | def detect_and_export_graphs():
819 | """Detect and export any graphs created by Stata commands
820 |
821 | Returns:
822 | List of dictionaries with graph info: [{"name": "graph1", "path": "/path/to/graph.png"}, ...]
823 | """
824 | global stata_available, has_stata, extension_path
825 |
826 | if not (has_stata and stata_available):
827 | return []
828 |
829 | try:
830 | import sfi
831 | from pystata.config import stlib, get_encode_str
832 |
833 | # Get list of graphs using low-level API like PyStata does
834 | logging.debug("Checking for graphs using _gr_list (low-level API)...")
835 |
836 | # Get the list (_gr_list should already be on from before command execution)
837 | rc = stlib.StataSO_Execute(get_encode_str("qui _gr_list list"), False)
838 | logging.debug(f"_gr_list list returned rc={rc}")
839 | gnamelist = sfi.Macro.getGlobal("r(_grlist)")
840 | logging.debug(f"r(_grlist) returned: '{gnamelist}' (type: {type(gnamelist)}, length: {len(gnamelist) if gnamelist else 0})")
841 |
842 | if not gnamelist:
843 | logging.debug("No graphs found (gnamelist is empty)")
844 | return []
845 |
846 | graphs_info = []
847 | graph_names = gnamelist.split()
848 | logging.info(f"Found {len(graph_names)} graph(s): {graph_names}")
849 |
850 | # Create graphs directory in extension path or temp
851 | if extension_path:
852 | graphs_dir = os.path.join(extension_path, 'graphs')
853 | else:
854 | graphs_dir = os.path.join(tempfile.gettempdir(), 'stata_mcp_graphs')
855 |
856 | os.makedirs(graphs_dir, exist_ok=True)
857 | logging.debug(f"Exporting graphs to: {graphs_dir}")
858 |
859 | # Export each graph to PNG
860 | for i, gname in enumerate(graph_names):
861 | try:
862 | # Display the graph first using low-level API
863 | # Stata graph names should not be quoted in graph display command
864 | gph_disp = f'qui graph display {gname}'
865 | rc = stlib.StataSO_Execute(get_encode_str(gph_disp), False)
866 | if rc != 0:
867 | logging.warning(f"Failed to display graph '{gname}' (rc={rc})")
868 | continue
869 |
870 | # Export as PNG (best for VS Code display)
871 | # Use a sanitized filename but keep the original name for the name() option
872 | graph_file = os.path.join(graphs_dir, f'{gname}.png')
873 | # The name() option does NOT need quotes - it's a Stata name, not a string
874 | gph_exp = f'qui graph export "{graph_file}", name({gname}) replace width(800) height(600)'
875 |
876 | logging.debug(f"Executing graph export command: {gph_exp}")
877 | rc = stlib.StataSO_Execute(get_encode_str(gph_exp), False)
878 | if rc != 0:
879 | logging.warning(f"Failed to export graph '{gname}' (rc={rc})")
880 | continue
881 |
882 | if os.path.exists(graph_file):
883 | graphs_info.append({
884 | "name": gname,
885 | "path": graph_file
886 | })
887 | logging.info(f"Exported graph '{gname}' to {graph_file}")
888 | else:
889 | logging.warning(f"Failed to export graph '{gname}' - file not created")
890 |
891 | except Exception as e:
892 | logging.error(f"Error exporting graph '{gname}': {str(e)}")
893 | continue
894 |
895 | return graphs_info
896 |
897 | except Exception as e:
898 | logging.error(f"Error detecting graphs: {str(e)}")
899 | return []
900 |
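# Return-value sketch (illustrative only; names and paths are hypothetical):
#
#   [
#       {"name": "graph1", "path": "<graphs_dir>/graph1.png"},
#       {"name": "hist_price", "path": "<graphs_dir>/hist_price.png"},
#   ]
#
#   for g in detect_and_export_graphs():
#       logging.info("Graph %s exported to %s", g["name"], g["path"])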
901 | def display_graphs_interactive(graph_format='png', width=800, height=600):
902 | """Display graphs using PyStata's interactive approach (similar to Jupyter)
903 |
904 | This function mimics PyStata's grdisplay.py approach for exporting graphs.
905 | It should be called immediately after command execution while graphs are still in memory.
906 |
907 | Args:
908 | graph_format: Format for exported graphs ('svg', 'png', or 'pdf')
909 | width: Width for graph export (pixels for png, inches for svg/pdf)
910 | height: Height for graph export (pixels for png, inches for svg/pdf)
911 |
912 | Returns:
913 | List of dictionaries with graph info: [{"name": "graph1", "path": "/path/to/graph.png", "format": "png", "command": "scatter y x"}, ...]
914 | """
915 | global stata_available, has_stata, extension_path
916 |
917 | if not (has_stata and stata_available):
918 | return []
919 |
920 | try:
921 | import sfi
922 | from pystata.config import stlib, get_encode_str
923 |
924 | # Use the same approach as PyStata's grdisplay.py
925 | logging.debug(f"Interactive graph display: checking for graphs (format: {graph_format})...")
926 |
927 | # Get the list of graphs (_gr_list should already be on from before file execution)
928 | rc = stlib.StataSO_Execute(get_encode_str("qui _gr_list list"), False)
929 | logging.debug(f"_gr_list list returned rc={rc}")
930 | gnamelist = sfi.Macro.getGlobal("r(_grlist)")
931 | logging.debug(f"r(_grlist) returned: '{gnamelist}' (type: {type(gnamelist)}, length: {len(gnamelist) if gnamelist else 0})")
932 |
933 | if not gnamelist:
934 | logging.debug("No graphs found in interactive mode")
935 | return []
936 |
937 | graphs_info = []
938 | graph_names = gnamelist.split()
939 | logging.info(f"Found {len(graph_names)} graph(s) in interactive mode: {graph_names}")
940 |
941 | # Create graphs directory
942 | if extension_path:
943 | graphs_dir = os.path.join(extension_path, 'graphs')
944 | else:
945 | graphs_dir = os.path.join(tempfile.gettempdir(), 'stata_mcp_graphs')
946 |
947 | os.makedirs(graphs_dir, exist_ok=True)
948 | logging.debug(f"Exporting graphs to: {graphs_dir}")
949 |
950 | # Export each graph using PyStata's approach
951 | for i, gname in enumerate(graph_names):
952 | try:
953 | # Display the graph first (required before export)
954 | # Stata graph names should not be quoted in graph display command
955 | gph_disp = f'qui graph display {gname}'
956 | logging.debug(f"Displaying graph: {gph_disp}")
957 | rc = stlib.StataSO_Execute(get_encode_str(gph_disp), False)
958 | if rc != 0:
959 | logging.warning(f"Failed to display graph '{gname}' (rc={rc})")
960 | continue
961 |
962 | # Determine file extension and export command based on format
963 | if graph_format == 'svg':
964 | graph_file = os.path.join(graphs_dir, f'{gname}.svg')
965 | if width and height:
966 | gph_exp = f'qui graph export "{graph_file}", name({gname}) replace width({width}) height({height})'
967 | else:
968 | gph_exp = f'qui graph export "{graph_file}", name({gname}) replace'
969 | elif graph_format == 'pdf':
970 | graph_file = os.path.join(graphs_dir, f'{gname}.pdf')
971 | # For PDF, use xsize/ysize instead of width/height
972 | if width and height:
973 | gph_exp = f'qui graph export "{graph_file}", name({gname}) replace xsize({width/96:.2f}) ysize({height/96:.2f})'
974 | else:
975 | gph_exp = f'qui graph export "{graph_file}", name({gname}) replace'
976 | else: # png (default)
977 | graph_file = os.path.join(graphs_dir, f'{gname}.png')
978 | if width and height:
979 | gph_exp = f'qui graph export "{graph_file}", name({gname}) replace width({width}) height({height})'
980 | else:
981 | gph_exp = f'qui graph export "{graph_file}", name({gname}) replace width(800) height(600)'
982 |
983 | # Export the graph
984 | logging.debug(f"Exporting graph: {gph_exp}")
985 | rc = stlib.StataSO_Execute(get_encode_str(gph_exp), False)
986 | if rc != 0:
987 | logging.warning(f"Failed to export graph '{gname}' (rc={rc})")
988 | continue
989 |
990 | if os.path.exists(graph_file):
991 | graph_dict = {
992 | "name": gname,
993 | "path": graph_file,
994 | "format": graph_format
995 | }
996 | graphs_info.append(graph_dict)
997 | logging.info(f"Exported graph '{gname}' to {graph_file} (format: {graph_format})")
998 | else:
999 | logging.warning(f"Graph file not found after export: {graph_file}")
1000 |
1001 | except Exception as e:
1002 | logging.error(f"Error exporting graph '{gname}': {str(e)}")
1003 | continue
1004 |
1005 | return graphs_info
1006 |
1007 | except Exception as e:
1008 | logging.error(f"Error in interactive graph display: {str(e)}")
1009 | logging.debug(f"Interactive display error details: {traceback.format_exc()}")
1010 | return []
1011 |
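# Export-command sketch (illustrative only), for a hypothetical 800x600 request
# and a graph named g:
#
#   png: qui graph export "<dir>/g.png", name(g) replace width(800) height(600)
#   svg: qui graph export "<dir>/g.svg", name(g) replace width(800) height(600)
#   pdf: qui graph export "<dir>/g.pdf", name(g) replace xsize(8.33) ysize(6.25)
#        (pixel dimensions converted to inches at an assumed 96 px per inch)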
1012 | def run_stata_selection(selection, working_dir=None, auto_detect_graphs=False):
1013 | """Run selected Stata code
1014 |
1015 | Args:
1016 | selection: The Stata code to run
1017 | working_dir: Optional working directory to change to before execution
1018 | auto_detect_graphs: Whether to detect and export graphs (default: False for MCP/LLM calls)
1019 | """
1020 | # If a working directory is provided, prepend a cd command
1021 | if working_dir and os.path.isdir(working_dir):
1022 | logging.info(f"Changing working directory to: {working_dir}")
1023 | # Normalize path for the OS
1024 | working_dir = os.path.normpath(working_dir)
1025 | # On Windows, ensure backslashes
1026 | if platform.system() == "Windows":
1027 | working_dir = working_dir.replace('/', '\\')
1028 | # Use double quotes for the cd command to handle spaces
1029 | cd_command = f'cd "{working_dir}"'
1030 | # Combine cd command with the selection
1031 | full_command = f"{cd_command}\n{selection}"
1032 | return run_stata_command(full_command, auto_detect_graphs=auto_detect_graphs)
1033 | else:
1034 | return run_stata_command(selection, auto_detect_graphs=auto_detect_graphs)
1035 |
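# Usage sketch (illustrative only; the directory and command are hypothetical):
#
#   run_stata_selection("summarize price", working_dir="/projects/demo")
#   # executes:
#   #   cd "/projects/demo"
#   #   summarize price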
1036 | def run_stata_file(file_path: str, timeout=600, auto_name_graphs=False):
1037 | """Run a Stata .do file with improved handling for long-running processes
1038 |
1039 | Args:
1040 | file_path: The path to the .do file to run
1041 | timeout: Timeout in seconds (default: 600 seconds / 10 minutes)
1042 | auto_name_graphs: Whether to automatically add names to graphs (default: False for MCP/LLM calls)
1043 | """
1044 | # Set timeout from parameter instead of hardcoding
1045 | MAX_TIMEOUT = timeout
1046 |
1047 | try:
1048 | original_path = file_path
1049 |
1050 | resolved_path, tried_paths = resolve_do_file_path(file_path)
1051 | if not resolved_path:
1052 | tried_display = ', '.join(tried_paths) if tried_paths else os.path.normpath(file_path)
1053 | error_msg = f"Error: File not found: {original_path}. Tried these paths: {tried_display}"
1054 | logging.error(error_msg)
1055 |
1056 | # Add more helpful error message for Windows
1057 | if platform.system() == "Windows":
1058 | error_msg += "\n\nCommon Windows path issues:\n"
1059 | error_msg += "1. Make sure the file path uses correct separators (use \\ instead of /)\n"
1060 | error_msg += "2. Check if the file exists in the specified location\n"
1061 | error_msg += "3. If using relative paths, the current working directory is: " + os.getcwd()
1062 |
1063 | return error_msg
1064 |
1065 | file_path = resolved_path
1066 |
1067 | # Verify file exists (final check)
1068 | if not os.path.exists(file_path):
1069 | error_msg = f"Error: File not found: {file_path}"
1070 | logging.error(error_msg)
1071 |
1072 | # Add more helpful error message for Windows
1073 | if platform.system() == "Windows":
1074 | error_msg += "\n\nCommon Windows path issues:\n"
1075 | error_msg += "1. Make sure the file path uses correct separators (use \\ instead of /)\n"
1076 | error_msg += "2. Check if the file exists in the specified location\n"
1077 | error_msg += "3. If using relative paths, the current working directory is: " + os.getcwd()
1078 |
1079 | return error_msg
1080 |
1081 | # Check file extension
1082 | if not file_path.lower().endswith('.do'):
1083 | error_msg = f"Error: File must be a Stata .do file with .do extension: {file_path}"
1084 | logging.error(error_msg)
1085 | return error_msg
1086 |
1087 | logging.info(f"Running Stata do file: {file_path}")
1088 |
1089 | # Ensure file_path is absolute for consistent behavior
1090 | file_path = os.path.abspath(file_path)
1091 |
1092 | # Get the directory and filename for later use
1093 | do_file_dir = os.path.dirname(file_path) # This is now guaranteed to be absolute
1094 | do_file_name = os.path.basename(file_path)
1095 | do_file_base = os.path.splitext(do_file_name)[0]
1096 |
1097 | # Create a custom log file path based on user settings
1098 | # The log file path will be absolute, allowing it to be saved anywhere
1099 | # regardless of Stata's current working directory
1100 | custom_log_file = get_log_file_path(file_path, do_file_base)
1101 | logging.info(f"Will save log to: {custom_log_file}")
1102 |
1103 | # Read the do file content
1104 | do_file_content = ""
1105 | try:
1106 | with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
1107 | do_file_content = f.read()
1108 |
1109 | # Create a modified version with log commands commented out and auto-name graphs
1110 | modified_content = ""
1111 | log_commands_found = 0
1112 | graph_counter = 0
1113 |
1114 | # Process line by line to comment out log commands and add graph names where needed
1115 | cls_commands_found = 0
1116 | for line in do_file_content.splitlines():
1117 | # Ensure line is a string (defensive programming)
1118 | line = str(line) if line is not None else ""
1119 |
1120 | # Check if this line has a log command
1121 | if re.match(r'^\s*(log\s+using|log\s+close|capture\s+log\s+close)', line, re.IGNORECASE):
1122 | modified_content += f"* COMMENTED OUT BY MCP: {line}\n"
1123 | log_commands_found += 1
1124 | continue
1125 |
1126 | # Check if this is a cls command
1127 | if re.match(r'^\s*cls\s*$', line, re.IGNORECASE):
1128 | modified_content += f"* COMMENTED OUT BY MCP: {line}\n"
1129 | cls_commands_found += 1
1130 | continue
1131 |
1132 | # Only auto-name graphs if called from VS Code extension (not from LLM/MCP)
1133 | if auto_name_graphs:
1134 | # Check if this is a graph creation command that might need a name
1135 | # Match: scatter, histogram, twoway, kdensity, graph bar/box/dot/etc (but not graph export)
1136 | graph_match = re.match(r'^(\s*)(scatter|histogram|twoway|kdensity|graph\s+(bar|box|dot|pie|matrix|hbar|hbox|combine))\s+(.*)$', line, re.IGNORECASE)
1137 |
1138 | if graph_match:
1139 | indent = str(graph_match.group(1) or "")
1140 | graph_cmd = str(graph_match.group(2) or "")
1141 |
1142 | # Extract and ensure rest is a string
1143 | rest_raw = graph_match.group(4) if graph_match.lastindex >= 4 else ""
1144 | if rest_raw is None:
1145 | rest_raw = ""
1146 | # Force conversion to string to handle any edge cases
1147 | rest = str(rest_raw)
1148 |
1149 | # Double-check rest is a string before any operations
1150 | if not isinstance(rest, str):
1151 | logging.warning(f"rest is not a string, type: {type(rest)}, value: {rest}, converting to string")
1152 | rest = str(rest)
1153 |
1154 | # Check if it already has name() option
1155 | if not re.search(r'\bname\s*\(', rest, re.IGNORECASE):
1156 | # Add automatic unique name
1157 | graph_counter += 1
1158 | graph_name = f"graph{graph_counter}"
1159 |
1160 | # Add name option - if there's a comma, add after it; otherwise add with comma
1161 | if ',' in rest:
1162 | # Insert name option right after the first comma
1163 | # Ensure rest is definitely a string before re.sub
1164 | rest = str(rest)
1165 | rest = re.sub(r',', f', name({graph_name}, replace)', rest, 1)
1166 | else:
1167 | # No comma yet, add it
1168 | rest = rest.rstrip() + f', name({graph_name}, replace)'
1169 |
1170 | modified_content += f"{indent}{graph_cmd} {rest}\n"
1171 | logging.debug(f"Auto-named graph: {graph_name}")
1172 | continue
1173 |
1174 | # Keep line as-is (including graph export commands)
1175 | modified_content += f"{line}\n"
1176 |
1177 | logging.info(f"Found and commented out {log_commands_found} log commands in the do file")
1178 | if cls_commands_found > 0:
1179 | logging.info(f"Found and commented out {cls_commands_found} cls commands in the do file")
1180 | if graph_counter > 0:
1181 | logging.info(f"Auto-named {graph_counter} graph commands")
1182 |
1183 | # Save the modified content to a temporary file
1184 | with tempfile.NamedTemporaryFile(
1185 |
1186 | suffix='.do', delete=False, mode='w', encoding='utf-8'
1187 |
1188 | ) as temp_do:
1189 | # First close any existing log files
1190 | temp_do.write(f"capture log close _all\n")
1191 | # Clean up Stata session state to prevent pollution from interrupted executions
1192 | # Drop all temporary programs (especially loop programs like 1while, 2while, etc.)
1193 | temp_do.write(f"capture program drop _all\n")
1194 | # Clear all macros to prevent conflicts
1195 | temp_do.write(f"capture macro drop _all\n")
1196 | # Change working directory to the .do file's directory
1197 | # This ensures the .do file executes in its workspace (relative paths work correctly)
1198 | # The log file uses an absolute path, so it's saved to the configured location
1199 | temp_do.write(f"cd \"{do_file_dir}\"\n")
1200 | # Note: _gr_list on is enabled externally before .do file execution
1201 | # Note: Graph names are auto-injected above into modified_content
1202 | # Then add our own log command with absolute path
1203 | temp_do.write(f"log using \"{custom_log_file}\", replace text\n")
1204 | temp_do.write(modified_content)
1205 | temp_do.write(f"\ncapture log close _all\n") # Ensure all logs are closed at the end
1206 | # Note: We intentionally do NOT disable _gr_list so graphs persist for detection
1207 | modified_do_file = temp_do.name
1208 |
1209 | logging.info(f"Created modified do file at {modified_do_file}")
1210 |
1211 | except Exception as e:
1212 | import traceback
1213 | error_msg = f"Error processing do file: {str(e)}"
1214 | logging.error(error_msg)
1215 | logging.error(f"Traceback: {traceback.format_exc()}")
1216 | # Include line number and more details
1217 | tb = traceback.extract_tb(e.__traceback__)
1218 | if tb:
1219 | last_frame = tb[-1]
1220 | error_msg += f"\n at line {last_frame.lineno} in {last_frame.name}"
1221 | return error_msg
1222 |
1223 | # Prepare command entry for history
1224 | timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
1225 | command_entry = f"[{timestamp}] do '{file_path}'"
1226 |
1227 | # Create initial result to update the user
1228 | initial_result = f">>> {command_entry}\nExecuting Stata do file with timeout: {MAX_TIMEOUT} seconds ({MAX_TIMEOUT/60:.1f} minutes)...\n"
1229 |
1230 | # Need to define result variable here so it's accessible in all code paths
1231 | result = initial_result
1232 |
1233 | # Create a properly escaped file path for Stata
1234 | if platform.system() == "Windows":
1235 | # On Windows, escape backslashes and quotes
1236 | stata_path = modified_do_file.replace('"', '\\"')
1237 | # Ensure the path is properly quoted for Windows
1238 | do_command = f'do "{stata_path}"'
1239 | else:
1240 | # On Unix systems (macOS/Linux), use double quotes for better compatibility
1241 | # Double quotes work more reliably across systems
1242 | do_command = f'do "{modified_do_file}"'
1243 |
1244 | # Run the command in background with timeout
1245 | try:
1246 | # Execute the Stata command
1247 | logging.info(f"Running modified do file: {do_command}")
1248 |
1249 | # Set up for PyStata execution
1250 | if has_stata and stata_available:
1251 | # Enable graph listing for this do file execution using low-level API
1252 | try:
1253 | from pystata.config import stlib, get_encode_str
1254 | stlib.StataSO_Execute(get_encode_str("qui _gr_list on"), False)
1255 | logging.debug("Enabled graph listing for do file")
1256 | except Exception as e:
1257 | logging.warning(f"Could not enable graph listing: {str(e)}")
1258 |
1259 | # Record start time for timeout tracking
1260 | start_time = time.time()
1261 | last_update_time = start_time
1262 | update_interval = 60 # Update every 60 seconds (1 minute) initially
1263 |
1264 | # Initialize log tracking
1265 | log_file_exists = False
1266 | last_log_size = 0
1267 | last_reported_lines = 0
1268 |
1269 | # Execute command via PyStata in separate thread to allow polling
1270 | stata_thread = None
1271 | stata_error = None
1272 |
1273 | def run_stata_thread():
1274 | try:
1275 | # Make sure to properly quote the path - this is the key fix
1276 | # Use inline=False because inline=True calls _gr_list off!
1277 | if platform.system() == "Windows":
1278 | # Make sure Windows paths are properly escaped
1279 | globals()['stata'].run(do_command, echo=False, inline=False)
1280 | else:
1281 | # On macOS/Linux, double-check the quoting - adding extra safety
1282 | if not (do_command.startswith('do "') or do_command.startswith("do '")):
1283 | do_command_fixed = f'do "{stata_path}"'
1284 | globals()['stata'].run(do_command_fixed, echo=False, inline=False)
1285 | else:
1286 | globals()['stata'].run(do_command, echo=False, inline=False)
1287 | except Exception as e:
1288 | nonlocal stata_error
1289 | stata_error = str(e)
1290 |
1291 | import threading
1292 | stata_thread = threading.Thread(target=run_stata_thread)
1293 | stata_thread.daemon = True
1294 | stata_thread.start()
1295 |
1296 | # Poll for progress while command is running
1297 | while stata_thread.is_alive():
1298 | # Check for timeout
1299 | current_time = time.time()
1300 | elapsed_time = current_time - start_time
1301 |
1302 | if elapsed_time > MAX_TIMEOUT:
1303 | logging.warning(f"Execution timed out after {MAX_TIMEOUT} seconds")
1304 | result += f"\n*** TIMEOUT: Execution exceeded {MAX_TIMEOUT} seconds ({MAX_TIMEOUT/60:.1f} minutes) ***\n"
1305 |
1306 | # Force terminate Stata operation with increasing severity
1307 | termination_successful = False
1308 |
1309 | try:
1310 | # ATTEMPT 1: Send Stata break command
1311 | logging.warning(f"TIMEOUT - Attempt 1: Sending Stata break command")
1312 | try:
1313 | globals()['stata'].run("break", echo=False)
1314 | time.sleep(0.5) # Give it a moment
1315 | if not stata_thread.is_alive():
1316 | termination_successful = True
1317 | logging.warning("Thread terminated via Stata break command")
1318 | except Exception as e:
1319 | logging.warning(f"Stata break command failed: {str(e)}")
1320 |
1321 | # ATTEMPT 2: Try to forcibly raise an exception in the thread
1322 | if not termination_successful and hasattr(stata_thread, "_stop"):
1323 | logging.warning(f"TIMEOUT - Attempt 2: Forcing thread stop")
1324 | try:
1325 | # This is a more aggressive approach
1326 | # The _stop method is not officially supported but often works
1327 | stata_thread._stop()
1328 | time.sleep(0.5) # Give it a moment
1329 | if not stata_thread.is_alive():
1330 | termination_successful = True
1331 | logging.warning("Thread terminated via thread._stop")
1332 | except Exception as e:
1333 | logging.warning(f"Thread stop failed: {str(e)}")
1334 |
1335 | # ATTEMPT 3: Try to find and kill the Stata process (last resort)
1336 | if not termination_successful:
1337 | logging.warning(f"TIMEOUT - Attempt 3: Looking for Stata process to terminate")
1338 | try:
1339 | # Find any Stata processes
1340 | if platform.system() == "Windows":
1341 | # Windows approach
1342 | subprocess.run(["taskkill", "/F", "/IM", "stata*.exe"],
1343 | stdout=subprocess.DEVNULL,
1344 | stderr=subprocess.DEVNULL)
1345 | else:
1346 | # macOS/Linux approach
1347 | subprocess.run(["pkill", "-f", "stata"],
1348 | stdout=subprocess.DEVNULL,
1349 | stderr=subprocess.DEVNULL)
1350 |
1351 | logging.warning("Sent kill signal to Stata processes")
1352 | except Exception as e:
1353 | logging.error(f"Process kill failed: {str(e)}")
1354 | except Exception as term_error:
1355 | logging.error(f"Error during forced termination: {str(term_error)}")
1356 |
1357 | # Set a flag indicating timeout regardless of termination success
1358 | stata_error = f"Operation timed out after {MAX_TIMEOUT} seconds"
1359 | logging.warning(f"Setting timeout error: {stata_error}")
1360 | break
1361 |
1362 | # Check if it's time for an update
1363 | if current_time - last_update_time >= update_interval:
1364 | # IMPORTANT: Log progress frequently to keep SSE connection alive for long-running scripts
1365 | logging.info(f"⏱️ Execution in progress: {elapsed_time:.0f}s elapsed ({elapsed_time/60:.1f} minutes) of {MAX_TIMEOUT}s timeout")
1366 |
1367 | # Check if log file exists and has been updated
1368 | if os.path.exists(custom_log_file):
1369 | log_file_exists = True
1370 |
1371 | # Check log file size
1372 | current_log_size = os.path.getsize(custom_log_file)
1373 |
1374 | # If log has grown, report progress
1375 | if current_log_size > last_log_size:
1376 | try:
1377 | with open(custom_log_file, 'r', encoding='utf-8', errors='replace') as log:
1378 | log_content = log.read()
1379 | lines = log_content.splitlines()
1380 |
1381 | # Report only new lines since last update
1382 | if last_reported_lines < len(lines):
1383 | new_lines = lines[last_reported_lines:]
1384 |
1385 | # Only report meaningful lines (skip empty lines and headers)
1386 | meaningful_lines = [line for line in new_lines if line.strip() and not line.startswith('-')]
1387 |
1388 | # If we have meaningful content, add it to result
1389 | if meaningful_lines:
1390 | progress_update = f"\n*** Progress update ({elapsed_time:.0f} seconds) ***\n"
1391 | progress_update += "\n".join(meaningful_lines[-10:]) # Show last 10 lines
1392 | result += progress_update
1393 | # Also log the progress for SSE keep-alive
1394 | logging.info(f"📊 Progress: Log file grew to {current_log_size} bytes, {len(meaningful_lines)} new meaningful lines")
1395 |
1396 | last_reported_lines = len(lines)
1397 | except Exception as e:
1398 | logging.warning(f"Error reading log for progress update: {str(e)}")
1399 |
1400 | last_log_size = current_log_size
1401 |
1402 | last_update_time = current_time
1403 |
1404 |                 # Polling interval: intentionally fixed at 60 seconds regardless of elapsed
1405 |                 # time. Emitting at least one log message per minute is what keeps the SSE
1406 |                 # connection alive for long-running scripts, so there is no adaptive back-off.
1407 |                 update_interval = 60
1412 |
1413 | # Sleep briefly to avoid consuming too much CPU
1414 | time.sleep(0.5)
1415 |
1416 | # Thread completed or timed out
1417 | if stata_error:
1418 | error_msg = f"Error executing Stata command: {stata_error}"
1419 | logging.error(error_msg)
1420 | result += f"\n*** ERROR: {stata_error} ***\n"
1421 |
1422 | # Add command to history and return
1423 | command_history.append({"command": command_entry, "result": result})
1424 | return result
1425 |
1426 | # Read final log output
1427 | if os.path.exists(custom_log_file):
1428 | try:
1429 | with open(custom_log_file, 'r', encoding='utf-8', errors='replace') as log:
1430 | log_content = log.read()
1431 |
1432 | # Clean up log content - remove headers and Stata startup info
1433 | lines = log_content.splitlines()
1434 | result_lines = []
1435 |
1436 | # Skip Stata header if present (search for the separator line)
1437 | start_index = 0
1438 | for i, line in enumerate(lines):
1439 | if '-------------' in line and i < 20: # Look in first 20 lines
1440 | start_index = i + 1
1441 | break
1442 |
1443 | # Process the content
1444 | for i in range(start_index, len(lines)):
1445 | # Ensure line is a string (defensive programming)
1446 | line = str(lines[i]) if lines[i] is not None else ""
1447 | line = line.rstrip()
1448 |
1449 | # Skip empty lines at beginning or redundant empty lines
1450 | if not line.strip() and (not result_lines or not result_lines[-1].strip()):
1451 | continue
1452 |
1453 | # Clean up SMCL formatting if present
1454 | if '{' in line:
1455 | line = re.sub(r'\{[^}]*\}', '', line) # Remove {...} codes
1456 |
1457 | result_lines.append(line)
1458 |
1459 | # Add completion message with final log content
1460 | completion_msg = f"\n*** Execution completed in {time.time() - start_time:.1f} seconds ***\n"
1461 | completion_msg += "Final output:\n"
1462 | completion_msg += "\n".join(result_lines)
1463 |
1464 | # Replace the result with a clean summary
1465 | result = f">>> {command_entry}\n{completion_msg}"
1466 |
1467 | # Only detect and export graphs if called from VS Code extension (not from LLM/MCP)
1468 | if auto_name_graphs:
1469 | # Detect and export any graphs created by the do file
1470 |                     # Graphs remain in memory because _gr_list was enabled before the run (which used inline=False)
1471 | try:
1472 | logging.debug("Attempting to detect graphs from do file (interactive mode)...")
1473 | graphs = display_graphs_interactive(graph_format='png', width=800, height=600)
1474 | logging.debug(f"Graph detection returned: {graphs}")
1475 | if graphs:
1476 | graph_info = "\n\n" + "="*60 + "\n"
1477 | graph_info += f"GRAPHS DETECTED: {len(graphs)} graph(s) created\n"
1478 | graph_info += "="*60 + "\n"
1479 | for graph in graphs:
1480 | # Include command if available, using special format for JavaScript parsing
1481 | if 'command' in graph and graph['command']:
1482 | graph_info += f" • {graph['name']}: {graph['path']} [CMD: {graph['command']}]\n"
1483 | else:
1484 | graph_info += f" • {graph['name']}: {graph['path']}\n"
1485 | result += graph_info
1486 | logging.info(f"Detected {len(graphs)} graphs from do file: {[g['name'] for g in graphs]}")
1487 | else:
1488 | logging.debug("No graphs detected from do file")
1489 | except Exception as e:
1490 | logging.warning(f"Error detecting graphs: {str(e)}")
1491 | logging.debug(f"Graph detection error details: {traceback.format_exc()}")
1492 |
1493 | # Log the final file location
1494 | result += f"\n\nLog file saved to: {custom_log_file}"
1495 | except Exception as e:
1496 | logging.error(f"Error reading final log: {str(e)}")
1497 | result += f"\n*** WARNING: Error reading final log: {str(e)} ***\n"
1498 | else:
1499 | logging.warning(f"Log file not found after execution: {custom_log_file}")
1500 | result += f"\n*** WARNING: Log file not found after execution ***\n"
1501 |
1502 | # Try to get a status update from Stata
1503 | try:
1504 | status = run_stata_command("display _rc", clear_history=False)
1505 | result += f"\nStata return code: {status}\n"
1506 |                 except Exception:
1507 |                     pass  # best-effort status check; ignore failures
1508 | else:
1509 | # Stata not available
1510 | error_msg = "Stata is not available. Please check if Stata is installed and configured correctly."
1511 | logging.error(error_msg)
1512 | result = f">>> {command_entry}\n{error_msg}"
1513 | except Exception as e:
1514 | error_msg = f"Error running do file: {str(e)}"
1515 | logging.error(error_msg)
1516 | result = f">>> {command_entry}\n{error_msg}"
1517 |
1518 | # Add to command history and return result
1519 | command_history.append({"command": command_entry, "result": result})
1520 | return result
1521 |
1522 | except Exception as e:
1523 | error_msg = f"Error in run_stata_file: {str(e)}"
1524 | logging.error(error_msg)
1525 | return error_msg
1526 |
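# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original stata_mcp_server.py): the
# timeout handling in run_stata_file() follows a general pattern -- run the
# blocking PyStata call in a daemon thread, poll on a fixed interval, and stop
# waiting once a deadline passes. A minimal, self-contained version of that
# pattern (the function name here is hypothetical) looks roughly like this:
def _example_run_with_timeout(blocking_fn, timeout_s: float, poll_s: float = 0.5):
    """Run blocking_fn in a daemon thread; return (finished, error_or_None)."""
    import threading

    captured = {}

    def _target():
        try:
            blocking_fn()
        except Exception as exc:  # capture rather than raise across threads
            captured['error'] = exc

    worker = threading.Thread(target=_target, daemon=True)
    worker.start()
    deadline = time.time() + timeout_s
    while worker.is_alive() and time.time() < deadline:
        # Progress reporting / log tailing would happen here.
        time.sleep(poll_s)
    return (not worker.is_alive(), captured.get('error'))
# ---------------------------------------------------------------------------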
1527 | # Function to kill any process using the specified port
1528 | def kill_process_on_port(port):
1529 | """Kill any process that is currently using the specified port"""
1530 | try:
1531 | if platform.system() == "Windows":
1532 | # Windows command to find and kill process on port
1533 | find_cmd = f"netstat -ano | findstr :{port}"
1534 | try:
1535 | result = subprocess.check_output(find_cmd, shell=True).decode()
1536 |
1537 | if result:
1538 | # Extract PID from the result
1539 | for line in result.strip().split('\n'):
1540 | if f":{port}" in line and "LISTENING" in line:
1541 | pid = line.strip().split()[-1]
1542 | logging.info(f"Found process with PID {pid} using port {port}")
1543 |
1544 | # Kill the process
1545 | kill_cmd = f"taskkill /F /PID {pid}"
1546 | subprocess.check_output(kill_cmd, shell=True)
1547 | logging.info(f"Killed process with PID {pid}")
1548 | break
1549 | else:
1550 | logging.info(f"No process found using port {port}")
1551 | except subprocess.CalledProcessError:
1552 | # No process found using the port (findstr returns 1 when no matches found)
1553 | logging.info(f"No process found using port {port}")
1554 | else:
1555 | # macOS/Linux command to find and kill process on port
1556 | try:
1557 | # Find the process IDs using the port
1558 | find_cmd = f"lsof -i :{port} -t"
1559 | result = subprocess.check_output(find_cmd, shell=True).decode().strip()
1560 |
1561 | if result:
1562 | # Handle multiple PIDs (one per line)
1563 | pids = result.split('\n')
1564 | for pid in pids:
1565 | pid = pid.strip()
1566 | if pid:
1567 | logging.info(f"Found process with PID {pid} using port {port}")
1568 |
1569 | # Kill the process
1570 | try:
1571 | os.kill(int(pid), signal.SIGKILL) # Use SIGKILL for more forceful termination
1572 | logging.info(f"Killed process with PID {pid}")
1573 | except Exception as kill_error:
1574 | logging.warning(f"Error killing process with PID {pid}: {str(kill_error)}")
1575 |
1576 | # Wait a moment to ensure the port is released
1577 | time.sleep(1)
1578 | else:
1579 | logging.info(f"No process found using port {port}")
1580 | except subprocess.CalledProcessError:
1581 | # No process found using the port
1582 | logging.info(f"No process found using port {port}")
1583 |
1584 | except Exception as e:
1585 | logging.warning(f"Error killing process on port {port}: {str(e)}")
1586 |
1587 | # Double-check if port is still in use
1588 | try:
1589 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
1590 | s.settimeout(1)
1591 | result = s.connect_ex(('localhost', port))
1592 | if result == 0:
1593 | logging.warning(f"Port {port} is still in use after attempting to kill processes")
1594 | logging.warning(f"Please manually kill any processes using port {port} or use a different port")
1595 | else:
1596 | logging.info(f"Port {port} is now available")
1597 | except Exception as socket_error:
1598 | logging.warning(f"Error checking port availability: {str(socket_error)}")
1599 |
1600 | # Function to find an available port
1601 | def find_available_port(start_port, max_attempts=10):
1602 | """Find an available port starting from start_port"""
1603 | for port_offset in range(max_attempts):
1604 | port = start_port + port_offset
1605 | try:
1606 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
1607 | s.settimeout(1)
1608 | result = s.connect_ex(('localhost', port))
1609 | if result != 0: # Port is available
1610 | logging.info(f"Found available port: {port}")
1611 | return port
1612 | except Exception as e:
1613 | logging.warning(f"Error checking port {port}: {str(e)}")
1614 |
1615 | # If we get here, we couldn't find an available port
1616 | logging.warning(f"Could not find an available port after {max_attempts} attempts")
1617 | return None
1618 |
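# Illustrative usage sketch (not part of the original file): a typical startup
# sequence could combine the two helpers above -- try to free the preferred
# port, then fall back to the next available one if it is still busy.
def _example_pick_port(preferred: int = 4000) -> int:
    kill_process_on_port(preferred)
    available = find_available_port(preferred)
    return available if available is not None else preferred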
1619 | # Parameter models for the MCP tools
1620 | class RunSelectionParams(BaseModel):
1621 | selection: str = Field(..., description="The Stata code to execute")
1622 |
1623 | class RunFileParams(BaseModel):
1624 | file_path: str = Field(..., description="The full path to the .do file")
1625 | timeout: int = Field(600, description="Timeout in seconds (default: 600 seconds / 10 minutes)")
1626 |
1627 | # Define Legacy VS Code Extension Support
1628 | class ToolRequest(BaseModel):
1629 | tool: str
1630 | parameters: Dict[str, Any]
1631 |
1632 | class ToolResponse(BaseModel):
1633 | status: str
1634 | result: Optional[str] = None
1635 | message: Optional[str] = None
1636 |
1637 | # Define lifespan context manager for startup/shutdown events
1638 | @asynccontextmanager
1639 | async def lifespan(app: FastAPI):
1640 | """Handle application lifespan events"""
1641 | # Startup: Log startup
1642 | logging.info("FastAPI application starting up")
1643 |
1644 | # Start HTTP session manager if it exists
1645 | if hasattr(app.state, '_http_session_manager_starter'):
1646 | logging.debug("Calling HTTP session manager startup handler")
1647 | await app.state._http_session_manager_starter()
1648 |
1649 | yield # Application runs
1650 |
1651 | # Shutdown: Stop HTTP session manager if it exists
1652 | if hasattr(app.state, '_http_session_manager_stopper'):
1653 | logging.debug("Calling HTTP session manager shutdown handler")
1654 | await app.state._http_session_manager_stopper()
1655 |
1656 | # Cleanup if needed
1657 | logging.info("FastAPI application shutting down")
1658 |
1659 | # Create the FastAPI app with lifespan handler
1660 | app = FastAPI(
1661 | title=SERVER_NAME,
1662 | version=SERVER_VERSION,
1663 | description="Stata MCP Server - Exposes Stata functionality to AI models via MCP protocol",
1664 | lifespan=lifespan
1665 | )
1666 |
1667 | # Define regular FastAPI routes for Stata functions
1668 | @app.post("/run_selection", operation_id="stata_run_selection", response_class=Response)
1669 | async def stata_run_selection_endpoint(selection: str) -> Response:
1670 | """Run selected Stata code and return the output"""
1671 | logging.info(f"Running selection: {selection}")
1672 | result = run_stata_selection(selection)
1673 | # Format output for better display - replace escaped newlines with actual newlines
1674 | formatted_result = result.replace("\\n", "\n")
1675 | return Response(content=formatted_result, media_type="text/plain")
1676 |
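# Illustrative client sketch (not part of the original file): FastAPI treats the
# bare `selection` argument above as a query parameter, so the endpoint can be
# called as a POST with `?selection=...`. The host/port are assumptions.
def _example_call_run_selection(code: str, base_url: str = "http://localhost:4000") -> str:
    import httpx
    resp = httpx.post(f"{base_url}/run_selection", params={"selection": code}, timeout=60.0)
    resp.raise_for_status()
    return resp.text  # plain-text Stata output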
1677 | async def stata_run_file_stream(file_path: str, timeout: int = 600):
1678 | """Async generator that runs Stata file and yields SSE progress events
1679 |
1680 | Args:
1681 | file_path: Path to the .do file
1682 | timeout: Timeout in seconds
1683 |
1684 | Yields:
1685 | SSE formatted events with progress updates
1686 | """
1687 | import threading
1688 | import queue
1689 |
1690 | # Queue to communicate between threads
1691 | progress_queue = queue.Queue()
1692 | result_queue = queue.Queue()
1693 |
1694 | def run_with_progress():
1695 | """Run Stata file in thread, sending progress to queue"""
1696 | try:
1697 | # Run the file and collect result
1698 | result = run_stata_file(file_path, timeout=timeout)
1699 | result_queue.put(('success', result))
1700 | except Exception as e:
1701 | result_queue.put(('error', str(e)))
1702 |
1703 | # Start execution thread
1704 | thread = threading.Thread(target=run_with_progress, daemon=True)
1705 | thread.start()
1706 |
1707 | # Yield initial event
1708 | yield f"data: Starting execution of {os.path.basename(file_path)}...\n\n"
1709 |
1710 | start_time = time.time()
1711 | last_check = start_time
1712 | check_interval = 2.0 # Check every 2 seconds for responsive streaming
1713 |
1714 | # Monitor progress
1715 | while thread.is_alive():
1716 | current_time = time.time()
1717 | elapsed = current_time - start_time
1718 |
1719 | # Check if it's time for an update
1720 | if current_time - last_check >= check_interval:
1721 | # Yield progress event
1722 | yield f"data: Executing... {elapsed:.1f}s elapsed\n\n"
1723 | last_check = current_time
1724 |
1725 | # Sleep briefly to avoid busy waiting
1726 | await asyncio.sleep(0.1)
1727 |
1728 | # Check if execution exceeded timeout
1729 | if elapsed > timeout:
1730 | yield f"data: ERROR: Execution timed out after {timeout}s\n\n"
1731 | break
1732 |
1733 | # Get final result
1734 | try:
1735 | status, result = result_queue.get(timeout=1.0)
1736 | if status == 'error':
1737 | yield f"data: ERROR: {result}\n\n"
1738 | else:
1739 | # Format and send final output
1740 | formatted_result = result.replace("\\n", "\n")
1741 | # Split into chunks to avoid overwhelming SSE
1742 | lines = formatted_result.split('\n')
1743 | for i in range(0, len(lines), 10):
1744 | chunk = '\n'.join(lines[i:i+10])
1745 | # Escape newlines in SSE data field
1746 | escaped_chunk = chunk.replace('\n', '\\n')
1747 | yield f"data: {escaped_chunk}\n\n"
1748 | await asyncio.sleep(0.05) # Small delay between chunks
1749 |
1750 | yield "data: *** Execution completed ***\n\n"
1751 | except queue.Empty:
1752 | yield "data: ERROR: Failed to get execution result\n\n"
1753 |
1754 | @app.get("/run_file", operation_id="stata_run_file", response_class=Response)
1755 | async def stata_run_file_endpoint(
1756 | file_path: str,
1757 | timeout: int = 600
1758 | ) -> Response:
1759 | """Run a Stata .do file and return the output (MCP-compatible endpoint)
1760 |
1761 | Args:
1762 | file_path: Path to the .do file
1763 | timeout: Timeout in seconds (default: 600 seconds / 10 minutes)
1764 |
1765 | Returns:
1766 | Response with plain text output
1767 | """
1768 | # Ensure timeout is a valid integer
1769 | try:
1770 | timeout = int(timeout)
1771 | if timeout <= 0:
1772 | logging.warning(f"Invalid timeout value: {timeout}, using default 600")
1773 | timeout = 600
1774 | except (ValueError, TypeError):
1775 | logging.warning(f"Non-integer timeout value: {timeout}, using default 600")
1776 | timeout = 600
1777 |
1778 | logging.info(f"Running file: {file_path} with timeout {timeout} seconds ({timeout/60:.1f} minutes)")
1779 | result = await asyncio.to_thread(run_stata_file, file_path, timeout=timeout)
1780 |
1781 | # Format output for better display - replace escaped newlines with actual newlines
1782 | formatted_result = result.replace("\\n", "\n")
1783 |
1784 | # Log the output (truncated) for debugging
1785 | logging.debug(f"Run file output (first 100 chars): {formatted_result[:100]}...")
1786 |
1787 | return Response(content=formatted_result, media_type="text/plain")
1788 |
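# Illustrative client sketch (not part of the original file): the endpoint above
# is a plain GET with query parameters and returns text/plain output. The
# host/port are assumptions; adjust them to your setup.
def _example_call_run_file(do_file: str, base_url: str = "http://localhost:4000") -> str:
    import httpx
    resp = httpx.get(f"{base_url}/run_file",
                     params={"file_path": do_file, "timeout": 600},
                     timeout=1200.0)
    resp.raise_for_status()
    return resp.text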
1789 | @app.get("/run_file/stream")
1790 | async def stata_run_file_stream_endpoint(
1791 | file_path: str,
1792 | timeout: int = 600
1793 | ):
1794 | """Run a Stata .do file and stream the output via Server-Sent Events (SSE)
1795 |
1796 | This is a separate endpoint for HTTP clients that want real-time streaming updates.
1797 | For MCP clients, use the regular /run_file endpoint.
1798 |
1799 | Args:
1800 | file_path: Path to the .do file
1801 | timeout: Timeout in seconds (default: 600 seconds / 10 minutes)
1802 |
1803 | Returns:
1804 | StreamingResponse with text/event-stream content type
1805 | """
1806 | # Ensure timeout is a valid integer
1807 | try:
1808 | timeout = int(timeout)
1809 | if timeout <= 0:
1810 | logging.warning(f"Invalid timeout value: {timeout}, using default 600")
1811 | timeout = 600
1812 | except (ValueError, TypeError):
1813 | logging.warning(f"Non-integer timeout value: {timeout}, using default 600")
1814 | timeout = 600
1815 |
1816 | logging.info(f"[STREAM] Running file: {file_path} with timeout {timeout} seconds ({timeout/60:.1f} minutes)")
1817 |
1818 | return StreamingResponse(
1819 | stata_run_file_stream(file_path, timeout),
1820 | media_type="text/event-stream",
1821 | headers={
1822 | "Cache-Control": "no-cache",
1823 | "Connection": "keep-alive",
1824 | "X-Accel-Buffering": "no", # Disable nginx buffering
1825 | }
1826 | )
1827 |
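# Illustrative client sketch (not part of the original file): consuming the SSE
# stream above with httpx. Each event arrives as a "data: ..." line followed by
# a blank line, and newlines inside a result chunk are escaped as literal "\n".
# The host/port (and the example .do file name in the usage note) are assumptions.
# Usage: for event in _example_stream_run_file("analysis.do"): print(event)
def _example_stream_run_file(do_file: str, base_url: str = "http://localhost:4000"):
    import httpx
    with httpx.Client(timeout=None) as client:
        with client.stream("GET", f"{base_url}/run_file/stream",
                           params={"file_path": do_file, "timeout": 600}) as resp:
            resp.raise_for_status()
            for line in resp.iter_lines():
                if line.startswith("data: "):
                    yield line[len("data: "):].replace("\\n", "\n")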
1828 | # MCP server will be initialized in main() after args are parsed
1829 |
1830 | # Add FastAPI endpoint for legacy VS Code extension
1831 | @app.post("/v1/tools", include_in_schema=False)
1832 | async def call_tool(request: ToolRequest) -> ToolResponse:
1833 | try:
1834 | # Map VS Code extension tool names to MCP tool names
1835 | tool_name_map = {
1836 | "run_selection": "stata_run_selection",
1837 | "run_file": "stata_run_file"
1838 | }
1839 |
1840 | # Get the actual tool name
1841 | mcp_tool_name = tool_name_map.get(request.tool, request.tool)
1842 |
1843 | # Log the request
1844 | logging.info(f"REST API request for tool: {request.tool} -> {mcp_tool_name}")
1845 |
1846 | # Check if the tool exists
1847 | if mcp_tool_name not in ["stata_run_selection", "stata_run_file"]:
1848 | return ToolResponse(
1849 | status="error",
1850 | message=f"Unknown tool: {request.tool}"
1851 | )
1852 |
1853 | # Execute the appropriate function
1854 | if mcp_tool_name == "stata_run_selection":
1855 | if "selection" not in request.parameters:
1856 | return ToolResponse(
1857 | status="error",
1858 | message="Missing required parameter: selection"
1859 | )
1860 | # Get optional working_dir parameter
1861 | working_dir = request.parameters.get("working_dir", None)
1862 | # Enable auto_detect_graphs for VS Code extension calls
1863 | result = run_stata_selection(request.parameters["selection"], working_dir=working_dir, auto_detect_graphs=True)
1864 | # Format output for better display
1865 | result = result.replace("\\n", "\n")
1866 |
1867 | elif mcp_tool_name == "stata_run_file":
1868 | if "file_path" not in request.parameters:
1869 | return ToolResponse(
1870 | status="error",
1871 | message="Missing required parameter: file_path"
1872 | )
1873 |
1874 | # Get the file path from the parameters
1875 | file_path = request.parameters["file_path"]
1876 |
1877 | # Get timeout parameter if provided, otherwise use default (10 minutes)
1878 | timeout = request.parameters.get("timeout", 600)
1879 | try:
1880 | timeout = int(timeout) # Ensure it's an integer
1881 | if timeout <= 0:
1882 | logging.warning(f"Invalid timeout value: {timeout}, using default 600")
1883 | timeout = 600
1884 | except (ValueError, TypeError):
1885 | logging.warning(f"Non-integer timeout value: {timeout}, using default 600")
1886 | timeout = 600
1887 |
1888 | logging.info(f"MCP run_file request for: {file_path} with timeout {timeout} seconds ({timeout/60:.1f} minutes)")
1889 |
1890 | # Normalize the path for cross-platform compatibility
1891 | file_path = os.path.normpath(file_path)
1892 |
1893 | # On Windows, convert forward slashes to backslashes if needed
1894 | if platform.system() == "Windows" and '/' in file_path:
1895 | file_path = file_path.replace('/', '\\')
1896 |
1897 | # Run the file through the run_stata_file function with timeout
1898 | # Enable auto_name_graphs for VS Code extension calls
1899 | result = run_stata_file(file_path, timeout=timeout, auto_name_graphs=True)
1900 |
1901 | # Format output for better display
1902 | result = result.replace("\\n", "\n")
1903 |
1904 | # Log the output length for debugging
1905 | logging.debug(f"MCP run_file output length: {len(result)}")
1906 |
1907 | # If no output was captured, log a warning
1908 | if "Command executed but" in result and "output not captured" in result:
1909 | logging.warning(f"No output captured for file: {file_path}")
1910 |
1911 | # If file not found error, make the message more helpful
1912 | if "File not found" in result:
1913 | # Add help text explaining common issues with Windows paths
1914 | if platform.system() == "Windows":
1915 | result += "\n\nCommon Windows path issues:\n"
1916 | result += "1. Make sure the file path uses correct separators (use \\ instead of /)\n"
1917 | result += "2. Check if the file exists in the specified location\n"
1918 | result += "3. If using relative paths, the current working directory is: " + os.getcwd()
1919 |
1920 | # Return successful response
1921 | return ToolResponse(
1922 | status="success",
1923 | result=result
1924 | )
1925 |
1926 | except Exception as e:
1927 | logging.error(f"Error handling tool request: {str(e)}")
1928 | return ToolResponse(
1929 | status="error",
1930 | message=f"Server error: {str(e)}"
1931 | )
1932 |
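# Illustrative client sketch (not part of the original file): the legacy
# /v1/tools endpoint accepts a ToolRequest JSON body and returns a ToolResponse.
# The host/port are assumptions; adjust them to your setup.
def _example_call_legacy_tool(do_file: str, base_url: str = "http://localhost:4000") -> str:
    import httpx
    payload = {
        "tool": "run_file",  # mapped to stata_run_file by the endpoint above
        "parameters": {"file_path": do_file, "timeout": 600},
    }
    resp = httpx.post(f"{base_url}/v1/tools", json=payload, timeout=1200.0)
    resp.raise_for_status()
    body = resp.json()  # {"status": ..., "result": ..., "message": ...}
    if body.get("status") != "success":
        raise RuntimeError(body.get("message") or "tool call failed")
    return body["result"]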
1933 | # Simplified health check endpoint - only report server status without executing Stata commands
1934 | @app.get("/health", include_in_schema=False)
1935 | async def health_check():
1936 | return {
1937 | "status": "ok",
1938 | "service": SERVER_NAME,
1939 | "version": SERVER_VERSION,
1940 | "stata_available": stata_available
1941 | }
1942 |
1943 | # Endpoint to serve graph images
1944 | # Hidden from OpenAPI schema so it won't be exposed to LLMs via MCP
1945 | @app.get("/graphs/{graph_name}", include_in_schema=False)
1946 | async def get_graph(graph_name: str):
1947 | """Serve a graph image file"""
1948 | try:
1949 | # Construct the path to the graph file
1950 | if extension_path:
1951 | graphs_dir = os.path.join(extension_path, 'graphs')
1952 | else:
1953 | graphs_dir = os.path.join(tempfile.gettempdir(), 'stata_mcp_graphs')
1954 |
1955 | # Support both with and without .png extension
1956 | if not graph_name.endswith('.png'):
1957 | graph_name = f"{graph_name}.png"
1958 |
1959 | graph_path = os.path.join(graphs_dir, graph_name)
1960 |
1961 | # Check if file exists
1962 | if not os.path.exists(graph_path):
1963 | return Response(
1964 | content=f"Graph not found: {graph_name}",
1965 | status_code=404,
1966 | media_type="text/plain"
1967 | )
1968 |
1969 | # Read and return the image file
1970 | with open(graph_path, 'rb') as f:
1971 | image_data = f.read()
1972 |
1973 | return Response(content=image_data, media_type="image/png")
1974 |
1975 | except Exception as e:
1976 | logging.error(f"Error serving graph {graph_name}: {str(e)}")
1977 | return Response(
1978 | content=f"Error serving graph: {str(e)}",
1979 | status_code=500
1980 | )
1981 |
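# Illustrative client sketch (not part of the original file): downloading an
# exported graph through the endpoint above. The graph name is whatever was
# reported in the "GRAPHS DETECTED" block of the do-file output. The host/port
# are assumptions.
def _example_download_graph(graph_name: str, dest_dir: str = ".",
                            base_url: str = "http://localhost:4000") -> str:
    import httpx
    resp = httpx.get(f"{base_url}/graphs/{graph_name}", timeout=30.0)
    resp.raise_for_status()
    filename = graph_name if graph_name.endswith(".png") else f"{graph_name}.png"
    out_path = os.path.join(dest_dir, filename)
    with open(out_path, "wb") as fh:
        fh.write(resp.content)
    return out_path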
1982 | @app.post("/clear_history", include_in_schema=False)
1983 | async def clear_history_endpoint():
1984 | """Clear the command history"""
1985 | global command_history
1986 | try:
1987 | count = len(command_history)
1988 | command_history = []
1989 | logging.info(f"Cleared command history ({count} items)")
1990 | return {"status": "success", "message": f"Cleared {count} items from history"}
1991 | except Exception as e:
1992 | logging.error(f"Error clearing history: {str(e)}")
1993 | return {"status": "error", "message": str(e)}
1994 |
1995 | @app.get("/view_data", include_in_schema=False)
1996 | async def view_data_endpoint(if_condition: str = None):
1997 | """Get current Stata data as a pandas DataFrame and return as JSON
1998 |
1999 | Args:
2000 | if_condition: Optional Stata if condition (e.g., "price > 5000 & mpg < 30")
2001 | """
2002 | global stata_available, stata
2003 |
2004 | try:
2005 | if not stata_available or stata is None:
2006 | logging.error("Stata is not available")
2007 | return Response(
2008 | content=json.dumps({
2009 | "status": "error",
2010 | "message": "Stata is not initialized"
2011 | }),
2012 | media_type="application/json",
2013 | status_code=500
2014 | )
2015 |
2016 | # Apply if condition if provided
2017 | if if_condition:
2018 | logging.info(f"Applying filter: if {if_condition}")
2019 | try:
2020 | # Get full data first
2021 | df = stata.pdataframe_from_data()
2022 |
2023 | if df is None or df.empty:
2024 | raise Exception("No data currently loaded in Stata")
2025 |
2026 | # Use Stata to create a filter marker variable
2027 | try:
2028 | import sfi
2029 |
2030 | # First, check if variable already exists and drop it
2031 | try:
2032 | stata.run("capture drop _filter_marker", inline=False, echo=False)
2033 | except:
2034 | pass
2035 |
2036 | # Generate marker for rows that match the condition
2037 | gen_cmd = f"quietly generate byte _filter_marker = ({if_condition})"
2038 | logging.debug(f"Running filter command: {gen_cmd}")
2039 |
2040 | try:
2041 | stata.run(gen_cmd, inline=False, echo=False)
2042 | logging.debug(f"Generate command executed successfully")
2043 | except SystemError as se:
2044 | logging.error(f"SystemError in generate command: {str(se)}")
2045 | raise Exception(f"Invalid condition syntax: {if_condition}")
2046 | except Exception as e:
2047 | logging.error(f"Exception in generate command: {type(e).__name__}: {str(e)}")
2048 | raise Exception(f"Error creating filter: {str(e)}")
2049 |
2050 | # Get the marker variable values using SFI
2051 | n_obs = sfi.Data.getObsTotal()
2052 | logging.debug(f"Total observations: {n_obs}")
2053 |
2054 | # Get the variable index for _filter_marker
2055 | var_index = sfi.Data.getVarIndex('_filter_marker')
2056 | logging.debug(f"Filter marker variable index: {var_index}")
2057 |
2058 | if var_index < 0:
2059 | raise Exception("Failed to create filter marker variable")
2060 |
2061 | # Read the filter values for all observations
2062 | # NOTE: sfi.Data.get() returns nested lists like [[1]] or [[0]]
2063 | # We need to extract the actual value
2064 | filter_mask = []
2065 | for i in range(n_obs):
2066 | val = sfi.Data.get('_filter_marker', i)
2067 | # Extract the actual value from nested list structure
2068 | if isinstance(val, list) and len(val) > 0:
2069 | if isinstance(val[0], list) and len(val[0]) > 0:
2070 | actual_val = val[0][0]
2071 | else:
2072 | actual_val = val[0]
2073 | else:
2074 | actual_val = val
2075 | filter_mask.append(actual_val == 1)
2076 |
2077 | # Debug: Log first few values and count
2078 | true_count = sum(filter_mask)
2079 | if n_obs > 0:
2080 | sample_vals = [sfi.Data.get('_filter_marker', i) for i in range(min(5, n_obs))]
2081 | logging.debug(f"First 5 marker values (raw): {sample_vals}")
2082 | logging.debug(f"Filter mask true count: {true_count} out of {n_obs}")
2083 |
2084 | # Drop the temporary marker
2085 | stata.run("quietly drop _filter_marker", inline=False, echo=False)
2086 |
2087 | # Filter the DataFrame using the mask
2088 | df = df[filter_mask].reset_index(drop=True)
2089 | logging.info(f"Filtered data: {len(df)} rows match condition (out of {n_obs} total)")
2090 |
2091 | except Exception as stata_err:
2092 | # Clean up if there's an error
2093 | try:
2094 | stata.run("capture drop _filter_marker", inline=False, echo=False)
2095 | except:
2096 | pass
2097 | logging.error(f"Filter processing error: {type(stata_err).__name__}: {str(stata_err)}")
2098 | raise Exception(f"{str(stata_err)}")
2099 |
2100 | except Exception as filter_err:
2101 | logging.error(f"Filter error: {str(filter_err)}")
2102 | return Response(
2103 | content=json.dumps({
2104 | "status": "error",
2105 | "message": f"Filter error: {str(filter_err)}"
2106 | }),
2107 | media_type="application/json",
2108 | status_code=400
2109 | )
2110 | else:
2111 | # Get data as pandas DataFrame without filtering
2112 | logging.info("Getting data from Stata using pdataframe_from_data()")
2113 | df = stata.pdataframe_from_data()
2114 |
2115 | # Check if data is empty
2116 | if df is None or df.empty:
2117 | logging.info("No data currently loaded in Stata")
2118 | return Response(
2119 | content=json.dumps({
2120 | "status": "success",
2121 | "message": "No data currently loaded",
2122 | "data": [],
2123 | "columns": [],
2124 | "rows": 0
2125 | }),
2126 | media_type="application/json"
2127 | )
2128 |
2129 | # Get data info
2130 | rows, cols = df.shape
2131 | logging.info(f"Data retrieved: {rows} observations, {cols} variables")
2132 |
2133 | # Convert DataFrame to JSON format
2134 | # Replace NaN with None for proper JSON serialization
2135 | df_clean = df.replace({float('nan'): None})
2136 |
2137 | # Convert to list of lists for better performance
2138 | data_values = df_clean.values.tolist()
2139 | column_names = df_clean.columns.tolist()
2140 |
2141 | # Get data types for each column
2142 | dtypes = {col: str(df[col].dtype) for col in df.columns}
2143 |
2144 | return Response(
2145 | content=json.dumps({
2146 | "status": "success",
2147 | "data": data_values,
2148 | "columns": column_names,
2149 | "dtypes": dtypes,
2150 | "rows": int(rows),
2151 | "index": df.index.tolist()
2152 | }),
2153 | media_type="application/json"
2154 | )
2155 |
2156 | except Exception as e:
2157 | error_msg = f"Error getting data: {str(e)}"
2158 | logging.error(error_msg)
2159 | logging.error(traceback.format_exc())
2160 | return Response(
2161 | content=json.dumps({
2162 | "status": "error",
2163 | "message": error_msg
2164 | }),
2165 | media_type="application/json",
2166 | status_code=500
2167 | )
2168 |
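# Illustrative client sketch (not part of the original file): the /view_data
# response can be reassembled into a pandas DataFrame from its "columns" and
# "data" fields. pandas availability and the host/port are assumptions.
def _example_fetch_data(if_condition: Optional[str] = None,
                        base_url: str = "http://localhost:4000"):
    import httpx
    import pandas as pd
    params = {"if_condition": if_condition} if if_condition else {}
    resp = httpx.get(f"{base_url}/view_data", params=params, timeout=120.0)
    payload = resp.json()
    if payload.get("status") != "success":
        raise RuntimeError(payload.get("message", "view_data failed"))
    return pd.DataFrame(payload.get("data", []), columns=payload.get("columns", []))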
2169 | @app.get("/interactive", include_in_schema=False)
2170 | async def interactive_window(file: str = None, code: str = None):
2171 | """Serve the interactive Stata window as a full webpage"""
2172 | # If a file path or code is provided, we'll auto-execute it on page load
2173 | auto_run_file = file if file else ""
2174 | auto_run_code = code if code else ""
2175 |
2176 | # Use regular string and insert the file path separately to avoid f-string conflicts
2177 | html_content = """
2178 | <!DOCTYPE html>
2179 | <html lang="en">
2180 | <head>
2181 | <meta charset="UTF-8">
2182 | <meta name="viewport" content="width=device-width, initial-scale=1.0">
2183 | <title>Stata Interactive Window</title>
2184 | <style>
2185 | * { margin: 0; padding: 0; box-sizing: border-box; }
2186 | body {
2187 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
2188 | background: #1e1e1e;
2189 | color: #d4d4d4;
2190 | height: 100vh;
2191 | display: flex;
2192 | flex-direction: column;
2193 | }
2194 | .main-container {
2195 | display: flex;
2196 | flex: 1;
2197 | overflow: hidden;
2198 | }
2199 | .left-panel {
2200 | flex: 1;
2201 | display: flex;
2202 | flex-direction: column;
2203 | border-right: 1px solid #3e3e42;
2204 | overflow: hidden;
2205 | }
2206 | .output-section {
2207 | flex: 1;
2208 | overflow-y: auto;
2209 | padding: 20px;
2210 | }
2211 | .output-cell {
2212 | border-left: 3px solid #007acc;
2213 | padding-left: 15px;
2214 | margin-bottom: 20px;
2215 | background: #252526;
2216 | padding: 15px;
2217 | border-radius: 4px;
2218 | }
2219 | .command-line {
2220 | color: #4fc1ff;
2221 | font-weight: bold;
2222 | margin-bottom: 10px;
2223 | font-family: 'Consolas', 'Monaco', monospace;
2224 | }
2225 | .command-output {
2226 | font-family: 'Consolas', 'Monaco', monospace;
2227 | white-space: pre-wrap;
2228 | font-size: 13px;
2229 | line-height: 1.5;
2230 | }
2231 | .input-section {
2232 | border-top: 1px solid #3e3e42;
2233 | padding: 20px;
2234 | background: #252526;
2235 | }
2236 | .input-container {
2237 | display: flex;
2238 | gap: 10px;
2239 | }
2240 | #command-input {
2241 | flex: 1;
2242 | background: #3c3c3c;
2243 | border: 1px solid #6c6c6c;
2244 | color: #d4d4d4;
2245 | padding: 12px 15px;
2246 | font-family: 'Consolas', 'Monaco', monospace;
2247 | font-size: 14px;
2248 | border-radius: 4px;
2249 | }
2250 | #command-input:focus {
2251 | outline: none;
2252 | border-color: #007acc;
2253 | }
2254 | #run-button {
2255 | background: #0e639c;
2256 | color: white;
2257 | border: none;
2258 | padding: 12px 30px;
2259 | font-weight: 600;
2260 | cursor: pointer;
2261 | border-radius: 4px;
2262 | transition: background 0.2s;
2263 | }
2264 | #run-button:hover {
2265 | background: #1177bb;
2266 | }
2267 | #run-button:disabled {
2268 | background: #555;
2269 | cursor: not-allowed;
2270 | }
2271 | .right-panel {
2272 | width: 40%;
2273 | overflow-y: auto;
2274 | padding: 20px;
2275 | background: #1e1e1e;
2276 | }
2277 | .graphs-title {
2278 | font-size: 20px;
2279 | font-weight: 600;
2280 | margin-bottom: 20px;
2281 | color: #ffffff;
2282 | }
2283 | .graph-card {
2284 | background: #252526;
2285 | border: 1px solid #3e3e42;
2286 | border-radius: 8px;
2287 | padding: 20px;
2288 | margin-bottom: 20px;
2289 | }
2290 | .graph-card h3 {
2291 | margin-bottom: 15px;
2292 | color: #ffffff;
2293 | }
2294 | .graph-card img {
2295 | width: 100%;
2296 | height: auto;
2297 | border-radius: 4px;
2298 | }
2299 | .error {
2300 | background: #5a1d1d;
2301 | border-left: 3px solid #f48771;
2302 | padding: 15px;
2303 | border-radius: 4px;
2304 | margin-bottom: 20px;
2305 | }
2306 | .hint {
2307 | color: #858585;
2308 | font-size: 12px;
2309 | margin-top: 8px;
2310 | }
2311 | .no-graphs {
2312 | color: #858585;
2313 | font-style: italic;
2314 | text-align: center;
2315 | padding: 40px;
2316 | }
2317 | </style>
2318 | </head>
2319 | <body>
2320 | <div class="main-container">
2321 | <div class="left-panel">
2322 | <div class="output-section" id="output-container"></div>
2323 |
2324 | <div class="input-section">
2325 | <div class="input-container">
2326 | <input type="text" id="command-input"
2327 | placeholder="Enter Stata command (e.g., summarize, scatter y x, regress y x)..."
2328 | autocomplete="off" />
2329 | <button id="run-button">Run</button>
2330 | </div>
2331 | <div class="hint">Press Enter to execute • Ctrl+L to clear output</div>
2332 | </div>
2333 | </div>
2334 |
2335 | <div class="right-panel">
2336 | <div class="graphs-title">Graphs</div>
2337 | <div id="graphs-container">
2338 | <div class="no-graphs">No graphs yet. Run commands to generate graphs.</div>
2339 | </div>
2340 | </div>
2341 | </div>
2342 |
2343 | <script>
2344 | const commandInput = document.getElementById('command-input');
2345 | const runButton = document.getElementById('run-button');
2346 | const outputContainer = document.getElementById('output-container');
2347 | const graphsContainer = document.getElementById('graphs-container');
2348 |
2349 | runButton.addEventListener('click', executeCommand);
2350 | commandInput.addEventListener('keypress', (e) => {
2351 | if (e.key === 'Enter') executeCommand();
2352 | });
2353 |
2354 | document.addEventListener('keydown', async (e) => {
2355 | if (e.ctrlKey && e.key === 'l') {
2356 | e.preventDefault();
2357 | // Clear text output visually
2358 | outputContainer.innerHTML = '';
2359 | // Clear graphs visually
2360 | graphsContainer.innerHTML = '<div class="no-graphs">No graphs yet. Run commands to generate graphs.</div>';
2361 | // Clear server-side command history so it doesn't come back
2362 | try {
2363 | const response = await fetch('/clear_history', {
2364 | method: 'POST',
2365 | headers: { 'Content-Type': 'application/json' }
2366 | });
2367 | const data = await response.json();
2368 | console.log('History cleared:', data.message);
2369 | } catch (err) {
2370 | console.error('Error clearing history:', err);
2371 | }
2372 | }
2373 | });
2374 |
2375 | async function executeCommand() {
2376 | const command = commandInput.value.trim();
2377 | if (!command) return;
2378 |
2379 | runButton.disabled = true;
2380 | runButton.textContent = 'Running...';
2381 |
2382 | try {
2383 | const response = await fetch('/v1/tools', {
2384 | method: 'POST',
2385 | headers: { 'Content-Type': 'application/json' },
2386 | body: JSON.stringify({
2387 | tool: 'run_selection',
2388 | parameters: { selection: command }
2389 | })
2390 | });
2391 |
2392 | const data = await response.json();
2393 |
2394 | if (data.status === 'success') {
2395 | addOutputCell(command, data.result);
2396 | updateGraphs(data.result);
2397 | } else {
2398 | addError(data.message || 'Command failed');
2399 | }
2400 | } catch (error) {
2401 | addError(error.message);
2402 | }
2403 |
2404 | runButton.disabled = false;
2405 | runButton.textContent = 'Run';
2406 | commandInput.value = '';
2407 | commandInput.focus();
2408 | }
2409 |
2410 | function addOutputCell(command, output) {
2411 | const cell = document.createElement('div');
2412 | cell.className = 'output-cell';
2413 | cell.innerHTML = `
2414 | <div class="command-line">> ${escapeHtml(command)}</div>
2415 | <div class="command-output">${escapeHtml(output)}</div>
2416 | `;
2417 | outputContainer.appendChild(cell);
2418 | outputContainer.scrollTop = outputContainer.scrollHeight;
2419 | }
2420 |
2421 | function addError(message) {
2422 | const error = document.createElement('div');
2423 | error.className = 'error';
2424 | error.textContent = 'Error: ' + message;
2425 | outputContainer.appendChild(error);
2426 | outputContainer.scrollTop = outputContainer.scrollHeight;
2427 | }
2428 |
2429 | function updateGraphs(output) {
2430 | // Updated regex to capture optional command: • name: path [CMD: command]
2431 | // Use [^\\n\\[] to stop at newlines or opening bracket
2432 | const graphRegex = /• ([^:]+): ([^\\n\\[]+)(?:\\[CMD: ([^\\]]+)\\])?/g;
2433 | const matches = [...output.matchAll(graphRegex)];
2434 |
2435 | if (matches.length > 0) {
2436 | // Remove "no graphs" message if it exists
2437 | const noGraphsMsg = graphsContainer.querySelector('.no-graphs');
2438 | if (noGraphsMsg) {
2439 | graphsContainer.innerHTML = '';
2440 | }
2441 |
2442 | // Add or update each graph
2443 | matches.forEach(match => {
2444 | const name = match[1].trim();
2445 | const path = match[2].trim();
2446 | const command = match[3] ? match[3].trim() : null;
2447 |
2448 | // Check if graph already exists
2449 | const existingGraph = graphsContainer.querySelector(`[data-graph-name="${name}"]`);
2450 | if (existingGraph) {
2451 | // Update existing graph - force reload by adding timestamp
2452 | updateGraph(existingGraph, name, `/graphs/${encodeURIComponent(name)}`, command);
2453 | } else {
2454 | // Add new graph
2455 | addGraph(name, `/graphs/${encodeURIComponent(name)}`, command);
2456 | }
2457 | });
2458 | }
2459 | }
2460 |
2461 | function updateGraph(existingCard, name, url, command) {
2462 | // Force reload by adding timestamp to bypass cache
2463 | const timestamp = new Date().getTime();
2464 | const urlWithTimestamp = `${url}?t=${timestamp}`;
2465 |
2466 | const commandHtml = command ? `<div style="color: #858585; font-size: 12px; margin-bottom: 8px; font-family: 'Courier New', monospace; background: #1a1a1a; padding: 6px; border-radius: 3px; border-left: 3px solid #4a9eff;">$ ${escapeHtml(command)}</div>` : '';
2467 | existingCard.innerHTML = `
2468 | <h3>${escapeHtml(name)}</h3>
2469 | ${commandHtml}
2470 | <img src="${urlWithTimestamp}" alt="${escapeHtml(name)}"
2471 | onerror="this.parentElement.innerHTML='<p style=\\'color:#f48771\\'>Failed to load graph</p>'">
2472 | `;
2473 | }
2474 |
2475 | function addGraph(name, url, command) {
2476 | const card = document.createElement('div');
2477 | card.className = 'graph-card';
2478 | card.setAttribute('data-graph-name', name);
2479 | const commandHtml = command ? `<div style="color: #858585; font-size: 12px; margin-bottom: 8px; font-family: 'Courier New', monospace; background: #1a1a1a; padding: 6px; border-radius: 3px; border-left: 3px solid #4a9eff;">$ ${escapeHtml(command)}</div>` : '';
2480 | card.innerHTML = `
2481 | <h3>${escapeHtml(name)}</h3>
2482 | ${commandHtml}
2483 | <img src="${url}" alt="${escapeHtml(name)}"
2484 | onerror="this.parentElement.innerHTML='<p style=\\'color:#f48771\\'>Failed to load graph</p>'">
2485 | `;
2486 | graphsContainer.appendChild(card);
2487 | }
2488 |
2489 | function escapeHtml(text) {
2490 | const div = document.createElement('div');
2491 | div.textContent = text;
2492 | return div.innerHTML;
2493 | }
2494 |
2495 | // Auto-execute file or code if provided in URL parameter
2496 | const urlParams = new URLSearchParams(window.location.search);
2497 | const autoRunFile = urlParams.get('file');
2498 | const autoRunCode = urlParams.get('code');
2499 |
2500 | if (autoRunFile) {
2501 | console.log('Auto-running file from URL parameter:', autoRunFile);
2502 | // Run the file on page load
2503 | fetch('/v1/tools', {
2504 | method: 'POST',
2505 | headers: { 'Content-Type': 'application/json' },
2506 | body: JSON.stringify({
2507 | tool: 'run_file',
2508 | parameters: { file_path: autoRunFile }
2509 | })
2510 | })
2511 | .then(response => response.json())
2512 | .then(data => {
2513 | if (data.status === 'success') {
2514 | addOutputCell('Running file: ' + autoRunFile, data.result);
2515 | updateGraphs(data.result);
2516 | } else {
2517 | addError(data.message || 'Failed to run file');
2518 | }
2519 | })
2520 | .catch(error => {
2521 | addError('Error running file: ' + error.message);
2522 | });
2523 | } else if (autoRunCode) {
2524 | console.log('Auto-running code from URL parameter');
2525 | // Run the selected code on page load
2526 | fetch('/v1/tools', {
2527 | method: 'POST',
2528 | headers: { 'Content-Type': 'application/json' },
2529 | body: JSON.stringify({
2530 | tool: 'run_selection',
2531 | parameters: { selection: autoRunCode }
2532 | })
2533 | })
2534 | .then(response => response.json())
2535 | .then(data => {
2536 | if (data.status === 'success') {
2537 | addOutputCell('Running selection', data.result);
2538 | updateGraphs(data.result);
2539 | } else {
2540 | addError(data.message || 'Failed to run code');
2541 | }
2542 | })
2543 | .catch(error => {
2544 | addError('Error running code: ' + error.message);
2545 | });
2546 | }
2547 |
2548 | commandInput.focus();
2549 | </script>
2550 | </body>
2551 | </html>
2552 | """
2553 |     # Legacy no-op: the page reads the 'file'/'code' query parameters client-side, and AUTO_RUN_FILE_PLACEHOLDER does not appear in the HTML above
2554 | if auto_run_file:
2555 | # Escape the file path for JavaScript string
2556 | escaped_file = auto_run_file.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
2557 | html_content = html_content.replace('AUTO_RUN_FILE_PLACEHOLDER', escaped_file)
2558 |
2559 | return Response(content=html_content, media_type="text/html")
2560 |
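# Illustrative sketch (not part of the original file): opening the interactive
# window in a browser, optionally auto-running a .do file via the `file` query
# parameter that the page reads client-side. The host/port are assumptions.
def _example_open_interactive(do_file: Optional[str] = None,
                              base_url: str = "http://localhost:4000") -> None:
    import webbrowser
    from urllib.parse import urlencode
    url = f"{base_url}/interactive"
    if do_file:
        url += "?" + urlencode({"file": do_file})
    webbrowser.open(url)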
2561 |
2562 | def main():
2563 | """Main function to set up and run the server"""
2564 | try:
2565 | # Get Stata path from arguments
2566 | parser = argparse.ArgumentParser(description='Stata MCP Server')
2567 | parser.add_argument('--stata-path', type=str, help='Path to Stata installation')
2568 | parser.add_argument('--port', type=int, default=4000, help='Port to run MCP server on')
2569 | parser.add_argument('--host', type=str, default='localhost', help='Host to bind the server to')
2570 | parser.add_argument('--log-level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
2571 | default='INFO', help='Logging level')
2572 | parser.add_argument('--force-port', action='store_true', help='Force the specified port, even if it requires killing processes')
2573 | parser.add_argument('--log-file', type=str, help='Path to log file (default: stata_mcp_server.log in current directory)')
2574 | parser.add_argument('--stata-edition', type=str, choices=['mp', 'se', 'be'], default='mp',
2575 | help='Stata edition to use (mp, se, be) - default: mp')
2576 | parser.add_argument('--log-file-location', type=str, choices=['extension', 'workspace', 'custom'], default='extension',
2577 | help='Location for .do file logs (extension, workspace, custom) - default: extension')
2578 | parser.add_argument('--custom-log-directory', type=str, default='',
2579 | help='Custom directory for .do file logs (when location is custom)')
2580 |
2581 | # Special handling when running as a module
2582 | if is_running_as_module:
2583 | print(f"Command line arguments when running as module: {sys.argv}")
2584 | # When run as a module, the first arg won't be the script path
2585 | args_to_parse = sys.argv[1:]
2586 | else:
2587 | # Regular mode - arg 0 is script path
2588 | #print(f"[MCP Server] Original command line arguments: {sys.argv}")
2589 | args_to_parse = sys.argv
2590 |
2591 | # Skip if an argument is a duplicate script path (e.g., on Windows with shell:true)
2592 | clean_args = []
2593 | script_path_found = False
2594 |
2595 | for arg in args_to_parse:
2596 | # Skip duplicate script paths, but keep the first one (sys.argv[0])
2597 | if arg.endswith('stata_mcp_server.py'):
2598 | if script_path_found and arg != sys.argv[0]:
2599 | logging.debug(f"Skipping duplicate script path: {arg}")
2600 | continue
2601 | script_path_found = True
2602 |
2603 | clean_args.append(arg)
2604 |
2605 | args_to_parse = clean_args
2606 |
2607 | # Process commands for Stata path with spaces
2608 | fixed_args = []
2609 | i = 0
2610 | while i < len(args_to_parse):
2611 | arg = args_to_parse[i]
2612 |
2613 | if arg == '--stata-path' and i + 1 < len(args_to_parse):
2614 | # The next argument might be a path that got split
2615 | stata_path = args_to_parse[i + 1]
2616 |
2617 | # Check if this is a quoted path
2618 | if (stata_path.startswith('"') and not stata_path.endswith('"')) or (stata_path.startswith("'") and not stata_path.endswith("'")):
2619 | # Look for the rest of the path in subsequent arguments
2620 | i += 2 # Move past '--stata-path' and the first part
2621 |
2622 | # Get the quote character (single or double)
2623 | quote_char = stata_path[0]
2624 | path_parts = [stata_path[1:]] # Remove the starting quote
2625 |
2626 | # Collect all parts until we find the end quote
2627 | while i < len(args_to_parse):
2628 | current = args_to_parse[i]
2629 | if current.endswith(quote_char):
2630 | # Found the end quote
2631 | path_parts.append(current[:-1]) # Remove the ending quote
2632 | break
2633 | else:
2634 | path_parts.append(current)
2635 | i += 1
2636 |
2637 | # Join all parts to form the complete path
2638 | complete_path = " ".join(path_parts)
2639 | fixed_args.append('--stata-path')
2640 | fixed_args.append(complete_path)
2641 | else:
2642 | # Normal path handling (either without quotes or with properly matched quotes)
2643 | fixed_args.append(arg)
2644 | fixed_args.append(stata_path)
2645 | i += 2
2646 | else:
2647 | # For all other arguments, add them as-is
2648 | fixed_args.append(arg)
2649 | i += 1
2650 |
2651 | # Print debug info
2652 | print(f"Command line arguments: {fixed_args}")
2653 |
2654 | # Use the fixed arguments
2655 | args = parser.parse_args(fixed_args[1:] if fixed_args and not is_running_as_module else fixed_args)
2656 | print(f"Parsed arguments: stata_path={args.stata_path}, port={args.port}")
2657 |
2658 | # Check if args.stata_path accidentally captured other arguments
2659 | if args.stata_path and ' --' in args.stata_path:
2660 | # The stata_path might have captured other arguments
2661 | parts = args.stata_path.split(' --')
2662 | # The first part is the actual stata_path
2663 | stata_path = parts[0].strip()
2664 | print(f"WARNING: Detected merged arguments in Stata path. Fixing: {args.stata_path} -> {stata_path}")
2665 | logging.warning(f"Fixed merged arguments in Stata path: {args.stata_path} -> {stata_path}")
2666 | args.stata_path = stata_path
2667 |
2668 | # If Stata path was enclosed in quotes, remove them
2669 | if args.stata_path:
2670 | args.stata_path = args.stata_path.strip('"\'')
2671 | logging.debug(f"Cleaned Stata path: {args.stata_path}")
2672 |
2673 | # Configure log file
2674 | log_file = args.log_file or 'stata_mcp_server.log'
2675 | log_dir = os.path.dirname(log_file)
2676 |
2677 | # Create log directory if needed
2678 | if log_dir and not os.path.exists(log_dir):
2679 | try:
2680 | os.makedirs(log_dir, exist_ok=True)
2681 | print(f"Created log directory: {log_dir}")
2682 | except Exception as e:
2683 | print(f"ERROR: Failed to create log directory {log_dir}: {str(e)}")
2684 | # Continue anyway, the file handler creation will fail if needed
2685 |
2686 | # Always print where we're trying to log
2687 | print(f"Logging to: {os.path.abspath(log_file)}")
2688 |
2689 | # Remove existing handlers
2690 | for handler in logging.getLogger().handlers[:]:
2691 | logging.getLogger().removeHandler(handler)
2692 |
2693 | # Add file handler
2694 | try:
2695 | file_handler = logging.FileHandler(log_file, mode='a', encoding='utf-8')
2696 | file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
2697 | logging.getLogger().addHandler(file_handler)
2698 | print(f"Successfully configured log file: {os.path.abspath(log_file)}")
2699 | except Exception as log_error:
2700 | print(f"ERROR: Failed to configure log file {log_file}: {str(log_error)}")
2701 | # Continue with console logging only
2702 |
2703 | # Re-add console handler
2704 | logging.getLogger().addHandler(console_handler)
2705 |
2706 | # Set log level
2707 | log_level = getattr(logging, args.log_level)
2708 | logging.getLogger().setLevel(log_level)
2709 |
2710 | # Set Stata edition
2711 | global stata_edition, log_file_location, custom_log_directory, extension_path
2712 | stata_edition = args.stata_edition.lower()
2713 | log_file_location = args.log_file_location
2714 | custom_log_directory = args.custom_log_directory
2715 |
2716 | # Try to determine extension path from the log file path
2717 | if args.log_file:
2718 | # If log file is in a logs subdirectory, the parent of that is the extension path
2719 | log_file_dir = os.path.dirname(os.path.abspath(args.log_file))
2720 | if log_file_dir.endswith('logs'):
2721 | extension_path = os.path.dirname(log_file_dir)
2722 | else:
2723 | extension_path = log_file_dir
2724 |
2725 | logging.info(f"Using Stata {stata_edition.upper()} edition")
2726 | logging.info(f"Log file location setting: {log_file_location}")
2727 | if custom_log_directory:
2728 | logging.info(f"Custom log directory: {custom_log_directory}")
2729 | if extension_path:
2730 | logging.info(f"Extension path: {extension_path}")
2731 |
2732 | # Log startup information
2733 | logging.info(f"Log initialized at {os.path.abspath(log_file)}")
2734 | logging.info(f"Log level set to {args.log_level}")
2735 | logging.info(f"Platform: {platform.system()} {platform.release()}")
2736 | logging.info(f"Python version: {sys.version}")
2737 | logging.info(f"Working directory: {os.getcwd()}")
2738 |
2739 | # Set Stata path
2740 | global STATA_PATH
2741 | if args.stata_path:
2742 | # Strip quotes if present
2743 | STATA_PATH = args.stata_path.strip('"\'')
2744 | else:
2745 | STATA_PATH = os.environ.get('STATA_PATH')
2746 | if not STATA_PATH:
2747 | if platform.system() == 'Darwin': # macOS
2748 | STATA_PATH = '/Applications/Stata'
2749 | elif platform.system() == 'Windows':
2750 | # Try common Windows paths
2751 | potential_paths = [
2752 | 'C:\\Program Files\\Stata18',
2753 | 'C:\\Program Files\\Stata17',
2754 | 'C:\\Program Files\\Stata16',
2755 | 'C:\\Program Files (x86)\\Stata18',
2756 | 'C:\\Program Files (x86)\\Stata17',
2757 | 'C:\\Program Files (x86)\\Stata16'
2758 | ]
2759 | for path in potential_paths:
2760 | if os.path.exists(path):
2761 | STATA_PATH = path
2762 | break
2763 | if not STATA_PATH:
2764 | STATA_PATH = 'C:\\Program Files\\Stata18' # Default if none found
2765 | else: # Linux
2766 | STATA_PATH = '/usr/local/stata'
2767 |
2768 | logging.info(f"Using Stata path: {STATA_PATH}")
2769 | if not os.path.exists(STATA_PATH):
2770 | logging.error(f"Stata path does not exist: {STATA_PATH}")
2771 | print(f"ERROR: Stata path does not exist: {STATA_PATH}")
2772 | sys.exit(1)
2773 |
2774 | # Check if the requested port is available
2775 | port = args.port
2776 |
2777 | if args.force_port:
2778 | # Kill any existing process on the port
2779 | kill_process_on_port(port)
2780 | else:
2781 | # Always kill processes on port 4000
2782 | if port == 4000:
2783 | logging.info(f"Ensuring port 4000 is available by terminating any existing processes")
2784 | kill_process_on_port(port)
2785 | else:
2786 | # For other ports, check if available
2787 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
2788 | s.settimeout(1)
2789 | result = s.connect_ex(('localhost', port))
2790 | if result == 0: # Port is in use
2791 | logging.warning(f"Port {port} is already in use")
2792 | # Kill the process on the port instead of finding a new one
2793 | logging.info(f"Attempting to kill process using port {port}")
2794 | kill_process_on_port(port)
2795 |
2796 | # Try to initialize Stata
2797 | try_init_stata(STATA_PATH)
2798 |
2799 | # Create and mount the MCP server
2800 | # Only expose run_selection and run_file to LLMs
2801 |     # Other endpoints remain accessible via direct HTTP calls from the VS Code extension
2802 | # Configure HTTP client with ASGI transport and extended timeout for long-running Stata operations
2803 | http_client = httpx.AsyncClient(
2804 | transport=httpx.ASGITransport(app=app, raise_app_exceptions=False),
2805 | base_url="http://apiserver",
2806 | timeout=1200.0 # 20 minutes timeout for long Stata operations
2807 | )
2808 |
2809 | mcp = FastApiMCP(
2810 | app,
2811 | name=SERVER_NAME,
2812 | description="This server provides tools for running Stata commands and scripts. Use stata_run_selection for running code snippets and stata_run_file for executing .do files.",
2813 | http_client=http_client,
2814 | exclude_operations=[
2815 | "call_tool_v1_tools_post", # Legacy VS Code extension endpoint
2816 | "health_check_health_get", # Health check endpoint
2817 | "view_data_endpoint_view_data_get", # Data viewer endpoint (VS Code only)
2818 | "get_graph_graphs_graph_name_get", # Graph serving endpoint (VS Code only)
2819 | "clear_history_endpoint_clear_history_post", # History clearing (VS Code only)
2820 | "interactive_window_interactive_get", # Interactive window (VS Code only)
2821 | "stata_run_file_stream_endpoint_run_file_stream_get" # SSE streaming endpoint (HTTP clients only)
2822 | ]
2823 | )
2824 |
2825 | # Mount SSE transport at /mcp for backward compatibility
2826 | mcp.mount(mount_path="/mcp", transport="sse")
2827 |
2828 | # ========================================================================
2829 | # HTTP (Streamable) Transport - Separate Server Instance
2830 | # ========================================================================
2831 | # Create a SEPARATE MCP server instance for HTTP to avoid session conflicts
2832 | # This ensures notifications go to the correct transport
2833 | from mcp.server import Server as MCPServer
2834 | from mcp.server.streamable_http_manager import StreamableHTTPSessionManager
2835 | from starlette.responses import StreamingResponse as StarletteStreamingResponse
2836 |
2837 | logging.info("Creating separate MCP server instance for HTTP transport...")
2838 | http_mcp_server = MCPServer(SERVER_NAME)
2839 |
2840 | # Register list_tools handler to expose the same tools
2841 | @http_mcp_server.list_tools()
2842 | async def list_tools_http():
2843 |         """List the available tools, mirroring those exposed by the main fastapi_mcp server"""
2844 |         # The definitions below mirror the schemas fastapi_mcp generates for the two exposed endpoints
2845 | import mcp.types as types
2846 |
2847 | tools_list = []
2848 | # stata_run_selection tool
2849 | tools_list.append(types.Tool(
2850 | name="stata_run_selection",
2851 | description="Stata Run Selection Endpoint\n\nRun selected Stata code and return the output\n\n### Responses:\n\n**200**: Successful Response (Success Response)",
2852 | inputSchema={
2853 | "type": "object",
2854 | "properties": {
2855 | "selection": {"type": "string", "title": "selection"}
2856 | },
2857 | "title": "stata_run_selectionArguments",
2858 | "required": ["selection"]
2859 | }
2860 | ))
2861 | # stata_run_file tool
2862 | tools_list.append(types.Tool(
2863 | name="stata_run_file",
2864 | description="Stata Run File Endpoint\n\nRun a Stata .do file and return the output (MCP-compatible endpoint)\n\nArgs:\n file_path: Path to the .do file\n timeout: Timeout in seconds (default: 600 seconds / 10 minutes)\n\nReturns:\n Response with plain text output\n\n### Responses:\n\n**200**: Successful Response (Success Response)",
2865 | inputSchema={
2866 | "type": "object",
2867 | "properties": {
2868 | "file_path": {"type": "string", "title": "file_path"},
2869 | "timeout": {"type": "integer", "default": 600, "title": "timeout"}
2870 | },
2871 | "title": "stata_run_fileArguments",
2872 | "required": ["file_path"]
2873 | }
2874 | ))
2875 | return tools_list
2876 |
2877 | # Register call_tool handler to execute tools with HTTP server's context
2878 | @http_mcp_server.call_tool()
2879 | async def call_tool_http(name: str, arguments: dict) -> list:
2880 | """Execute tools using HTTP server's own context for proper notification routing"""
2881 | import mcp.types as types
2882 |
2883 | logging.debug(f"HTTP server executing tool: {name}")
2884 |
2885 | # Call the fastapi_mcp's execute method, which has the streaming wrapper
2886 | # The streaming wrapper will check http_mcp_server.request_context (which is set by StreamableHTTPSessionManager)
2887 | result = await mcp._execute_api_tool(
2888 | client=http_client,
2889 | tool_name=name,
2890 | arguments=arguments,
2891 |             operation_map=mcp.operation_map,  # maps tool names to their underlying API operations
2892 | http_request_info=None
2893 | )
2894 |
2895 | return result
2896 |
2897 | logging.debug("Registered tool handlers with HTTP server")
2898 |
2899 | # Create HTTP session manager with dedicated server
2900 | http_session_manager = StreamableHTTPSessionManager(
2901 | app=http_mcp_server, # Use dedicated HTTP server, not shared
2902 | event_store=None,
2903 | json_response=False, # Use SSE format for responses
2904 | stateless=False, # Maintain session state
2905 | )
2906 | logging.info("HTTP transport configured with dedicated MCP server")
2907 |
2908 | # Create a custom Response class that properly handles ASGI streaming
2909 | class ASGIPassthroughResponse(StarletteStreamingResponse):
2910 | """Response that passes through ASGI calls without buffering"""
2911 | def __init__(self, asgi_handler, scope, receive):
2912 |             # Initialize the parent class with dummy (empty) content; this sets up
2913 |             # required attributes such as background, headers, and media type
2914 | super().__init__(content=iter([]), media_type="text/event-stream")
2915 |
2916 | # Store our ASGI handler
2917 | self.asgi_handler = asgi_handler
2918 | self.scope_data = scope
2919 | self.receive_func = receive
2920 |
2921 | async def __call__(self, scope, receive, send):
2922 | """Handle ASGI request/response cycle"""
2923 | # Call the ASGI handler directly with the provided send callback
2924 | # This allows SSE events to be sent immediately without buffering
2925 | await self.asgi_handler(self.scope_data, self.receive_func, send)
2926 |
2927 | @app.api_route(
2928 | "/mcp-streamable",
2929 | methods=["GET", "POST", "DELETE"],
2930 | include_in_schema=False,
2931 | operation_id="mcp_http_streamable"
2932 | )
2933 | async def handle_mcp_streamable(request: Request):
2934 | """Handle MCP Streamable HTTP requests with proper ASGI passthrough"""
2935 | # Return a response that directly passes through to the ASGI handler
2936 | # This avoids any buffering by FastAPI/Starlette
2937 | return ASGIPassthroughResponse(
2938 | asgi_handler=http_session_manager.handle_request,
2939 | scope=request.scope,
2940 | receive=request.receive
2941 | )
2942 |
2943 | # Store the session manager for startup/shutdown
2944 | app.state.http_session_manager = http_session_manager
2945 | app.state.http_session_manager_cm = None
2946 |
2947 | # Define startup handler for the HTTP session manager
2948 | async def _start_http_session_manager():
2949 | """Start the HTTP session manager task group"""
2950 | try:
2951 | logging.info("Starting StreamableHTTP session manager...")
2952 | # Enter the context manager
2953 | app.state.http_session_manager_cm = http_session_manager.run()
2954 | await app.state.http_session_manager_cm.__aenter__()
2955 | logging.info("✓ StreamableHTTP session manager started successfully")
2956 | except Exception as e:
2957 | logging.error(f"Failed to start StreamableHTTP session manager: {e}", exc_info=True)
2958 | raise
2959 |
2960 | # Define shutdown handler for the HTTP session manager
2961 | async def _stop_http_session_manager():
2962 | """Stop the HTTP session manager"""
2963 | if app.state.http_session_manager_cm:
2964 | try:
2965 | logging.info("Stopping StreamableHTTP session manager...")
2966 | await app.state.http_session_manager_cm.__aexit__(None, None, None)
2967 | logging.info("✓ StreamableHTTP session manager stopped")
2968 | except Exception as e:
2969 | logging.error(f"Error stopping HTTP session manager: {e}", exc_info=True)
2970 |
2971 | # Store handlers on app.state for the lifespan manager to call
2972 | app.state._http_session_manager_starter = _start_http_session_manager
2973 | app.state._http_session_manager_stopper = _stop_http_session_manager
2974 | logging.debug("HTTP session manager startup/shutdown handlers registered with lifespan")
2975 |
2976 | # Store reference
2977 | mcp._http_transport = http_session_manager
2978 | logging.info("MCP HTTP Streamable transport mounted at /mcp-streamable with TRUE SSE streaming (ASGI direct)")
2979 |
2980 | LOG_LEVEL_RANK = {
2981 | "debug": 0,
2982 | "info": 1,
2983 | "notice": 2,
2984 | "warning": 3,
2985 | "error": 4,
2986 | "critical": 5,
2987 | "alert": 6,
2988 | "emergency": 7,
2989 | }
2990 | DEFAULT_LOG_LEVEL = "notice"
2991 |
2992 | @mcp.server.set_logging_level()
2993 | async def handle_set_logging_level(level: str):
2994 | """Persist client-requested log level for the current session."""
2995 | try:
2996 | ctx = mcp.server.request_context
2997 | except LookupError:
2998 | logging.debug("logging/setLevel received outside of request context")
2999 | return
3000 |
3001 | session = getattr(ctx, "session", None)
3002 | if session is not None:
3003 | setattr(session, "_stata_log_level", (level or "info").lower())
3004 | logging.debug(f"Set MCP log level for session to {level}")
3005 |
3006 | # Enhance stata_run_file with MCP-native streaming updates
3007 | original_execute = mcp._execute_api_tool
3008 |
3009 | async def execute_with_streaming(*call_args, **call_kwargs):
3010 | """Wrap tool execution to stream progress for long-running Stata jobs."""
3011 | if not call_args:
3012 | raise TypeError("execute_with_streaming requires bound 'self'")
3013 |
3014 | bound_self = call_args[0]
3015 | original_args = call_args[1:]
3016 | original_kwargs = dict(call_kwargs)
3017 |
3018 | # Extract known keyword arguments
3019 | working_kwargs = dict(call_kwargs)
3020 | client = working_kwargs.pop("client", None)
3021 | tool_name = working_kwargs.pop("tool_name", None)
3022 | arguments = working_kwargs.pop("arguments", None)
3023 | operation_map = working_kwargs.pop("operation_map", None)
3024 | http_request_info = working_kwargs.pop("http_request_info", None)
3025 |
3026 |         # Log and discard unexpected kwargs to stay forward-compatible
3027 | for extra_key in list(working_kwargs.keys()):
3028 | extra_val = working_kwargs.pop(extra_key, None)
3029 | logging.debug(f"Ignoring unexpected MCP execute kwarg: {extra_key}={extra_val!r}")
3030 |
3031 | remaining = list(original_args)
3032 |
3033 | # Fill from positional args if any are missing
3034 | if client is None and remaining:
3035 | client = remaining.pop(0)
3036 | if tool_name is None and remaining:
3037 | tool_name = remaining.pop(0)
3038 | if arguments is None and remaining:
3039 | arguments = remaining.pop(0)
3040 | if operation_map is None and remaining:
3041 | operation_map = remaining.pop(0)
3042 | if http_request_info is None and remaining:
3043 | http_request_info = remaining.pop(0)
3044 |
3045 |         # If this is not our tool, or required data is missing, fall back to the original implementation
3046 | if (
3047 | tool_name != "stata_run_file"
3048 | or client is None
3049 | or operation_map is None
3050 | ):
3051 | return await original_execute(*original_args, **original_kwargs)
3052 |
3053 | arguments_dict = dict(arguments or {})
3054 |
3055 | # Try to get request context from either HTTP or SSE server
3056 | # IMPORTANT: Check HTTP first! If we check SSE first, we might get stale SSE context
3057 | # even when the request came through HTTP.
3058 | ctx = None
3059 | server_type = "unknown"
3060 | try:
3061 | ctx = http_mcp_server.request_context
3062 | server_type = "HTTP"
3063 | logging.debug(f"Using HTTP server request context: {ctx}")
3064 | except (LookupError, NameError):
3065 | # HTTP server has no context, try SSE server
3066 | try:
3067 | ctx = bound_self.server.request_context
3068 | server_type = "SSE"
3069 | logging.debug(f"Using SSE server request context: {ctx}")
3070 | except LookupError:
3071 | logging.debug("No MCP request context available; skipping streaming wrapper")
3072 | return await original_execute(
3073 | client=client,
3074 | tool_name=tool_name,
3075 | arguments=arguments_dict,
3076 | operation_map=operation_map,
3077 | http_request_info=http_request_info,
3078 | )
3079 |
3080 | session = getattr(ctx, "session", None)
3081 | request_id = getattr(ctx, "request_id", None)
3082 | progress_token = getattr(getattr(ctx, "meta", None), "progressToken", None)
3083 |
3084 | # DEBUG: Log session information
3085 | logging.info(f"✓ Streaming enabled via {server_type} server - Tool: {tool_name}")
3086 | if session:
3087 | session_attrs = [attr for attr in dir(session) if not attr.startswith('__')]
3088 | logging.debug(f"Session type: {type(session)}, Attributes: {session_attrs[:10]}")
3089 | session_id = getattr(session, "_session_id", getattr(session, "session_id", getattr(session, "id", None)))
3090 | else:
3091 | session_id = None
3092 | logging.debug(f"Tool execution - Server: {server_type}, Session ID: {session_id}, Request ID: {request_id}, Progress Token: {progress_token}")
3093 |
3094 | if session is None:
3095 | logging.debug("MCP session not available; falling back to default execution")
3096 | return await original_execute(
3097 | client=client,
3098 | tool_name=tool_name,
3099 | arguments=arguments_dict,
3100 | operation_map=operation_map,
3101 | http_request_info=http_request_info,
3102 | )
3103 |
3104 | if not hasattr(session, "_stata_log_level"):
3105 | setattr(session, "_stata_log_level", DEFAULT_LOG_LEVEL)
3106 |
3107 | file_path = arguments_dict.get("file_path", "")
3108 |
3109 | try:
3110 | timeout = int(arguments_dict.get("timeout", 600))
3111 | except (TypeError, ValueError):
3112 | timeout = 600
3113 |
3114 | resolved_path, resolution_candidates = resolve_do_file_path(file_path)
3115 | effective_path = resolved_path or os.path.abspath(file_path)
3116 | base_name = os.path.splitext(os.path.basename(effective_path))[0]
3117 | log_file_path = get_log_file_path(effective_path, base_name)
3118 |
3119 | logging.info(f"📡 MCP streaming enabled for {os.path.basename(file_path)}")
3120 | logging.debug(f"MCP log streaming monitoring: {log_file_path}")
3121 | if not resolved_path:
3122 | logging.debug(f"Resolution attempts: {resolution_candidates}")
3123 |
3124 | import asyncio as _asyncio
3125 | import time as _time
3126 |
3127 | async def send_log(level: str, message: str):
3128 | level = (level or "info").lower()
3129 | session_level = getattr(session, "_stata_log_level", DEFAULT_LOG_LEVEL)
3130 | if LOG_LEVEL_RANK.get(level, 0) < LOG_LEVEL_RANK.get(session_level, LOG_LEVEL_RANK[DEFAULT_LOG_LEVEL]):
3131 | return
3132 | logging.debug(f"MCP streaming log [{level}] (session level {session_level}): {message}")
3133 | try:
3134 | await session.send_log_message(
3135 | level=level,
3136 | data=message,
3137 | logger="stata-mcp",
3138 | related_request_id=request_id,
3139 | )
3140 | except Exception as send_exc: # noqa: BLE001
3141 | logging.debug(f"Unable to send MCP log message: {send_exc}")
3142 |
3143 | async def send_progress(elapsed: float, message: str | None = None):
3144 | if progress_token is None:
3145 | return
3146 | try:
3147 | await session.send_progress_notification(
3148 | progress_token=progress_token,
3149 | progress=elapsed,
3150 | total=timeout,
3151 | message=message,
3152 | related_request_id=request_id,
3153 | )
3154 | except Exception as send_exc: # noqa: BLE001
3155 | logging.debug(f"Unable to send MCP progress notification: {send_exc}")
3156 |
3157 | task = _asyncio.create_task(
3158 | original_execute(
3159 | client=client,
3160 | tool_name=tool_name,
3161 | arguments=arguments_dict,
3162 | operation_map=operation_map,
3163 | http_request_info=http_request_info,
3164 | )
3165 | )
3166 |
3167 | start_time = _time.time()
3168 | stream_interval = 5
3169 | poll_interval = 2
3170 | last_stream = 0.0
3171 | last_offset = 0
3172 |
3173 | start_message = f"▶️ Starting Stata execution: {os.path.basename(effective_path)}"
3174 | await send_log("notice", start_message)
3175 | await send_progress(0.0, start_message)
3176 |
3177 | try:
3178 | while not task.done():
3179 | await _asyncio.sleep(poll_interval)
3180 | now = _time.time()
3181 | elapsed = now - start_time
3182 |
3183 | if now - last_stream >= stream_interval:
3184 | progress_msg = f"⏱️ {elapsed:.0f}s elapsed / {timeout}s timeout"
3185 | await send_progress(elapsed, progress_msg)
3186 |
3187 | if os.path.exists(log_file_path):
3188 | await send_log(
3189 | "notice",
3190 | f"{progress_msg}\n\n(📁 Inspecting Stata log for new output...)",
3191 | )
3192 | try:
3193 | with open(log_file_path, "r", encoding="utf-8", errors="replace") as log_file:
3194 | log_file.seek(last_offset)
3195 | new_content = log_file.read()
3196 | last_offset = log_file.tell()
3197 |
3198 | snippet = ""
3199 | if new_content.strip():
3200 | lines = new_content.strip().splitlines()
3201 | snippet = "\n".join(lines[-3:])
3202 |
3203 |
3204 | if snippet:
3205 | progress_msg = f"{progress_msg}\n\n📝 Recent output:\n{snippet}"
3206 |
3207 | await send_log("notice", progress_msg)
3208 | except Exception as read_exc: # noqa: BLE001
3209 | logging.debug(f"Error reading log for streaming: {read_exc}")
3210 | await send_log(
3211 | "notice",
3212 | f"{progress_msg} (waiting for output...)",
3213 | )
3214 | else:
3215 | await send_log(
3216 | "notice",
3217 | f"{progress_msg} (initializing...)",
3218 | )
3219 |
3220 | last_stream = now
3221 |
3222 | result = await task
3223 | total_time = _time.time() - start_time
3224 | await send_log("notice", f"✅ Execution completed in {total_time:.1f}s")
3225 | return result
3226 | except Exception as exc:
3227 | logging.error(f"❌ Error during MCP streaming: {exc}", exc_info=True)
3228 | await send_log("error", f"Error during execution: {exc}")
3229 | raise
3230 |
3231 | import types as _types
3232 |
3233 | mcp._execute_api_tool = _types.MethodType(execute_with_streaming, mcp)
3234 | logging.info("📡 MCP streaming wrapper installed for stata_run_file")
3235 |
3236 | # Mark MCP as initialized (will also be set in startup event)
3237 | global mcp_initialized
3238 | mcp_initialized = True
3239 | logging.info("MCP server mounted and initialized")
3240 |
3241 | try:
3242 | # Start the server
3243 | logging.info(f"Starting Stata MCP Server on {args.host}:{port}")
3244 | logging.info(f"Stata available: {stata_available}")
3245 |
3246 | # Print to stdout as well to ensure visibility
3247 | if platform.system() == 'Windows':
3248 |             # On Windows, skip the startup message entirely if another instance was detected,
3249 |             # since that information has already been printed above
3250 | if not stata_banner_displayed:
3251 | print(f"INITIALIZATION SUCCESS: Stata MCP Server starting on {args.host}:{port}")
3252 | print(f"Stata available: {stata_available}")
3253 | print(f"Log file: {os.path.abspath(log_file)}")
3254 | else:
3255 | # Normal behavior for macOS/Linux
3256 | print(f"INITIALIZATION SUCCESS: Stata MCP Server starting on {args.host}:{port}")
3257 | print(f"Stata available: {stata_available}")
3258 | print(f"Log file: {os.path.abspath(log_file)}")
3259 |
3260 | import uvicorn
3261 | uvicorn.run(
3262 | app,
3263 | host=args.host,
3264 | port=port,
3265 |                 log_level="warning", # Suppress uvicorn info noise; warnings and errors still surface
3266 | access_log=False # Disable access logs
3267 | )
3268 |
3269 | except Exception as e:
3270 | logging.error(f"Server error: {str(e)}")
3271 | traceback.print_exc()
3272 | sys.exit(1)
3273 |
3274 | except Exception as e:
3275 | logging.error(f"Error in main function: {str(e)}")
3276 | traceback.print_exc()
3277 | sys.exit(1)
3278 |
3279 | if __name__ == "__main__":
3280 | main()
3281 |
```
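
For quick reference, here is a minimal client sketch for the SSE transport mounted at `/mcp` above: it lists the exposed tools and invokes `stata_run_file`. This is an illustration only, assuming the official `mcp` Python SDK client helpers (`sse_client`, `ClientSession`); the host, port, and `.do` file path are placeholders.

```python
# Hypothetical client sketch (not part of this repository): connect to the
# SSE transport at /mcp, list the exposed tools, and run a .do file.
# The host, port, and file path are placeholder assumptions.
import asyncio

from mcp import ClientSession
from mcp.client.sse import sse_client


async def run_do_file() -> None:
    async with sse_client("http://localhost:4000/mcp") as (read_stream, write_stream):
        async with ClientSession(read_stream, write_stream) as session:
            await session.initialize()

            tools = await session.list_tools()
            print([tool.name for tool in tools.tools])
            # Expected to include: stata_run_selection, stata_run_file

            result = await session.call_tool(
                "stata_run_file",
                {"file_path": "/path/to/analysis.do", "timeout": 600},
            )
            for block in result.content:
                print(getattr(block, "text", block))


if __name__ == "__main__":
    asyncio.run(run_do_file())
```

While the tool runs, the streaming wrapper in `main()` sends `notifications/message` and `notifications/progress` roughly every five seconds; whether and how those surface on the client depends on the SDK version and the notification handlers registered on the session.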
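
The Streamable HTTP transport mounted at `/mcp-streamable` can be exercised the same way. The sketch below assumes the SDK's `streamablehttp_client` helper, which yields a third element (a session-id accessor) that is ignored here; treat the import path and tuple shape as assumptions rather than documented behavior of this server.

```python
# Hypothetical sketch for the /mcp-streamable transport; assumes the
# streamablehttp_client helper from the mcp Python SDK.
import asyncio

from mcp import ClientSession
from mcp.client.streamable_http import streamablehttp_client


async def list_tools_streamable() -> None:
    async with streamablehttp_client("http://localhost:4000/mcp-streamable") as (
        read_stream,
        write_stream,
        _get_session_id,  # unused here
    ):
        async with ClientSession(read_stream, write_stream) as session:
            await session.initialize()
            tools = await session.list_tools()
            print([tool.name for tool in tools.tools])


if __name__ == "__main__":
    asyncio.run(list_tools_streamable())
```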
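
The operations excluded from the MCP tool list remain plain FastAPI routes intended for the VS Code extension. As a small, hedged example, the path below is inferred from the `health_check_health_get` operation id rather than documented on this page:

```python
# Liveness probe against the route behind health_check_health_get.
# The /health path and port 4000 are inferred assumptions.
import httpx

response = httpx.get("http://localhost:4000/health", timeout=5.0)
response.raise_for_status()
print(response.status_code, response.text)
```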