This is page 4 of 4. Use http://codebase.md/hanlulong/stata-mcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .github
│ ├── .gitattributes
│ ├── CLI_USAGE.md
│ └── CONTRIBUTING.md
├── .gitignore
├── .vscodeignore
├── CHANGELOG.md
├── docs
│ ├── examples
│ │ ├── auto_report.pdf
│ │ └── jupyter.ipynb
│ ├── incidents
│ │ ├── CLAUDE_CLIENTS_STREAMING_COMPARISON.md
│ │ ├── CLAUDE_CODE_NOTIFICATION_DIAGNOSIS.md
│ │ ├── CLAUDE_CODE_NOTIFICATION_ISSUE.md
│ │ ├── DUAL_TRANSPORT.md
│ │ ├── FINAL_DIAGNOSIS.md
│ │ ├── FINAL_STATUS_REPORT.md
│ │ ├── FINAL_TIMEOUT_TEST_RESULTS.md
│ │ ├── KEEP_ALIVE_IMPLEMENTATION.md
│ │ ├── LONG_EXECUTION_ISSUE.md
│ │ ├── MCP_CLIENT_VERIFICATION_SUCCESS.md
│ │ ├── MCP_ERROR_FIX.md
│ │ ├── MCP_TIMEOUT_SOLUTION.md
│ │ ├── MCP_TRANSPORT_FIX.md
│ │ ├── NOTIFICATION_FIX_COMPLETE.md
│ │ ├── NOTIFICATION_FIX_VERIFIED.md
│ │ ├── NOTIFICATION_ROUTING_BUG.md
│ │ ├── PROGRESSIVE_OUTPUT_APPROACH.md
│ │ ├── README.md
│ │ ├── SESSION_ACCESS_SOLUTION.md
│ │ ├── SSE_STREAMING_IMPLEMENTATION.md
│ │ ├── STREAMING_DIAGNOSIS.md
│ │ ├── STREAMING_IMPLEMENTATION_GUIDE.md
│ │ ├── STREAMING_SOLUTION.md
│ │ ├── STREAMING_STATUS.md
│ │ ├── STREAMING_TEST_GUIDE.md
│ │ ├── TIMEOUT_FIX_SUMMARY.md
│ │ └── TIMEOUT_TEST_REPORT.md
│ ├── jupyter-stata.md
│ ├── jupyter-stata.zh-CN.md
│ ├── release_notes.md
│ ├── release_notes.zh-CN.md
│ ├── releases
│ │ └── INSTALL_v0.3.4.md
│ └── REPO_STRUCTURE.md
├── images
│ ├── demo_2x.gif
│ ├── demo.mp4
│ ├── jupyterlab.png
│ ├── JupyterLabExample.png
│ ├── logo.png
│ ├── pystata.png
│ ├── Stata_MCP_logo_144x144.png
│ └── Stata_MCP_logo_400x400.png
├── LICENSE
├── package.json
├── README.md
├── README.zh-CN.md
├── src
│ ├── check-python.js
│ ├── devtools
│ │ ├── prepare-npm-package.js
│ │ └── restore-vscode-package.js
│ ├── extension.js
│ ├── language-configuration.json
│ ├── requirements.txt
│ ├── start-server.js
│ ├── stata_mcp_server.py
│ └── syntaxes
│ └── stata.tmLanguage.json
└── tests
├── README.md
├── simple_mcp_test.py
├── test_gr_list_issue.do
├── test_graph_issue.do
├── test_graph_name_param.do
├── test_keepalive.do
├── test_log_location.do
├── test_notifications.py
├── test_stata.do
├── test_streaming_http.py
├── test_streaming.do
├── test_timeout_direct.py
├── test_timeout.do
└── test_understanding.do
```
# Files
--------------------------------------------------------------------------------
/src/stata_mcp_server.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Stata MCP Server - Exposes Stata functionality to AI models via MCP protocol
5 | Using fastapi-mcp for clean implementation
6 | """
7 |
8 | import os
9 | import tempfile
10 | import json
11 | import sys
12 | import time
13 | import argparse
14 | import logging
15 | import platform
16 | import signal
17 | import subprocess
18 | import traceback
19 | import socket
20 | import asyncio
21 | from typing import Dict, Any, Optional
22 | import warnings
23 | import re
24 |
25 | # Fix encoding issues on Windows for Unicode characters
26 | if platform.system() == 'Windows':
27 | # Force UTF-8 encoding for stdout and stderr on Windows
28 | import io
29 | if sys.stdout.encoding != 'utf-8':
30 | sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace', line_buffering=True)
31 | if sys.stderr.encoding != 'utf-8':
32 | sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace', line_buffering=True)
33 | # Set environment variable for Python to use UTF-8
34 | os.environ['PYTHONIOENCODING'] = 'utf-8'
35 |
36 | # Hide Python process from Mac Dock (server should be background process)
37 | if platform.system() == 'Darwin':
38 | try:
39 | from AppKit import NSApplication
40 | # Set activation policy to accessory - hides dock icon but allows functionality
41 | # This must be called early, before any GUI operations (like Stata's JVM graphics)
42 | app = NSApplication.sharedApplication()
43 | # NSApplicationActivationPolicyAccessory = 1 (hidden from dock, can show windows)
44 | # NSApplicationActivationPolicyProhibited = 2 (completely hidden)
45 | app.setActivationPolicy_(1) # Use Accessory to allow Stata's GUI operations
46 | except Exception:
47 | # Silently ignore if AppKit not available or fails
48 | # This is just a UI improvement, not critical for functionality
49 | pass
50 |
51 | # Check if running as a module (using -m flag)
52 | is_running_as_module = __name__ == "__main__" and not sys.argv[0].endswith('stata_mcp_server.py')
53 | if is_running_as_module:
54 | print(f"Running as a module, using modified command-line handling")
55 |
56 | # Check Python version on Windows but don't exit immediately to allow logging
57 | if platform.system() == 'Windows':
58 | required_version = (3, 11)
59 | current_version = (sys.version_info.major, sys.version_info.minor)
60 | if current_version < required_version:
61 | print(f"WARNING: Python 3.11 or higher is recommended on Windows. Current version: {sys.version}")
62 | print("Please install Python 3.11 from python.org for best compatibility.")
63 | # Log this but don't exit immediately so logs can be written
64 |
65 | try:
66 | from fastapi import FastAPI, Request, Response, Query
67 | from fastapi.responses import StreamingResponse
68 | from fastapi_mcp import FastApiMCP
69 | from pydantic import BaseModel, Field
70 | from contextlib import asynccontextmanager
71 | import httpx
72 | except ImportError as e:
73 | print(f"ERROR: Required Python packages not found: {str(e)}")
74 | print("Please install the required packages:")
75 | print("pip install fastapi uvicorn fastapi-mcp pydantic")
76 |
77 | # On Windows, provide more guidance
78 | if platform.system() == 'Windows':
79 | print("\nOn Windows, you can install required packages by running:")
80 | print("py -3.11 -m pip install fastapi uvicorn fastapi-mcp pydantic")
81 | print("\nIf you need to install Python 3.11, download it from: https://www.python.org/downloads/")
82 |
83 | # Exit with error
84 | sys.exit(1)
85 |
86 | # Configure logging - will be updated in main() with proper log file
87 | # Start with basic console logging
88 | logging.basicConfig(
89 | level=logging.INFO, # Changed from DEBUG to INFO to reduce verbosity
90 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
91 | stream=sys.stdout # Default to stdout until log file is configured
92 | )
93 |
94 | # Create console handler for debugging
95 | console_handler = logging.StreamHandler(sys.stdout)
96 | console_handler.setLevel(logging.WARNING) # Only show WARNING level and above to keep console output clean
97 | formatter = logging.Formatter('%(levelname)s: %(message)s')
98 | console_handler.setFormatter(formatter)
99 | logging.getLogger().addHandler(console_handler)
100 |
101 | # Silence uvicorn access logs but allow warnings
102 | logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
103 | logging.getLogger("uvicorn.error").setLevel(logging.WARNING)
104 |
105 | # Server info
106 | SERVER_NAME = "Stata MCP Server"
107 | SERVER_VERSION = "1.0.0"
108 |
109 | # Flag for Stata availability
110 | stata_available = False
111 | has_stata = False
112 | stata = None # Module-level reference to stata module
113 | STATA_PATH = None
114 | # Add a flag to track if we've already displayed the Stata banner
115 | stata_banner_displayed = False
116 | # Add a flag to track if MCP server is fully initialized
117 | mcp_initialized = False
118 | # Add a storage for continuous command history
119 | command_history = []
120 | # Store the current Stata edition
121 | stata_edition = 'mp' # Default to MP edition
122 | # Store log file settings
123 | log_file_location = 'extension' # Default to extension directory
124 | custom_log_directory = '' # Custom log directory
125 | extension_path = None # Path to the extension directory
126 |
127 | # Try to import pandas
128 | try:
129 | import pandas as pd
130 | has_pandas = True
131 | logging.info("pandas module loaded successfully")
132 | except ImportError:
133 | has_pandas = False
134 | logging.warning("pandas not available, data transfer functionality will be limited")
135 | warnings.warn("pandas not available, data transfer functionality will be limited")
136 |
137 | # Try to initialize Stata with the given path
138 | def try_init_stata(stata_path):
139 | """Try to initialize Stata with the given path"""
140 | global stata_available, has_stata, stata, STATA_PATH, stata_banner_displayed, stata_edition
141 |
142 | # If Stata is already available, don't re-initialize
143 | if stata_available and has_stata and stata is not None:
144 | logging.debug("Stata already initialized, skipping re-initialization")
145 | return True
146 |
147 | # Clean the path (remove quotes if present)
148 | if stata_path:
149 | # Remove any quotes that might have been added
150 | stata_path = stata_path.strip('"\'')
151 | STATA_PATH = stata_path
152 | logging.info(f"Using Stata path: {stata_path}")
153 |
154 | logging.info(f"Initializing Stata from path: {stata_path}")
155 |
156 | try:
157 | # Add environment variables to help with library loading
158 | if stata_path:
159 | if not os.path.exists(stata_path):
160 | error_msg = f"Stata path does not exist: {stata_path}"
161 | logging.error(error_msg)
162 | print(f"ERROR: {error_msg}")
163 | return False
164 |
165 | os.environ['SYSDIR_STATA'] = stata_path
166 |
167 | stata_utilities_path = os.path.join(os.environ.get('SYSDIR_STATA', ''), 'utilities')
168 | if os.path.exists(stata_utilities_path):
169 | sys.path.insert(0, stata_utilities_path)
170 | logging.debug(f"Added Stata utilities path to sys.path: {stata_utilities_path}")
171 | else:
172 | warning_msg = f"Stata utilities path not found: {stata_utilities_path}"
173 | logging.warning(warning_msg)
174 |
175 | # Try to import pystata or stata-sfi
176 | try:
177 | # First try pystata
178 | from pystata import config
179 | logging.debug("Successfully imported pystata")
180 |
181 | # Try to initialize Stata
182 | try:
183 | # Only show banner once (suppress if we've shown it before)
184 | if not stata_banner_displayed and platform.system() == 'Windows':
185 | # On Windows, the banner appears even if we try to suppress it
186 | # At least mark that we've displayed it
187 | stata_banner_displayed = True
188 | logging.debug("Stata banner will be displayed (first time)")
189 | else:
190 | # On subsequent initializations, try to suppress the banner
191 | # This doesn't always work on Windows, but at least we're trying
192 | logging.debug("Attempting to suppress Stata banner on re-initialization")
193 | os.environ['STATA_QUIETLY'] = '1' # Add this environment variable
194 |
195 | # Set Java headless mode to prevent Dock icon on Mac (must be before config.init)
196 | # When Stata's embedded JVM initializes for graphics, it normally creates a Dock icon
197 | # Setting headless=true prevents this GUI behavior
198 | if platform.system() == 'Darwin':
199 | os.environ['JAVA_TOOL_OPTIONS'] = '-Djava.awt.headless=true'
200 | logging.debug("Set Java headless mode to prevent Dock icon")
201 |
202 | # Initialize with the specified Stata edition
203 | config.init(stata_edition)
204 | logging.info(f"Stata initialized successfully with {stata_edition.upper()} edition")
205 |
206 | # Fix encoding for PyStata output on Windows
207 | if platform.system() == 'Windows':
208 | import io
209 | # Replace PyStata's output file handle with UTF-8 encoded version
210 | config.stoutputf = io.TextIOWrapper(
211 | sys.stdout.buffer,
212 | encoding='utf-8',
213 | errors='replace',
214 | line_buffering=True
215 | )
216 | logging.debug("Configured PyStata output with UTF-8 encoding for Windows")
217 |
218 | # Now import stata after initialization
219 | from pystata import stata as stata_module
220 | # Set module-level stata reference
221 | globals()['stata'] = stata_module
222 |
223 | # Successfully initialized Stata
224 | has_stata = True
225 | stata_available = True
226 |
227 | # Initialize PNG export capability to prevent JVM crash in daemon threads (Mac-specific)
228 | #
229 | # Root cause: On Mac, Stata's graphics use embedded JVM. When PNG export is first
230 | # called from a daemon thread, the JVM initialization fails with SIGBUS error in
231 | # CodeHeap::allocate(). This is Mac-specific due to different JVM/threading model
232 | # in libstata-mp.dylib compared to Windows stata-mp-64.dll.
233 | #
234 | # Solution: Initialize JVM in main thread by doing one PNG export at startup.
235 | # All subsequent daemon thread PNG exports will reuse the initialized JVM.
236 | #
237 | # See: tests/MAC_SPECIFIC_ANALYSIS.md for detailed technical analysis
238 | try:
239 | from pystata.config import stlib, get_encode_str
240 | import tempfile
241 |
242 | # Create minimal dataset and graph (2 obs, 1 var)
243 | stlib.StataSO_Execute(get_encode_str("qui clear"), False)
244 | stlib.StataSO_Execute(get_encode_str("qui set obs 2"), False)
245 | stlib.StataSO_Execute(get_encode_str("qui gen x=1"), False)
246 | stlib.StataSO_Execute(get_encode_str("qui twoway scatter x x, name(_init, replace)"), False)
247 |
248 | # Export tiny PNG (10x10px) to initialize JVM in main thread
249 | # This prevents SIGBUS crash when daemon threads later export PNG
250 | png_init = os.path.join(tempfile.gettempdir(), "_stata_png_init.png")
251 | stlib.StataSO_Execute(get_encode_str(f'qui graph export "{png_init}", name(_init) replace width(10) height(10)'), False)
252 | stlib.StataSO_Execute(get_encode_str("qui graph drop _init"), False)
253 |
254 | # Cleanup temporary files
255 | if os.path.exists(png_init):
256 | os.unlink(png_init)
257 |
258 | logging.debug("PNG export initialized successfully (Mac JVM fix)")
259 | except Exception as png_init_error:
260 | # Non-fatal: log but continue - PNG may still work on some platforms
261 | logging.warning(f"PNG initialization failed (non-fatal): {str(png_init_error)}")
262 |
263 | return True
264 | except Exception as init_error:
265 | error_msg = f"Failed to initialize Stata: {str(init_error)}"
266 | logging.error(error_msg)
267 | print(f"ERROR: {error_msg}")
268 | print("Will attempt to continue without full Stata integration")
269 | print("Check if Stata is already running in another instance, or if your Stata license is valid")
270 |
271 | # Some features will still work without full initialization
272 | has_stata = False
273 | stata_available = False
274 |
275 | return False
276 | except ImportError as config_error:
277 | # Try stata-sfi as fallback
278 | try:
279 | import stata_setup
280 |
281 | # Only show banner once
282 | if not stata_banner_displayed and platform.system() == 'Windows':
283 | stata_banner_displayed = True
284 | logging.debug("Stata banner will be displayed (first time)")
285 | else:
286 | # On subsequent initializations, try to suppress the banner
287 | logging.debug("Attempting to suppress Stata banner on re-initialization")
288 | os.environ['STATA_QUIETLY'] = '1'
289 |
290 | stata_setup.config(stata_path, stata_edition)
291 | logging.debug("Successfully configured stata_setup")
292 |
293 | try:
294 | import sfi
295 | # Set module-level stata reference for compatibility
296 | globals()['stata'] = sfi
297 |
298 | has_stata = True
299 | stata_available = True
300 | logging.info("Stata initialized successfully using sfi")
301 |
302 | return True
303 | except ImportError as sfi_error:
304 | error_msg = f"Could not import sfi: {str(sfi_error)}"
305 | logging.error(error_msg)
306 | print(f"ERROR: {error_msg}")
307 | has_stata = False
308 | stata_available = False
309 | return False
310 | except Exception as setup_error:
311 | error_msg = f"Could not import pystata or sfi: {str(setup_error)}"
312 | logging.error(error_msg)
313 | print(f"ERROR: {error_msg}")
314 | print("Stata commands will not be available")
315 | has_stata = False
316 | stata_available = False
317 |
318 | return False
319 | except Exception as e:
320 | error_msg = f"General error setting up Stata environment: {str(e)}"
321 | logging.error(error_msg)
322 | print(f"ERROR: {error_msg}")
323 | print("Stata commands will not be available")
324 | print(f"Check if the Stata path is correct: {stata_path}")
325 | print("And ensure Stata is properly licensed and not running in another process")
326 | has_stata = False
327 | stata_available = False
328 |
329 | return False
330 |
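# Usage sketch (illustrative only; the install paths below are hypothetical
# examples, not detected values). try_init_stata() is typically called once at
# startup with the configured Stata installation directory; it returns a bool
# and sets the module globals stata_available, has_stata and stata:
#
#   try_init_stata("/Applications/Stata")         # macOS install directory
#   try_init_stata(r"C:\Program Files\Stata18")   # Windows install directory
#   try_init_stata("/usr/local/stata18")          # Linux install directory
#
# A False return does not stop the server; Stata-backed commands simply report
# that Stata is unavailable.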
331 | # Lock file mechanism removed - VS Code/Cursor handles extension instances properly
332 | # If there are port conflicts, the server will fail to start cleanly
333 |
334 | def get_log_file_path(do_file_path, do_file_base):
335 | """Get the appropriate log file path based on user settings
336 |
337 | Returns an absolute path to ensure log files are saved to the correct location
338 | regardless of Stata's working directory.
339 | """
340 | global log_file_location, custom_log_directory, extension_path
341 |
342 | if log_file_location == 'extension':
343 | # Use logs folder in extension directory
344 | if extension_path:
345 | logs_dir = os.path.join(extension_path, 'logs')
346 | # Create logs directory if it doesn't exist
347 | os.makedirs(logs_dir, exist_ok=True)
348 | log_path = os.path.join(logs_dir, f"{do_file_base}_mcp.log")
349 | return os.path.abspath(log_path)
350 | else:
351 | # Fallback to workspace if extension path is not available
352 | do_file_dir = os.path.dirname(do_file_path)
353 | log_path = os.path.join(do_file_dir, f"{do_file_base}_mcp.log")
354 | return os.path.abspath(log_path)
355 | elif log_file_location == 'custom':
356 | # Use custom directory
357 | if custom_log_directory and os.path.exists(custom_log_directory):
358 | log_path = os.path.join(custom_log_directory, f"{do_file_base}_mcp.log")
359 | return os.path.abspath(log_path)
360 | else:
361 | # Fallback to workspace if custom directory is invalid
362 | logging.warning(f"Custom log directory not valid: {custom_log_directory}, falling back to workspace")
363 | do_file_dir = os.path.dirname(do_file_path)
364 | log_path = os.path.join(do_file_dir, f"{do_file_base}_mcp.log")
365 | return os.path.abspath(log_path)
366 | else: # workspace
367 | # Use same directory as .do file (original behavior)
368 | do_file_dir = os.path.dirname(do_file_path)
369 | log_path = os.path.join(do_file_dir, f"{do_file_base}_mcp.log")
370 | return os.path.abspath(log_path)
371 |
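# Resolution sketch (illustrative only; the do-file path below is a
# hypothetical example). For /projects/demo/analysis.do the three
# log_file_location settings resolve to:
#
#   'extension' -> <extension_path>/logs/analysis_mcp.log
#   'custom'    -> <custom_log_directory>/analysis_mcp.log
#                  (falls back to the .do file's directory if the custom
#                   directory is missing or invalid)
#   'workspace' -> /projects/demo/analysis_mcp.log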
372 | def resolve_do_file_path(file_path: str) -> tuple[Optional[str], list[str]]:
373 | """Resolve a .do file path to an absolute location, mirroring run_stata_file logic.
374 |
375 | Returns:
376 | A tuple of (resolved_path, tried_paths). resolved_path is None if the file
377 | could not be located. tried_paths contains the normalized paths that were examined.
378 | """
379 | original_path = file_path
380 | normalized_path = os.path.normpath(file_path)
381 |
382 | # Normalize Windows paths to use backslashes for consistency
383 | if platform.system() == "Windows" and '/' in normalized_path:
384 | normalized_path = normalized_path.replace('/', '\\')
385 | logging.info(f"Converted path for Windows: {normalized_path}")
386 |
387 | candidates: list[str] = []
388 | tried_paths: list[str] = []
389 |
390 | if not os.path.isabs(normalized_path):
391 | cwd = os.getcwd()
392 | logging.info(f"File path is not absolute. Current working directory: {cwd}")
393 |
394 | candidates.extend([
395 | normalized_path,
396 | os.path.join(cwd, normalized_path),
397 | os.path.join(cwd, os.path.basename(normalized_path)),
398 | ])
399 |
400 | if platform.system() == "Windows":
401 | if '/' in original_path:
402 | win_path = original_path.replace('/', '\\')
403 | candidates.append(win_path)
404 | candidates.append(os.path.join(cwd, win_path))
405 | elif '\\' in original_path:
406 | unix_path = original_path.replace('\\', '/')
407 | candidates.append(unix_path)
408 | candidates.append(os.path.join(cwd, unix_path))
409 |
410 | # Search subdirectories up to two levels deep for the file
411 | for root, dirs, files in os.walk(cwd, topdown=True, followlinks=False):
412 | if os.path.basename(normalized_path) in files and root != cwd:
413 | subdir_path = os.path.join(root, os.path.basename(normalized_path))
414 | candidates.append(subdir_path)
415 |
416 | # Limit depth to two levels
417 | if root.replace(cwd, '').count(os.sep) >= 2:
418 | dirs[:] = []
419 | else:
420 | candidates.append(normalized_path)
421 |
422 | # Deduplicate while preserving order
423 | seen = set()
424 | unique_candidates = []
425 | for candidate in candidates:
426 | normalized_candidate = os.path.normpath(candidate)
427 | if normalized_candidate not in seen:
428 | seen.add(normalized_candidate)
429 | unique_candidates.append(normalized_candidate)
430 |
431 | for candidate in unique_candidates:
432 | tried_paths.append(candidate)
433 | if os.path.isfile(candidate) and candidate.lower().endswith('.do'):
434 | resolved = os.path.abspath(candidate)
435 | logging.info(f"Found file at: {resolved}")
436 | return resolved, tried_paths
437 |
438 | return None, tried_paths
439 |
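# Usage sketch (illustrative only; the file name is a hypothetical example):
#
#   resolved, tried = resolve_do_file_path("analysis.do")
#   if resolved is None:
#       print("Not found; tried:", ", ".join(tried))
#   else:
#       print("Running", resolved)
#
# Relative paths are checked against the current working directory and its
# subdirectories (up to two levels deep); absolute paths are checked as-is.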
440 | def get_stata_path():
441 | """Get the Stata executable path based on the platform and configured path"""
442 | global STATA_PATH
443 |
444 | if not STATA_PATH:
445 | return None
446 |
447 | # Build the actual executable path based on the platform
448 | if platform.system() == "Windows":
449 | # On Windows, executable is StataMP.exe or similar
450 | # Try different executable names
451 | for exe_name in ["StataMP-64.exe", "StataMP.exe", "StataSE-64.exe", "StataSE.exe", "Stata-64.exe", "Stata.exe"]:
452 | exe_path = os.path.join(STATA_PATH, exe_name)
453 | if os.path.exists(exe_path):
454 | return exe_path
455 |
456 | # If no specific executable found, use the default path with StataMP.exe
457 | return os.path.join(STATA_PATH, "StataMP.exe")
458 | else:
459 | # On macOS, the executable is StataMP inside the app bundle
460 | if platform.system() == "Darwin": # macOS
461 | # Check if STATA_PATH is the app bundle path
462 | if STATA_PATH.endswith(".app"):
463 | # App bundle format like /Applications/Stata/StataMP.app
464 | exe_path = os.path.join(STATA_PATH, "Contents", "MacOS", "StataMP")
465 | if os.path.exists(exe_path):
466 | return exe_path
467 |
468 | # Try other Stata variants
469 | for variant in ["StataSE", "Stata"]:
470 | exe_path = os.path.join(STATA_PATH, "Contents", "MacOS", variant)
471 | if os.path.exists(exe_path):
472 | return exe_path
473 | else:
474 | # Direct path like /Applications/Stata
475 | for variant in ["StataMP", "StataSE", "Stata"]:
476 | # Check if there's an app bundle inside the directory
477 | app_path = os.path.join(STATA_PATH, f"{variant}.app")
478 | if os.path.exists(app_path):
479 | exe_path = os.path.join(app_path, "Contents", "MacOS", variant)
480 | if os.path.exists(exe_path):
481 | return exe_path
482 |
483 | # Also check for direct executable
484 | exe_path = os.path.join(STATA_PATH, variant)
485 | if os.path.exists(exe_path):
486 | return exe_path
487 | else:
488 | # Linux - executable should be inside the path directly
489 | for variant in ["stata-mp", "stata-se", "stata"]:
490 | exe_path = os.path.join(STATA_PATH, variant)
491 | if os.path.exists(exe_path):
492 | return exe_path
493 |
494 | # If we get here, we couldn't find the executable
495 | logging.error(f"Could not find Stata executable in {STATA_PATH}")
496 | return STATA_PATH # Return the base path as fallback
497 |
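# Resolution sketch (illustrative only; the install locations below are
# common examples, not guaranteed paths):
#
#   Windows: C:\Program Files\Stata18 -> C:\Program Files\Stata18\StataMP-64.exe
#   macOS:   /Applications/Stata      -> /Applications/Stata/StataMP.app/Contents/MacOS/StataMP
#   Linux:   /usr/local/stata18       -> /usr/local/stata18/stata-mp
#
# If no known executable is found, Windows falls back to <STATA_PATH>\StataMP.exe
# and macOS/Linux fall back to returning STATA_PATH itself.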
498 | def check_stata_installed():
499 | """Check if Stata is installed and available"""
500 | global stata_available
501 |
502 | # First check if we have working Python integration
503 | if stata_available and 'stata' in globals():
504 | return True
505 |
506 | # Otherwise check for executable
507 | stata_path = get_stata_path()
508 | if not stata_path:
509 | return False
510 |
511 | # Check if the file exists and is executable
512 | if not os.path.exists(stata_path):
513 | return False
514 |
515 | # On non-Windows, check if it's executable
516 | if platform.system() != "Windows" and not os.access(stata_path, os.X_OK):
517 | return False
518 |
519 | return True
520 |
521 | # Function to run a Stata command
522 | def run_stata_command(command: str, clear_history=False, auto_detect_graphs=False):
523 | """Run a Stata command
524 |
525 | Args:
526 | command: The Stata command to run
527 | clear_history: Whether to clear command history
528 | auto_detect_graphs: Whether to detect and export graphs after execution (default: False for MCP/LLM calls)
529 |
530 | Note: This function manually enables _gr_list on before execution and detects graphs after.
531 | We do NOT use inline=True because it calls _gr_list off at the end, clearing our graph list!
532 | This function is only called from /v1/tools endpoint which is excluded from MCP.
533 | """
534 | global stata_available, has_stata, command_history
535 |
536 | # Only log at debug level instead of info to reduce verbosity
537 | logging.debug(f"Running Stata command: {command}")
538 |
539 | # Clear history if requested
540 | if clear_history:
541 | logging.info(f"Clearing command history (had {len(command_history)} items)")
542 | command_history = []
543 | # If it's just a clear request with no command, return empty
544 | if not command or command.strip() == '':
545 | logging.info("Clear history request completed")
546 | return ''
547 |
548 | # For multi-line commands, don't add semicolons - just clean up whitespace
549 | if "\n" in command:
550 | # Clean up the commands to ensure proper formatting without adding semicolons
551 | command = "\n".join(line.strip() for line in command.splitlines() if line.strip())
552 | logging.debug(f"Processed multiline command: {command}")
553 |
554 | # Special handling for 'do' commands with file paths
555 | if command.lower().startswith('do '):
556 | # Extract the file path part
557 | parts = command.split(' ', 1)
558 | if len(parts) > 1:
559 | file_path = parts[1].strip()
560 |
561 | # Remove any existing quotes
562 | if (file_path.startswith('"') and file_path.endswith('"')) or \
563 | (file_path.startswith("'") and file_path.endswith("'")):
564 | file_path = file_path[1:-1]
565 |
566 | # Normalize path for OS
567 | file_path = os.path.normpath(file_path)
568 |
569 | # On Windows, make sure backslashes are used
570 | if platform.system() == "Windows" and '/' in file_path:
571 | file_path = file_path.replace('/', '\\')
572 | logging.debug(f"Converted path for Windows: {file_path}")
573 |
574 | # For Stata's do command, ALWAYS use double quotes regardless of platform
575 | # This is the most reliable approach to handle spaces and special characters
576 | file_path = f'"{file_path}"'
577 |
578 | # Reconstruct the command with the properly formatted path
579 | command = f"do {file_path}"
580 | logging.debug(f"Reformatted 'do' command: {command}")
581 |
582 | # Check if pystata is available
583 | if has_stata and stata_available:
584 | # Run the command via pystata
585 | try:
586 | # Enable graph listing for this command using low-level API
587 | try:
588 | from pystata.config import stlib, get_encode_str
589 | logging.debug("Enabling graph listing with _gr_list on...")
590 | stlib.StataSO_Execute(get_encode_str("qui _gr_list on"), False)
591 | logging.debug("Successfully enabled graph listing")
592 | except Exception as e:
593 | logging.warning(f"Could not enable graph listing: {str(e)}")
594 | logging.debug(f"Graph listing enable error: {traceback.format_exc()}")
595 |
596 | # Initialize graphs list (will be populated if graphs are found)
597 | graphs_from_interactive = []
598 |
599 | # Create a temp file to capture output
600 | with tempfile.NamedTemporaryFile(
601 |
602 | suffix='.do', delete=False, mode='w', encoding='utf-8'
603 |
604 | ) as f:
605 | # Write the command to the file
606 | f.write(f"capture log close _all\n")
607 | f.write(f"log using \"{f.name}.log\", replace text\n")
608 |
609 | # Process command line by line to comment out cls commands
610 | cls_commands_found = 0
611 | processed_command = ""
612 | for line in command.splitlines():
613 | # Ensure line is a string (defensive programming)
614 | line = str(line) if line is not None else ""
615 |
616 | # Check if this is a cls command
617 | if re.match(r'^\s*cls\s*$', line, re.IGNORECASE):
618 | processed_command += f"* COMMENTED OUT BY MCP: {line}\n"
619 | cls_commands_found += 1
620 | else:
621 | processed_command += f"{line}\n"
622 |
623 | if cls_commands_found > 0:
624 | logging.info(f"Found and commented out {cls_commands_found} cls commands in the selection")
625 |
626 | # Special handling for 'do' commands to ensure proper quoting
627 | if command.lower().startswith('do '):
628 | # For do commands, we need to make sure the file path is properly handled
629 | # The command already has the file in quotes from the code above
630 | f.write(f"{processed_command}")
631 | else:
632 | # Normal commands don't need special treatment
633 | f.write(f"{processed_command}")
634 |
635 | f.write(f"capture log close\n")
636 | do_file = f.name
637 |
638 | # Execute the do file with echo=False to completely silence Stata output to console
639 | try:
640 | # Redirect stdout temporarily to silence Stata output
641 | original_stdout = sys.stdout
642 | sys.stdout = open(os.devnull, 'w')
643 |
644 | try:
645 | # Always use double quotes for the do file path for PyStata
646 | run_cmd = f"do \"{do_file}\""
647 | # Use inline=False because inline=True calls _gr_list off at the end!
648 | globals()['stata'].run(run_cmd, echo=False, inline=False)
649 | logging.debug(f"Command executed successfully via pystata: {run_cmd}")
650 | except Exception as e:
651 | # If command fails, try to reinitialize Stata once
652 | logging.warning(f"Stata command failed, attempting to reinitialize: {str(e)}")
653 |
654 | # Try to reinitialize Stata with the global path
655 | if STATA_PATH:
656 | if try_init_stata(STATA_PATH):
657 | # Retry the command if reinitialization succeeded
658 | try:
659 | globals()['stata'].run(f"do \"{do_file}\"", echo=False, inline=False)
660 | logging.info(f"Command succeeded after Stata reinitialization")
661 | except Exception as retry_error:
662 | logging.error(f"Command still failed after reinitializing Stata: {str(retry_error)}")
663 | raise retry_error
664 | else:
665 | logging.error(f"Failed to reinitialize Stata")
666 | raise e
667 | else:
668 | logging.error(f"No Stata path available for reinitialization")
669 | raise e
670 | finally:
671 | # Restore stdout
672 | sys.stdout.close()
673 | sys.stdout = original_stdout
674 |
675 | # Only detect and export graphs if enabled (not from LLM/MCP)
676 | if auto_detect_graphs:
677 | # Immediately check for graphs while they're still in memory
678 | # This happens right after stata.run() completes, before any cleanup
679 | try:
680 | logging.debug("Checking for graphs immediately after execution (interactive mode)...")
681 | graphs_from_interactive = display_graphs_interactive(graph_format='png', width=800, height=600)
682 | if graphs_from_interactive:
683 | logging.info(f"Captured {len(graphs_from_interactive)} graphs in interactive mode")
684 | except Exception as graph_err:
685 | logging.warning(f"Could not capture graphs in interactive mode: {str(graph_err)}")
686 |
687 | except Exception as exec_error:
688 | error_msg = f"Error running command: {str(exec_error)}"
689 | logging.error(error_msg)
690 | return error_msg
691 |
692 | # Read the log file
693 | log_file = f"{do_file}.log"
694 | logging.debug(f"Reading log file: {log_file}")
695 |
696 | # Wait for the log file to be written
697 | max_attempts = 10
698 | attempts = 0
699 | while not os.path.exists(log_file) and attempts < max_attempts:
700 | time.sleep(0.3)
701 | attempts += 1
702 |
703 | if not os.path.exists(log_file):
704 | logging.error(f"Log file not created: {log_file}")
705 | return "Command executed but no output was captured"
706 |
707 | # Wait a moment for file writing to complete
708 | time.sleep(0.5)
709 |
710 | try:
711 | with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
712 | log_content = f.read()
713 |
714 | # MUCH SIMPLER APPROACH: Just filter beginning and end of log file
715 | lines = log_content.strip().split('\n')
716 |
717 | # Find the first actual command (first line that starts with a dot that's not log related)
718 | start_index = 0
719 | for i, line in enumerate(lines):
720 | if line.strip().startswith('.') and 'log ' not in line and 'capture log close' not in line:
721 | # Found the first actual command, so output starts right after this
722 | start_index = i + 1
723 | break
724 |
725 | # Find end of output (the "capture log close" or "end of do-file" at the end)
726 | end_index = len(lines)
727 | for i in range(len(lines)-1, 0, -1):
728 | if 'capture log close' in lines[i] or 'end of do-file' in lines[i]:
729 | end_index = i
730 | break
731 |
732 | # Extract just the middle part (the actual output)
733 | result_lines = []
734 | for i in range(start_index, end_index):
735 | line = lines[i].rstrip() # Remove trailing whitespace
736 |
737 | # Skip empty lines at beginning or end
738 | if not line.strip():
739 | continue
740 |
741 | # Keep command lines (don't filter out lines starting with '.')
742 |
743 | # Remove consecutive blank lines (keep just one)
744 | if (not line.strip() and result_lines and not result_lines[-1].strip()):
745 | continue
746 |
747 | result_lines.append(line)
748 |
749 | # Clean up temporary files
750 | try:
751 | os.unlink(do_file)
752 | os.unlink(log_file)
753 | except Exception as e:
754 | logging.warning(f"Could not delete temporary files: {str(e)}")
755 |
756 | # Add timestamp to the result
757 | timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
758 | command_entry = f"[{timestamp}] {command}"
759 |
760 | # Return properly formatted output
761 | if not result_lines:
762 | result = "Command executed successfully (no output)"
763 | else:
764 | result = "\n".join(result_lines)
765 |
766 | # Use graphs captured in interactive mode (if any)
767 | # These were already captured right after execution while still in memory
768 | if graphs_from_interactive:
769 | graph_info = "\n\n" + "="*60 + "\n"
770 | graph_info += f"GRAPHS DETECTED: {len(graphs_from_interactive)} graph(s) created\n"
771 | graph_info += "="*60 + "\n"
772 | for graph in graphs_from_interactive:
773 | # Include command if available, using special format for JavaScript parsing
774 | if 'command' in graph and graph['command']:
775 | graph_info += f" • {graph['name']}: {graph['path']} [CMD: {graph['command']}]\n"
776 | else:
777 | graph_info += f" • {graph['name']}: {graph['path']}\n"
778 | result += graph_info
779 | logging.info(f"Added {len(graphs_from_interactive)} graphs to output (from interactive mode)")
780 | else:
781 | logging.debug("No graphs were captured in interactive mode")
782 |
783 | # Disable graph listing after detection
784 | try:
785 | from pystata.config import stlib, get_encode_str
786 | stlib.StataSO_Execute(get_encode_str("qui _gr_list off"), False)
787 | logging.debug("Disabled graph listing")
788 | except Exception as e:
789 | logging.warning(f"Could not disable graph listing: {str(e)}")
790 |
791 | # For interactive window, just return the current result
792 | # The client will handle displaying history
793 | return result
794 |
795 | except Exception as e:
796 | error_msg = f"Error reading log file: {str(e)}"
797 | logging.error(error_msg)
798 | return error_msg
799 |
800 | except Exception as e:
801 | error_msg = f"Error executing Stata command: {str(e)}"
802 | logging.error(error_msg)
803 | # Add to command history
804 | timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
805 | command_entry = f"[{timestamp}] {command}"
806 | command_history.append({"command": command_entry, "result": error_msg})
807 | return error_msg
808 |
809 | else:
810 | error_msg = "Stata is not available. Please check if Stata is installed and configured correctly."
811 | logging.error(error_msg)
812 | # Add to command history
813 | timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
814 | command_entry = f"[{timestamp}] {command}"
815 | command_history.append({"command": command_entry, "result": error_msg})
816 | return error_msg
817 |
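# Output sketch (illustrative only; graph name, path and command are
# hypothetical). When graphs are captured, run_stata_command() appends a block
# like the following to the returned text; the "[CMD: ...]" suffix is the
# special format parsed on the JavaScript side and appears only when a
# 'command' field is present:
#
#   ============================================================
#   GRAPHS DETECTED: 1 graph(s) created
#   ============================================================
#     • graph1: <graphs_dir>/graph1.png [CMD: scatter y x]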
818 | def detect_and_export_graphs():
819 | """Detect and export any graphs created by Stata commands
820 |
821 | Returns:
822 | List of dictionaries with graph info: [{"name": "graph1", "path": "/path/to/graph.png"}, ...]
823 | """
824 | global stata_available, has_stata, extension_path
825 |
826 | if not (has_stata and stata_available):
827 | return []
828 |
829 | try:
830 | import sfi
831 | from pystata.config import stlib, get_encode_str
832 |
833 | # Get list of graphs using low-level API like PyStata does
834 | logging.debug("Checking for graphs using _gr_list (low-level API)...")
835 |
836 | # Get the list (_gr_list should already be on from before command execution)
837 | rc = stlib.StataSO_Execute(get_encode_str("qui _gr_list list"), False)
838 | logging.debug(f"_gr_list list returned rc={rc}")
839 | gnamelist = sfi.Macro.getGlobal("r(_grlist)")
840 | logging.debug(f"r(_grlist) returned: '{gnamelist}' (type: {type(gnamelist)}, length: {len(gnamelist) if gnamelist else 0})")
841 |
842 | if not gnamelist:
843 | logging.debug("No graphs found (gnamelist is empty)")
844 | return []
845 |
846 | graphs_info = []
847 | graph_names = gnamelist.split()
848 | logging.info(f"Found {len(graph_names)} graph(s): {graph_names}")
849 |
850 | # Create graphs directory in extension path or temp
851 | if extension_path:
852 | graphs_dir = os.path.join(extension_path, 'graphs')
853 | else:
854 | graphs_dir = os.path.join(tempfile.gettempdir(), 'stata_mcp_graphs')
855 |
856 | os.makedirs(graphs_dir, exist_ok=True)
857 | logging.debug(f"Exporting graphs to: {graphs_dir}")
858 |
859 | # Export each graph to PNG
860 | for i, gname in enumerate(graph_names):
861 | try:
862 | # Display the graph first using low-level API
863 | # Stata graph names should not be quoted in graph display command
864 | gph_disp = f'qui graph display {gname}'
865 | rc = stlib.StataSO_Execute(get_encode_str(gph_disp), False)
866 | if rc != 0:
867 | logging.warning(f"Failed to display graph '{gname}' (rc={rc})")
868 | continue
869 |
870 | # Export as PNG (best for VS Code display)
871 | # Use a sanitized filename but keep the original name for the name() option
872 | graph_file = os.path.join(graphs_dir, f'{gname}.png')
873 | # The name() option does NOT need quotes - it's a Stata name, not a string
874 | gph_exp = f'qui graph export "{graph_file}", name({gname}) replace width(800) height(600)'
875 |
876 | logging.debug(f"Executing graph export command: {gph_exp}")
877 | rc = stlib.StataSO_Execute(get_encode_str(gph_exp), False)
878 | if rc != 0:
879 | logging.warning(f"Failed to export graph '{gname}' (rc={rc})")
880 | continue
881 |
882 | if os.path.exists(graph_file):
883 | graphs_info.append({
884 | "name": gname,
885 | "path": graph_file
886 | })
887 | logging.info(f"Exported graph '{gname}' to {graph_file}")
888 | else:
889 | logging.warning(f"Failed to export graph '{gname}' - file not created")
890 |
891 | except Exception as e:
892 | logging.error(f"Error exporting graph '{gname}': {str(e)}")
893 | continue
894 |
895 | return graphs_info
896 |
897 | except Exception as e:
898 | logging.error(f"Error detecting graphs: {str(e)}")
899 | return []
900 |
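# Return-value sketch (illustrative only; names and paths are hypothetical):
#
#   [
#       {"name": "graph1", "path": "<graphs_dir>/graph1.png"},
#       {"name": "hist_price", "path": "<graphs_dir>/hist_price.png"},
#   ]
#
#   for g in detect_and_export_graphs():
#       logging.info("Graph %s exported to %s", g["name"], g["path"])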
901 | def display_graphs_interactive(graph_format='png', width=800, height=600):
902 | """Display graphs using PyStata's interactive approach (similar to Jupyter)
903 |
904 | This function mimics PyStata's grdisplay.py approach for exporting graphs.
905 | It should be called immediately after command execution while graphs are still in memory.
906 |
907 | Args:
908 | graph_format: Format for exported graphs ('svg', 'png', or 'pdf')
909 | width: Width for graph export (pixels for png, inches for svg/pdf)
910 | height: Height for graph export (pixels for png, inches for svg/pdf)
911 |
912 | Returns:
913 | List of dictionaries with graph info: [{"name": "graph1", "path": "/path/to/graph.png", "format": "png", "command": "scatter y x"}, ...]
914 | """
915 | global stata_available, has_stata, extension_path
916 |
917 | if not (has_stata and stata_available):
918 | return []
919 |
920 | try:
921 | import sfi
922 | from pystata.config import stlib, get_encode_str
923 |
924 | # Use the same approach as PyStata's grdisplay.py
925 | logging.debug(f"Interactive graph display: checking for graphs (format: {graph_format})...")
926 |
927 | # Get the list of graphs (_gr_list should already be on from before file execution)
928 | rc = stlib.StataSO_Execute(get_encode_str("qui _gr_list list"), False)
929 | logging.debug(f"_gr_list list returned rc={rc}")
930 | gnamelist = sfi.Macro.getGlobal("r(_grlist)")
931 | logging.debug(f"r(_grlist) returned: '{gnamelist}' (type: {type(gnamelist)}, length: {len(gnamelist) if gnamelist else 0})")
932 |
933 | if not gnamelist:
934 | logging.debug("No graphs found in interactive mode")
935 | return []
936 |
937 | graphs_info = []
938 | graph_names = gnamelist.split()
939 | logging.info(f"Found {len(graph_names)} graph(s) in interactive mode: {graph_names}")
940 |
941 | # Create graphs directory
942 | if extension_path:
943 | graphs_dir = os.path.join(extension_path, 'graphs')
944 | else:
945 | graphs_dir = os.path.join(tempfile.gettempdir(), 'stata_mcp_graphs')
946 |
947 | os.makedirs(graphs_dir, exist_ok=True)
948 | logging.debug(f"Exporting graphs to: {graphs_dir}")
949 |
950 | # Export each graph using PyStata's approach
951 | for i, gname in enumerate(graph_names):
952 | try:
953 | # Display the graph first (required before export)
954 | # Stata graph names should not be quoted in graph display command
955 | gph_disp = f'qui graph display {gname}'
956 | logging.debug(f"Displaying graph: {gph_disp}")
957 | rc = stlib.StataSO_Execute(get_encode_str(gph_disp), False)
958 | if rc != 0:
959 | logging.warning(f"Failed to display graph '{gname}' (rc={rc})")
960 | continue
961 |
962 | # Determine file extension and export command based on format
963 | if graph_format == 'svg':
964 | graph_file = os.path.join(graphs_dir, f'{gname}.svg')
965 | if width and height:
966 | gph_exp = f'qui graph export "{graph_file}", name({gname}) replace width({width}) height({height})'
967 | else:
968 | gph_exp = f'qui graph export "{graph_file}", name({gname}) replace'
969 | elif graph_format == 'pdf':
970 | graph_file = os.path.join(graphs_dir, f'{gname}.pdf')
971 | # For PDF, use xsize/ysize instead of width/height
972 | if width and height:
973 | gph_exp = f'qui graph export "{graph_file}", name({gname}) replace xsize({width/96:.2f}) ysize({height/96:.2f})'
974 | else:
975 | gph_exp = f'qui graph export "{graph_file}", name({gname}) replace'
976 | else: # png (default)
977 | graph_file = os.path.join(graphs_dir, f'{gname}.png')
978 | if width and height:
979 | gph_exp = f'qui graph export "{graph_file}", name({gname}) replace width({width}) height({height})'
980 | else:
981 | gph_exp = f'qui graph export "{graph_file}", name({gname}) replace width(800) height(600)'
982 |
983 | # Export the graph
984 | logging.debug(f"Exporting graph: {gph_exp}")
985 | rc = stlib.StataSO_Execute(get_encode_str(gph_exp), False)
986 | if rc != 0:
987 | logging.warning(f"Failed to export graph '{gname}' (rc={rc})")
988 | continue
989 |
990 | if os.path.exists(graph_file):
991 | graph_dict = {
992 | "name": gname,
993 | "path": graph_file,
994 | "format": graph_format
995 | }
996 | graphs_info.append(graph_dict)
997 | logging.info(f"Exported graph '{gname}' to {graph_file} (format: {graph_format})")
998 | else:
999 | logging.warning(f"Graph file not found after export: {graph_file}")
1000 |
1001 | except Exception as e:
1002 | logging.error(f"Error exporting graph '{gname}': {str(e)}")
1003 | continue
1004 |
1005 | return graphs_info
1006 |
1007 | except Exception as e:
1008 | logging.error(f"Error in interactive graph display: {str(e)}")
1009 | logging.debug(f"Interactive display error details: {traceback.format_exc()}")
1010 | return []
1011 |
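# Export-command sketch (illustrative only), for a hypothetical 800x600 request
# and a graph named g:
#
#   png: qui graph export "<dir>/g.png", name(g) replace width(800) height(600)
#   svg: qui graph export "<dir>/g.svg", name(g) replace width(800) height(600)
#   pdf: qui graph export "<dir>/g.pdf", name(g) replace xsize(8.33) ysize(6.25)
#        (pixel dimensions converted to inches at an assumed 96 px per inch)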
1012 | def run_stata_selection(selection, working_dir=None, auto_detect_graphs=False):
1013 | """Run selected Stata code
1014 |
1015 | Args:
1016 | selection: The Stata code to run
1017 | working_dir: Optional working directory to change to before execution
1018 | auto_detect_graphs: Whether to detect and export graphs (default: False for MCP/LLM calls)
1019 | """
1020 | # If a working directory is provided, prepend a cd command
1021 | if working_dir and os.path.isdir(working_dir):
1022 | logging.info(f"Changing working directory to: {working_dir}")
1023 | # Normalize path for the OS
1024 | working_dir = os.path.normpath(working_dir)
1025 | # On Windows, ensure backslashes
1026 | if platform.system() == "Windows":
1027 | working_dir = working_dir.replace('/', '\\')
1028 | # Use double quotes for the cd command to handle spaces
1029 | cd_command = f'cd "{working_dir}"'
1030 | # Combine cd command with the selection
1031 | full_command = f"{cd_command}\n{selection}"
1032 | return run_stata_command(full_command, auto_detect_graphs=auto_detect_graphs)
1033 | else:
1034 | return run_stata_command(selection, auto_detect_graphs=auto_detect_graphs)
1035 |
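# Usage sketch (illustrative only; the directory and command are hypothetical):
#
#   run_stata_selection("summarize price", working_dir="/projects/demo")
#   # executes:
#   #   cd "/projects/demo"
#   #   summarize price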
1036 | def run_stata_file(file_path: str, timeout=600, auto_name_graphs=False):
1037 | """Run a Stata .do file with improved handling for long-running processes
1038 |
1039 | Args:
1040 | file_path: The path to the .do file to run
1041 | timeout: Timeout in seconds (default: 600 seconds / 10 minutes)
1042 | auto_name_graphs: Whether to automatically add names to graphs (default: False for MCP/LLM calls)
1043 | """
1044 | # Set timeout from parameter instead of hardcoding
1045 | MAX_TIMEOUT = timeout
1046 |
1047 | try:
1048 | original_path = file_path
1049 |
1050 | resolved_path, tried_paths = resolve_do_file_path(file_path)
1051 | if not resolved_path:
1052 | tried_display = ', '.join(tried_paths) if tried_paths else os.path.normpath(file_path)
1053 | error_msg = f"Error: File not found: {original_path}. Tried these paths: {tried_display}"
1054 | logging.error(error_msg)
1055 |
1056 | # Add more helpful error message for Windows
1057 | if platform.system() == "Windows":
1058 | error_msg += "\n\nCommon Windows path issues:\n"
1059 | error_msg += "1. Make sure the file path uses correct separators (use \\ instead of /)\n"
1060 | error_msg += "2. Check if the file exists in the specified location\n"
1061 | error_msg += "3. If using relative paths, the current working directory is: " + os.getcwd()
1062 |
1063 | return error_msg
1064 |
1065 | file_path = resolved_path
1066 |
1067 | # Verify file exists (final check)
1068 | if not os.path.exists(file_path):
1069 | error_msg = f"Error: File not found: {file_path}"
1070 | logging.error(error_msg)
1071 |
1072 | # Add more helpful error message for Windows
1073 | if platform.system() == "Windows":
1074 | error_msg += "\n\nCommon Windows path issues:\n"
1075 | error_msg += "1. Make sure the file path uses correct separators (use \\ instead of /)\n"
1076 | error_msg += "2. Check if the file exists in the specified location\n"
1077 | error_msg += "3. If using relative paths, the current working directory is: " + os.getcwd()
1078 |
1079 | return error_msg
1080 |
1081 | # Check file extension
1082 | if not file_path.lower().endswith('.do'):
1083 | error_msg = f"Error: File must be a Stata .do file with .do extension: {file_path}"
1084 | logging.error(error_msg)
1085 | return error_msg
1086 |
1087 | logging.info(f"Running Stata do file: {file_path}")
1088 |
1089 | # Ensure file_path is absolute for consistent behavior
1090 | file_path = os.path.abspath(file_path)
1091 |
1092 | # Get the directory and filename for later use
1093 | do_file_dir = os.path.dirname(file_path) # This is now guaranteed to be absolute
1094 | do_file_name = os.path.basename(file_path)
1095 | do_file_base = os.path.splitext(do_file_name)[0]
1096 |
1097 | # Create a custom log file path based on user settings
1098 | # The log file path will be absolute, allowing it to be saved anywhere
1099 | # regardless of Stata's current working directory
1100 | custom_log_file = get_log_file_path(file_path, do_file_base)
1101 | logging.info(f"Will save log to: {custom_log_file}")
1102 |
1103 | # Read the do file content
1104 | do_file_content = ""
1105 | try:
1106 | with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
1107 | do_file_content = f.read()
1108 |
1109 | # Create a modified version with log commands commented out and auto-name graphs
1110 | modified_content = ""
1111 | log_commands_found = 0
1112 | graph_counter = 0
1113 |
1114 | # Process line by line to comment out log commands and add graph names where needed
1115 | cls_commands_found = 0
1116 | for line in do_file_content.splitlines():
1117 | # Ensure line is a string (defensive programming)
1118 | line = str(line) if line is not None else ""
1119 |
1120 | # Check if this line has a log command
1121 | if re.match(r'^\s*(log\s+using|log\s+close|capture\s+log\s+close)', line, re.IGNORECASE):
1122 | modified_content += f"* COMMENTED OUT BY MCP: {line}\n"
1123 | log_commands_found += 1
1124 | continue
1125 |
1126 | # Check if this is a cls command
1127 | if re.match(r'^\s*cls\s*$', line, re.IGNORECASE):
1128 | modified_content += f"* COMMENTED OUT BY MCP: {line}\n"
1129 | cls_commands_found += 1
1130 | continue
1131 |
1132 | # Only auto-name graphs if called from VS Code extension (not from LLM/MCP)
1133 | if auto_name_graphs:
1134 | # Check if this is a graph creation command that might need a name
1135 | # Match: scatter, histogram, twoway, kdensity, graph bar/box/dot/etc (but not graph export)
1136 | graph_match = re.match(r'^(\s*)(scatter|histogram|twoway|kdensity|graph\s+(bar|box|dot|pie|matrix|hbar|hbox|combine))\s+(.*)$', line, re.IGNORECASE)
1137 |
1138 | if graph_match:
1139 | indent = str(graph_match.group(1) or "")
1140 | graph_cmd = str(graph_match.group(2) or "")
1141 |
1142 | # Extract and ensure rest is a string
1143 | rest_raw = graph_match.group(4) if graph_match.lastindex >= 4 else ""
1144 | if rest_raw is None:
1145 | rest_raw = ""
1146 | # Force conversion to string to handle any edge cases
1147 | rest = str(rest_raw)
1148 |
1149 | # Double-check rest is a string before any operations
1150 | if not isinstance(rest, str):
1151 | logging.warning(f"rest is not a string, type: {type(rest)}, value: {rest}, converting to string")
1152 | rest = str(rest)
1153 |
1154 | # Check if it already has name() option
1155 | if not re.search(r'\bname\s*\(', rest, re.IGNORECASE):
1156 | # Add automatic unique name
1157 | graph_counter += 1
1158 | graph_name = f"graph{graph_counter}"
1159 |
1160 | # Add name option - if there's a comma, add after it; otherwise add with comma
1161 | if ',' in rest:
1162 | # Insert name option right after the first comma
1163 | # Ensure rest is definitely a string before re.sub
1164 | rest = str(rest)
1165 | rest = re.sub(r',', f', name({graph_name}, replace)', rest, 1)
1166 | else:
1167 | # No comma yet, add it
1168 | rest = rest.rstrip() + f', name({graph_name}, replace)'
1169 |
1170 | modified_content += f"{indent}{graph_cmd} {rest}\n"
1171 | logging.debug(f"Auto-named graph: {graph_name}")
1172 | continue
1173 |
1174 | # Keep line as-is (including graph export commands)
1175 | modified_content += f"{line}\n"
1176 |
1177 | logging.info(f"Found and commented out {log_commands_found} log commands in the do file")
1178 | if cls_commands_found > 0:
1179 | logging.info(f"Found and commented out {cls_commands_found} cls commands in the do file")
1180 | if graph_counter > 0:
1181 | logging.info(f"Auto-named {graph_counter} graph commands")
1182 |
1183 | # Save the modified content to a temporary file
1184 | with tempfile.NamedTemporaryFile(
1185 |
1186 | suffix='.do', delete=False, mode='w', encoding='utf-8'
1187 |
1188 | ) as temp_do:
1189 | # First close any existing log files
1190 | temp_do.write(f"capture log close _all\n")
1191 | # Clean up Stata session state to prevent pollution from interrupted executions
1192 | # Drop all temporary programs (especially loop programs like 1while, 2while, etc.)
1193 | temp_do.write(f"capture program drop _all\n")
1194 | # Clear all macros to prevent conflicts
1195 | temp_do.write(f"capture macro drop _all\n")
1196 | # Change working directory to the .do file's directory
1197 | # This ensures the .do file executes in its workspace (relative paths work correctly)
1198 | # The log file uses an absolute path, so it's saved to the configured location
1199 | temp_do.write(f"cd \"{do_file_dir}\"\n")
1200 | # Note: _gr_list on is enabled externally before .do file execution
1201 | # Note: Graph names are auto-injected above into modified_content
1202 | # Then add our own log command with absolute path
1203 | temp_do.write(f"log using \"{custom_log_file}\", replace text\n")
1204 | temp_do.write(modified_content)
1205 | temp_do.write(f"\ncapture log close _all\n") # Ensure all logs are closed at the end
1206 | # Note: We intentionally do NOT disable _gr_list so graphs persist for detection
1207 | modified_do_file = temp_do.name
1208 |
1209 | logging.info(f"Created modified do file at {modified_do_file}")
1210 |
1211 | except Exception as e:
1212 | import traceback
1213 | error_msg = f"Error processing do file: {str(e)}"
1214 | logging.error(error_msg)
1215 | logging.error(f"Traceback: {traceback.format_exc()}")
1216 | # Include line number and more details
1217 | tb = traceback.extract_tb(e.__traceback__)
1218 | if tb:
1219 | last_frame = tb[-1]
1220 | error_msg += f"\n at line {last_frame.lineno} in {last_frame.name}"
1221 | return error_msg
1222 |
1223 | # Prepare command entry for history
1224 | timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
1225 | command_entry = f"[{timestamp}] do '{file_path}'"
1226 |
1227 | # Create initial result to update the user
1228 | initial_result = f">>> {command_entry}\nExecuting Stata do file with timeout: {MAX_TIMEOUT} seconds ({MAX_TIMEOUT/60:.1f} minutes)...\n"
1229 |
1230 | # Need to define result variable here so it's accessible in all code paths
1231 | result = initial_result
1232 |
1233 | # Create a properly escaped file path for Stata
1234 | if platform.system() == "Windows":
1235 | # On Windows, escape backslashes and quotes
1236 | stata_path = modified_do_file.replace('"', '\\"')
1237 | # Ensure the path is properly quoted for Windows
1238 | do_command = f'do "{stata_path}"'
1239 | else:
1240 | # On Unix systems (macOS/Linux), use double quotes for better compatibility
1241 | # Double quotes work more reliably across systems
1242 | do_command = f'do "{modified_do_file}"'
1243 |
1244 | # Run the command in background with timeout
1245 | try:
1246 | # Execute the Stata command
1247 | logging.info(f"Running modified do file: {do_command}")
1248 |
1249 | # Set up for PyStata execution
1250 | if has_stata and stata_available:
1251 | # Enable graph listing for this do file execution using low-level API
1252 | try:
1253 | from pystata.config import stlib, get_encode_str
1254 | stlib.StataSO_Execute(get_encode_str("qui _gr_list on"), False)
1255 | logging.debug("Enabled graph listing for do file")
1256 | except Exception as e:
1257 | logging.warning(f"Could not enable graph listing: {str(e)}")
1258 |
1259 | # Record start time for timeout tracking
1260 | start_time = time.time()
1261 | last_update_time = start_time
1262 | update_interval = 60 # Update every 60 seconds (1 minute) initially
1263 |
1264 | # Initialize log tracking
1265 | log_file_exists = False
1266 | last_log_size = 0
1267 | last_reported_lines = 0
1268 |
1269 | # Execute command via PyStata in separate thread to allow polling
1270 | stata_thread = None
1271 | stata_error = None
1272 |
1273 | def run_stata_thread():
1274 | try:
1275 | # Make sure to properly quote the path - this is the key fix
1276 | # Use inline=False because inline=True calls _gr_list off!
1277 | if platform.system() == "Windows":
1278 | # Make sure Windows paths are properly escaped
1279 | globals()['stata'].run(do_command, echo=False, inline=False)
1280 | else:
1281 | # On macOS/Linux, double-check the quoting - adding extra safety
1282 | if not (do_command.startswith('do "') or do_command.startswith("do '")):
1283 | do_command_fixed = f'do "{stata_path}"'
1284 | globals()['stata'].run(do_command_fixed, echo=False, inline=False)
1285 | else:
1286 | globals()['stata'].run(do_command, echo=False, inline=False)
1287 | except Exception as e:
1288 | nonlocal stata_error
1289 | stata_error = str(e)
1290 |
1291 | import threading
1292 | stata_thread = threading.Thread(target=run_stata_thread)
1293 | stata_thread.daemon = True
1294 | stata_thread.start()
1295 |
1296 | # Poll for progress while command is running
1297 | while stata_thread.is_alive():
1298 | # Check for timeout
1299 | current_time = time.time()
1300 | elapsed_time = current_time - start_time
1301 |
1302 | if elapsed_time > MAX_TIMEOUT:
1303 | logging.warning(f"Execution timed out after {MAX_TIMEOUT} seconds")
1304 | result += f"\n*** TIMEOUT: Execution exceeded {MAX_TIMEOUT} seconds ({MAX_TIMEOUT/60:.1f} minutes) ***\n"
1305 |
1306 | # Force terminate Stata operation with increasing severity
1307 | termination_successful = False
1308 |
1309 | try:
1310 | # ATTEMPT 1: Send Stata break command
1311 | logging.warning(f"TIMEOUT - Attempt 1: Sending Stata break command")
1312 | try:
1313 | globals()['stata'].run("break", echo=False)
1314 | time.sleep(0.5) # Give it a moment
1315 | if not stata_thread.is_alive():
1316 | termination_successful = True
1317 | logging.warning("Thread terminated via Stata break command")
1318 | except Exception as e:
1319 | logging.warning(f"Stata break command failed: {str(e)}")
1320 |
1321 | # ATTEMPT 2: Try to forcibly raise an exception in the thread
1322 | if not termination_successful and hasattr(stata_thread, "_stop"):
1323 | logging.warning(f"TIMEOUT - Attempt 2: Forcing thread stop")
1324 | try:
1325 | # This is a more aggressive approach
1326 | # The _stop method is not officially supported but often works
1327 | stata_thread._stop()
1328 | time.sleep(0.5) # Give it a moment
1329 | if not stata_thread.is_alive():
1330 | termination_successful = True
1331 | logging.warning("Thread terminated via thread._stop")
1332 | except Exception as e:
1333 | logging.warning(f"Thread stop failed: {str(e)}")
1334 |
1335 | # ATTEMPT 3: Try to find and kill the Stata process (last resort)
1336 | if not termination_successful:
1337 | logging.warning(f"TIMEOUT - Attempt 3: Looking for Stata process to terminate")
1338 | try:
1339 | # Find any Stata processes
1340 | if platform.system() == "Windows":
1341 | # Windows approach
1342 | subprocess.run(["taskkill", "/F", "/IM", "stata*.exe"],
1343 | stdout=subprocess.DEVNULL,
1344 | stderr=subprocess.DEVNULL)
1345 | else:
1346 | # macOS/Linux approach
1347 | subprocess.run(["pkill", "-f", "stata"],
1348 | stdout=subprocess.DEVNULL,
1349 | stderr=subprocess.DEVNULL)
1350 |
1351 | logging.warning("Sent kill signal to Stata processes")
1352 | except Exception as e:
1353 | logging.error(f"Process kill failed: {str(e)}")
1354 | except Exception as term_error:
1355 | logging.error(f"Error during forced termination: {str(term_error)}")
1356 |
1357 | # Set a flag indicating timeout regardless of termination success
1358 | stata_error = f"Operation timed out after {MAX_TIMEOUT} seconds"
1359 | logging.warning(f"Setting timeout error: {stata_error}")
1360 | break
1361 |
1362 | # Check if it's time for an update
1363 | if current_time - last_update_time >= update_interval:
1364 | # IMPORTANT: Log progress frequently to keep SSE connection alive for long-running scripts
1365 | logging.info(f"⏱️ Execution in progress: {elapsed_time:.0f}s elapsed ({elapsed_time/60:.1f} minutes) of {MAX_TIMEOUT}s timeout")
1366 |
1367 | # Check if log file exists and has been updated
1368 | if os.path.exists(custom_log_file):
1369 | log_file_exists = True
1370 |
1371 | # Check log file size
1372 | current_log_size = os.path.getsize(custom_log_file)
1373 |
1374 | # If log has grown, report progress
1375 | if current_log_size > last_log_size:
1376 | try:
1377 | with open(custom_log_file, 'r', encoding='utf-8', errors='replace') as log:
1378 | log_content = log.read()
1379 | lines = log_content.splitlines()
1380 |
1381 | # Report only new lines since last update
1382 | if last_reported_lines < len(lines):
1383 | new_lines = lines[last_reported_lines:]
1384 |
1385 | # Only report meaningful lines (skip empty lines and headers)
1386 | meaningful_lines = [line for line in new_lines if line.strip() and not line.startswith('-')]
1387 |
1388 | # If we have meaningful content, add it to result
1389 | if meaningful_lines:
1390 | progress_update = f"\n*** Progress update ({elapsed_time:.0f} seconds) ***\n"
1391 | progress_update += "\n".join(meaningful_lines[-10:]) # Show last 10 lines
1392 | result += progress_update
1393 | # Also log the progress for SSE keep-alive
1394 | logging.info(f"📊 Progress: Log file grew to {current_log_size} bytes, {len(meaningful_lines)} new meaningful lines")
1395 |
1396 | last_reported_lines = len(lines)
1397 | except Exception as e:
1398 | logging.warning(f"Error reading log for progress update: {str(e)}")
1399 |
1400 | last_log_size = current_log_size
1401 |
1402 | last_update_time = current_time
1403 |
1404 |                 # Polling interval: intentionally fixed at 60 seconds regardless of elapsed
1405 |                 # time. Emitting at least one log message per minute is what keeps the SSE
1406 |                 # connection alive for long-running scripts, so there is no adaptive back-off.
1407 |                 update_interval = 60
1412 |
1413 | # Sleep briefly to avoid consuming too much CPU
1414 | time.sleep(0.5)
1415 |
1416 | # Thread completed or timed out
1417 | if stata_error:
1418 | error_msg = f"Error executing Stata command: {stata_error}"
1419 | logging.error(error_msg)
1420 | result += f"\n*** ERROR: {stata_error} ***\n"
1421 |
1422 | # Add command to history and return
1423 | command_history.append({"command": command_entry, "result": result})
1424 | return result
1425 |
1426 | # Read final log output
1427 | if os.path.exists(custom_log_file):
1428 | try:
1429 | with open(custom_log_file, 'r', encoding='utf-8', errors='replace') as log:
1430 | log_content = log.read()
1431 |
1432 | # Clean up log content - remove headers and Stata startup info
1433 | lines = log_content.splitlines()
1434 | result_lines = []
1435 |
1436 | # Skip Stata header if present (search for the separator line)
1437 | start_index = 0
1438 | for i, line in enumerate(lines):
1439 | if '-------------' in line and i < 20: # Look in first 20 lines
1440 | start_index = i + 1
1441 | break
1442 |
1443 | # Process the content
1444 | for i in range(start_index, len(lines)):
1445 | # Ensure line is a string (defensive programming)
1446 | line = str(lines[i]) if lines[i] is not None else ""
1447 | line = line.rstrip()
1448 |
1449 | # Skip empty lines at beginning or redundant empty lines
1450 | if not line.strip() and (not result_lines or not result_lines[-1].strip()):
1451 | continue
1452 |
1453 | # Clean up SMCL formatting if present
1454 | if '{' in line:
1455 | line = re.sub(r'\{[^}]*\}', '', line) # Remove {...} codes
1456 |
1457 | result_lines.append(line)
1458 |
1459 | # Add completion message with final log content
1460 | completion_msg = f"\n*** Execution completed in {time.time() - start_time:.1f} seconds ***\n"
1461 | completion_msg += "Final output:\n"
1462 | completion_msg += "\n".join(result_lines)
1463 |
1464 | # Replace the result with a clean summary
1465 | result = f">>> {command_entry}\n{completion_msg}"
1466 |
1467 | # Only detect and export graphs if called from VS Code extension (not from LLM/MCP)
1468 | if auto_name_graphs:
1469 | # Detect and export any graphs created by the do file
1470 |                     # Graphs remain in memory because _gr_list was enabled before the run (which used inline=False)
1471 | try:
1472 | logging.debug("Attempting to detect graphs from do file (interactive mode)...")
1473 | graphs = display_graphs_interactive(graph_format='png', width=800, height=600)
1474 | logging.debug(f"Graph detection returned: {graphs}")
1475 | if graphs:
1476 | graph_info = "\n\n" + "="*60 + "\n"
1477 | graph_info += f"GRAPHS DETECTED: {len(graphs)} graph(s) created\n"
1478 | graph_info += "="*60 + "\n"
1479 | for graph in graphs:
1480 | # Include command if available, using special format for JavaScript parsing
1481 | if 'command' in graph and graph['command']:
1482 | graph_info += f" • {graph['name']}: {graph['path']} [CMD: {graph['command']}]\n"
1483 | else:
1484 | graph_info += f" • {graph['name']}: {graph['path']}\n"
1485 | result += graph_info
1486 | logging.info(f"Detected {len(graphs)} graphs from do file: {[g['name'] for g in graphs]}")
1487 | else:
1488 | logging.debug("No graphs detected from do file")
1489 | except Exception as e:
1490 | logging.warning(f"Error detecting graphs: {str(e)}")
1491 | logging.debug(f"Graph detection error details: {traceback.format_exc()}")
1492 |
1493 | # Log the final file location
1494 | result += f"\n\nLog file saved to: {custom_log_file}"
1495 | except Exception as e:
1496 | logging.error(f"Error reading final log: {str(e)}")
1497 | result += f"\n*** WARNING: Error reading final log: {str(e)} ***\n"
1498 | else:
1499 | logging.warning(f"Log file not found after execution: {custom_log_file}")
1500 | result += f"\n*** WARNING: Log file not found after execution ***\n"
1501 |
1502 | # Try to get a status update from Stata
1503 | try:
1504 | status = run_stata_command("display _rc", clear_history=False)
1505 | result += f"\nStata return code: {status}\n"
1506 |                 except Exception:
1507 |                     pass  # best-effort status check; ignore failures
1508 | else:
1509 | # Stata not available
1510 | error_msg = "Stata is not available. Please check if Stata is installed and configured correctly."
1511 | logging.error(error_msg)
1512 | result = f">>> {command_entry}\n{error_msg}"
1513 | except Exception as e:
1514 | error_msg = f"Error running do file: {str(e)}"
1515 | logging.error(error_msg)
1516 | result = f">>> {command_entry}\n{error_msg}"
1517 |
1518 | # Add to command history and return result
1519 | command_history.append({"command": command_entry, "result": result})
1520 | return result
1521 |
1522 | except Exception as e:
1523 | error_msg = f"Error in run_stata_file: {str(e)}"
1524 | logging.error(error_msg)
1525 | return error_msg
1526 |
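# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original stata_mcp_server.py): the
# timeout handling in run_stata_file() follows a general pattern -- run the
# blocking PyStata call in a daemon thread, poll on a fixed interval, and stop
# waiting once a deadline passes. A minimal, self-contained version of that
# pattern (the function name here is hypothetical) looks roughly like this:
def _example_run_with_timeout(blocking_fn, timeout_s: float, poll_s: float = 0.5):
    """Run blocking_fn in a daemon thread; return (finished, error_or_None)."""
    import threading

    captured = {}

    def _target():
        try:
            blocking_fn()
        except Exception as exc:  # capture rather than raise across threads
            captured['error'] = exc

    worker = threading.Thread(target=_target, daemon=True)
    worker.start()
    deadline = time.time() + timeout_s
    while worker.is_alive() and time.time() < deadline:
        # Progress reporting / log tailing would happen here.
        time.sleep(poll_s)
    return (not worker.is_alive(), captured.get('error'))
# ---------------------------------------------------------------------------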
1527 | # Function to kill any process using the specified port
1528 | def kill_process_on_port(port):
1529 | """Kill any process that is currently using the specified port"""
1530 | try:
1531 | if platform.system() == "Windows":
1532 | # Windows command to find and kill process on port
1533 | find_cmd = f"netstat -ano | findstr :{port}"
1534 | try:
1535 | result = subprocess.check_output(find_cmd, shell=True).decode()
1536 |
1537 | if result:
1538 | # Extract PID from the result
1539 | for line in result.strip().split('\n'):
1540 | if f":{port}" in line and "LISTENING" in line:
1541 | pid = line.strip().split()[-1]
1542 | logging.info(f"Found process with PID {pid} using port {port}")
1543 |
1544 | # Kill the process
1545 | kill_cmd = f"taskkill /F /PID {pid}"
1546 | subprocess.check_output(kill_cmd, shell=True)
1547 | logging.info(f"Killed process with PID {pid}")
1548 | break
1549 | else:
1550 | logging.info(f"No process found using port {port}")
1551 | except subprocess.CalledProcessError:
1552 | # No process found using the port (findstr returns 1 when no matches found)
1553 | logging.info(f"No process found using port {port}")
1554 | else:
1555 | # macOS/Linux command to find and kill process on port
1556 | try:
1557 | # Find the process IDs using the port
1558 | find_cmd = f"lsof -i :{port} -t"
1559 | result = subprocess.check_output(find_cmd, shell=True).decode().strip()
1560 |
1561 | if result:
1562 | # Handle multiple PIDs (one per line)
1563 | pids = result.split('\n')
1564 | for pid in pids:
1565 | pid = pid.strip()
1566 | if pid:
1567 | logging.info(f"Found process with PID {pid} using port {port}")
1568 |
1569 | # Kill the process
1570 | try:
1571 | os.kill(int(pid), signal.SIGKILL) # Use SIGKILL for more forceful termination
1572 | logging.info(f"Killed process with PID {pid}")
1573 | except Exception as kill_error:
1574 | logging.warning(f"Error killing process with PID {pid}: {str(kill_error)}")
1575 |
1576 | # Wait a moment to ensure the port is released
1577 | time.sleep(1)
1578 | else:
1579 | logging.info(f"No process found using port {port}")
1580 | except subprocess.CalledProcessError:
1581 | # No process found using the port
1582 | logging.info(f"No process found using port {port}")
1583 |
1584 | except Exception as e:
1585 | logging.warning(f"Error killing process on port {port}: {str(e)}")
1586 |
1587 | # Double-check if port is still in use
1588 | try:
1589 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
1590 | s.settimeout(1)
1591 | result = s.connect_ex(('localhost', port))
1592 | if result == 0:
1593 | logging.warning(f"Port {port} is still in use after attempting to kill processes")
1594 | logging.warning(f"Please manually kill any processes using port {port} or use a different port")
1595 | else:
1596 | logging.info(f"Port {port} is now available")
1597 | except Exception as socket_error:
1598 | logging.warning(f"Error checking port availability: {str(socket_error)}")
1599 |
1600 | # Function to find an available port
1601 | def find_available_port(start_port, max_attempts=10):
1602 | """Find an available port starting from start_port"""
1603 | for port_offset in range(max_attempts):
1604 | port = start_port + port_offset
1605 | try:
1606 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
1607 | s.settimeout(1)
1608 | result = s.connect_ex(('localhost', port))
1609 | if result != 0: # Port is available
1610 | logging.info(f"Found available port: {port}")
1611 | return port
1612 | except Exception as e:
1613 | logging.warning(f"Error checking port {port}: {str(e)}")
1614 |
1615 | # If we get here, we couldn't find an available port
1616 | logging.warning(f"Could not find an available port after {max_attempts} attempts")
1617 | return None
1618 |
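# Illustrative usage sketch (not part of the original file): a typical startup
# sequence could combine the two helpers above -- try to free the preferred
# port, then fall back to the next available one if it is still busy.
def _example_pick_port(preferred: int = 4000) -> int:
    kill_process_on_port(preferred)
    available = find_available_port(preferred)
    return available if available is not None else preferred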
1619 | # Parameter models for the MCP tools
1620 | class RunSelectionParams(BaseModel):
1621 | selection: str = Field(..., description="The Stata code to execute")
1622 |
1623 | class RunFileParams(BaseModel):
1624 | file_path: str = Field(..., description="The full path to the .do file")
1625 | timeout: int = Field(600, description="Timeout in seconds (default: 600 seconds / 10 minutes)")
1626 |
1627 | # Define Legacy VS Code Extension Support
1628 | class ToolRequest(BaseModel):
1629 | tool: str
1630 | parameters: Dict[str, Any]
1631 |
1632 | class ToolResponse(BaseModel):
1633 | status: str
1634 | result: Optional[str] = None
1635 | message: Optional[str] = None
1636 |
1637 | # Define lifespan context manager for startup/shutdown events
1638 | @asynccontextmanager
1639 | async def lifespan(app: FastAPI):
1640 | """Handle application lifespan events"""
1641 | # Startup: Log startup
1642 | logging.info("FastAPI application starting up")
1643 |
1644 | # Start HTTP session manager if it exists
1645 | if hasattr(app.state, '_http_session_manager_starter'):
1646 | logging.debug("Calling HTTP session manager startup handler")
1647 | await app.state._http_session_manager_starter()
1648 |
1649 | yield # Application runs
1650 |
1651 | # Shutdown: Stop HTTP session manager if it exists
1652 | if hasattr(app.state, '_http_session_manager_stopper'):
1653 | logging.debug("Calling HTTP session manager shutdown handler")
1654 | await app.state._http_session_manager_stopper()
1655 |
1656 | # Cleanup if needed
1657 | logging.info("FastAPI application shutting down")
1658 |
1659 | # Create the FastAPI app with lifespan handler
1660 | app = FastAPI(
1661 | title=SERVER_NAME,
1662 | version=SERVER_VERSION,
1663 | description="Stata MCP Server - Exposes Stata functionality to AI models via MCP protocol",
1664 | lifespan=lifespan
1665 | )
1666 |
1667 | # Define regular FastAPI routes for Stata functions
1668 | @app.post("/run_selection", operation_id="stata_run_selection", response_class=Response)
1669 | async def stata_run_selection_endpoint(selection: str) -> Response:
1670 | """Run selected Stata code and return the output"""
1671 | logging.info(f"Running selection: {selection}")
1672 | result = run_stata_selection(selection)
1673 | # Format output for better display - replace escaped newlines with actual newlines
1674 | formatted_result = result.replace("\\n", "\n")
1675 | return Response(content=formatted_result, media_type="text/plain")
1676 |
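# Illustrative client sketch (not part of the original file): FastAPI treats the
# bare `selection` argument above as a query parameter, so the endpoint can be
# called as a POST with `?selection=...`. The host/port are assumptions.
def _example_call_run_selection(code: str, base_url: str = "http://localhost:4000") -> str:
    import httpx
    resp = httpx.post(f"{base_url}/run_selection", params={"selection": code}, timeout=60.0)
    resp.raise_for_status()
    return resp.text  # plain-text Stata output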
1677 | async def stata_run_file_stream(file_path: str, timeout: int = 600):
1678 | """Async generator that runs Stata file and yields SSE progress events
1679 |
1680 | Args:
1681 | file_path: Path to the .do file
1682 | timeout: Timeout in seconds
1683 |
1684 | Yields:
1685 | SSE formatted events with progress updates
1686 | """
1687 | import threading
1688 | import queue
1689 |
1690 | # Queue to communicate between threads
1691 | progress_queue = queue.Queue()
1692 | result_queue = queue.Queue()
1693 |
1694 | def run_with_progress():
1695 | """Run Stata file in thread, sending progress to queue"""
1696 | try:
1697 | # Run the file and collect result
1698 | result = run_stata_file(file_path, timeout=timeout)
1699 | result_queue.put(('success', result))
1700 | except Exception as e:
1701 | result_queue.put(('error', str(e)))
1702 |
1703 | # Start execution thread
1704 | thread = threading.Thread(target=run_with_progress, daemon=True)
1705 | thread.start()
1706 |
1707 | # Yield initial event
1708 | yield f"data: Starting execution of {os.path.basename(file_path)}...\n\n"
1709 |
1710 | start_time = time.time()
1711 | last_check = start_time
1712 | check_interval = 2.0 # Check every 2 seconds for responsive streaming
1713 |
1714 | # Monitor progress
1715 | while thread.is_alive():
1716 | current_time = time.time()
1717 | elapsed = current_time - start_time
1718 |
1719 | # Check if it's time for an update
1720 | if current_time - last_check >= check_interval:
1721 | # Yield progress event
1722 | yield f"data: Executing... {elapsed:.1f}s elapsed\n\n"
1723 | last_check = current_time
1724 |
1725 | # Sleep briefly to avoid busy waiting
1726 | await asyncio.sleep(0.1)
1727 |
1728 | # Check if execution exceeded timeout
1729 | if elapsed > timeout:
1730 | yield f"data: ERROR: Execution timed out after {timeout}s\n\n"
1731 | break
1732 |
1733 | # Get final result
1734 | try:
1735 | status, result = result_queue.get(timeout=1.0)
1736 | if status == 'error':
1737 | yield f"data: ERROR: {result}\n\n"
1738 | else:
1739 | # Format and send final output
1740 | formatted_result = result.replace("\\n", "\n")
1741 | # Split into chunks to avoid overwhelming SSE
1742 | lines = formatted_result.split('\n')
1743 | for i in range(0, len(lines), 10):
1744 | chunk = '\n'.join(lines[i:i+10])
1745 | # Escape newlines in SSE data field
1746 | escaped_chunk = chunk.replace('\n', '\\n')
1747 | yield f"data: {escaped_chunk}\n\n"
1748 | await asyncio.sleep(0.05) # Small delay between chunks
1749 |
1750 | yield "data: *** Execution completed ***\n\n"
1751 | except queue.Empty:
1752 | yield "data: ERROR: Failed to get execution result\n\n"
1753 |
1754 | @app.get("/run_file", operation_id="stata_run_file", response_class=Response)
1755 | async def stata_run_file_endpoint(
1756 | file_path: str,
1757 | timeout: int = 600
1758 | ) -> Response:
1759 | """Run a Stata .do file and return the output (MCP-compatible endpoint)
1760 |
1761 | Args:
1762 | file_path: Path to the .do file
1763 | timeout: Timeout in seconds (default: 600 seconds / 10 minutes)
1764 |
1765 | Returns:
1766 | Response with plain text output
1767 | """
1768 | # Ensure timeout is a valid integer
1769 | try:
1770 | timeout = int(timeout)
1771 | if timeout <= 0:
1772 | logging.warning(f"Invalid timeout value: {timeout}, using default 600")
1773 | timeout = 600
1774 | except (ValueError, TypeError):
1775 | logging.warning(f"Non-integer timeout value: {timeout}, using default 600")
1776 | timeout = 600
1777 |
1778 | logging.info(f"Running file: {file_path} with timeout {timeout} seconds ({timeout/60:.1f} minutes)")
1779 | result = await asyncio.to_thread(run_stata_file, file_path, timeout=timeout)
1780 |
1781 | # Format output for better display - replace escaped newlines with actual newlines
1782 | formatted_result = result.replace("\\n", "\n")
1783 |
1784 | # Log the output (truncated) for debugging
1785 | logging.debug(f"Run file output (first 100 chars): {formatted_result[:100]}...")
1786 |
1787 | return Response(content=formatted_result, media_type="text/plain")
1788 |
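# Illustrative client sketch (not part of the original file): the endpoint above
# is a plain GET with query parameters and returns text/plain output. The
# host/port are assumptions; adjust them to your setup.
def _example_call_run_file(do_file: str, base_url: str = "http://localhost:4000") -> str:
    import httpx
    resp = httpx.get(f"{base_url}/run_file",
                     params={"file_path": do_file, "timeout": 600},
                     timeout=1200.0)
    resp.raise_for_status()
    return resp.text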
1789 | @app.get("/run_file/stream")
1790 | async def stata_run_file_stream_endpoint(
1791 | file_path: str,
1792 | timeout: int = 600
1793 | ):
1794 | """Run a Stata .do file and stream the output via Server-Sent Events (SSE)
1795 |
1796 | This is a separate endpoint for HTTP clients that want real-time streaming updates.
1797 | For MCP clients, use the regular /run_file endpoint.
1798 |
1799 | Args:
1800 | file_path: Path to the .do file
1801 | timeout: Timeout in seconds (default: 600 seconds / 10 minutes)
1802 |
1803 | Returns:
1804 | StreamingResponse with text/event-stream content type
1805 | """
1806 | # Ensure timeout is a valid integer
1807 | try:
1808 | timeout = int(timeout)
1809 | if timeout <= 0:
1810 | logging.warning(f"Invalid timeout value: {timeout}, using default 600")
1811 | timeout = 600
1812 | except (ValueError, TypeError):
1813 | logging.warning(f"Non-integer timeout value: {timeout}, using default 600")
1814 | timeout = 600
1815 |
1816 | logging.info(f"[STREAM] Running file: {file_path} with timeout {timeout} seconds ({timeout/60:.1f} minutes)")
1817 |
1818 | return StreamingResponse(
1819 | stata_run_file_stream(file_path, timeout),
1820 | media_type="text/event-stream",
1821 | headers={
1822 | "Cache-Control": "no-cache",
1823 | "Connection": "keep-alive",
1824 | "X-Accel-Buffering": "no", # Disable nginx buffering
1825 | }
1826 | )
1827 |
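# Illustrative client sketch (not part of the original file): consuming the SSE
# stream above with httpx. Each event arrives as a "data: ..." line followed by
# a blank line, and newlines inside a result chunk are escaped as literal "\n".
# The host/port (and the example .do file name in the usage note) are assumptions.
# Usage: for event in _example_stream_run_file("analysis.do"): print(event)
def _example_stream_run_file(do_file: str, base_url: str = "http://localhost:4000"):
    import httpx
    with httpx.Client(timeout=None) as client:
        with client.stream("GET", f"{base_url}/run_file/stream",
                           params={"file_path": do_file, "timeout": 600}) as resp:
            resp.raise_for_status()
            for line in resp.iter_lines():
                if line.startswith("data: "):
                    yield line[len("data: "):].replace("\\n", "\n")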
1828 | # MCP server will be initialized in main() after args are parsed
1829 |
1830 | # Add FastAPI endpoint for legacy VS Code extension
1831 | @app.post("/v1/tools", include_in_schema=False)
1832 | async def call_tool(request: ToolRequest) -> ToolResponse:
1833 | try:
1834 | # Map VS Code extension tool names to MCP tool names
1835 | tool_name_map = {
1836 | "run_selection": "stata_run_selection",
1837 | "run_file": "stata_run_file"
1838 | }
1839 |
1840 | # Get the actual tool name
1841 | mcp_tool_name = tool_name_map.get(request.tool, request.tool)
1842 |
1843 | # Log the request
1844 | logging.info(f"REST API request for tool: {request.tool} -> {mcp_tool_name}")
1845 |
1846 | # Check if the tool exists
1847 | if mcp_tool_name not in ["stata_run_selection", "stata_run_file"]:
1848 | return ToolResponse(
1849 | status="error",
1850 | message=f"Unknown tool: {request.tool}"
1851 | )
1852 |
1853 | # Execute the appropriate function
1854 | if mcp_tool_name == "stata_run_selection":
1855 | if "selection" not in request.parameters:
1856 | return ToolResponse(
1857 | status="error",
1858 | message="Missing required parameter: selection"
1859 | )
1860 | # Get optional working_dir parameter
1861 | working_dir = request.parameters.get("working_dir", None)
1862 | # Enable auto_detect_graphs for VS Code extension calls
1863 | result = run_stata_selection(request.parameters["selection"], working_dir=working_dir, auto_detect_graphs=True)
1864 | # Format output for better display
1865 | result = result.replace("\\n", "\n")
1866 |
1867 | elif mcp_tool_name == "stata_run_file":
1868 | if "file_path" not in request.parameters:
1869 | return ToolResponse(
1870 | status="error",
1871 | message="Missing required parameter: file_path"
1872 | )
1873 |
1874 | # Get the file path from the parameters
1875 | file_path = request.parameters["file_path"]
1876 |
1877 | # Get timeout parameter if provided, otherwise use default (10 minutes)
1878 | timeout = request.parameters.get("timeout", 600)
1879 | try:
1880 | timeout = int(timeout) # Ensure it's an integer
1881 | if timeout <= 0:
1882 | logging.warning(f"Invalid timeout value: {timeout}, using default 600")
1883 | timeout = 600
1884 | except (ValueError, TypeError):
1885 | logging.warning(f"Non-integer timeout value: {timeout}, using default 600")
1886 | timeout = 600
1887 |
1888 | logging.info(f"MCP run_file request for: {file_path} with timeout {timeout} seconds ({timeout/60:.1f} minutes)")
1889 |
1890 | # Normalize the path for cross-platform compatibility
1891 | file_path = os.path.normpath(file_path)
1892 |
1893 | # On Windows, convert forward slashes to backslashes if needed
1894 | if platform.system() == "Windows" and '/' in file_path:
1895 | file_path = file_path.replace('/', '\\')
1896 |
1897 | # Run the file through the run_stata_file function with timeout
1898 | # Enable auto_name_graphs for VS Code extension calls
1899 | result = run_stata_file(file_path, timeout=timeout, auto_name_graphs=True)
1900 |
1901 | # Format output for better display
1902 | result = result.replace("\\n", "\n")
1903 |
1904 | # Log the output length for debugging
1905 | logging.debug(f"MCP run_file output length: {len(result)}")
1906 |
1907 | # If no output was captured, log a warning
1908 | if "Command executed but" in result and "output not captured" in result:
1909 | logging.warning(f"No output captured for file: {file_path}")
1910 |
1911 | # If file not found error, make the message more helpful
1912 | if "File not found" in result:
1913 | # Add help text explaining common issues with Windows paths
1914 | if platform.system() == "Windows":
1915 | result += "\n\nCommon Windows path issues:\n"
1916 | result += "1. Make sure the file path uses correct separators (use \\ instead of /)\n"
1917 | result += "2. Check if the file exists in the specified location\n"
1918 | result += "3. If using relative paths, the current working directory is: " + os.getcwd()
1919 |
1920 | # Return successful response
1921 | return ToolResponse(
1922 | status="success",
1923 | result=result
1924 | )
1925 |
1926 | except Exception as e:
1927 | logging.error(f"Error handling tool request: {str(e)}")
1928 | return ToolResponse(
1929 | status="error",
1930 | message=f"Server error: {str(e)}"
1931 | )
1932 |
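# Illustrative client sketch (not part of the original file): the legacy
# /v1/tools endpoint accepts a ToolRequest JSON body and returns a ToolResponse.
# The host/port are assumptions; adjust them to your setup.
def _example_call_legacy_tool(do_file: str, base_url: str = "http://localhost:4000") -> str:
    import httpx
    payload = {
        "tool": "run_file",  # mapped to stata_run_file by the endpoint above
        "parameters": {"file_path": do_file, "timeout": 600},
    }
    resp = httpx.post(f"{base_url}/v1/tools", json=payload, timeout=1200.0)
    resp.raise_for_status()
    body = resp.json()  # {"status": ..., "result": ..., "message": ...}
    if body.get("status") != "success":
        raise RuntimeError(body.get("message") or "tool call failed")
    return body["result"]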
1933 | # Simplified health check endpoint - only report server status without executing Stata commands
1934 | @app.get("/health", include_in_schema=False)
1935 | async def health_check():
1936 | return {
1937 | "status": "ok",
1938 | "service": SERVER_NAME,
1939 | "version": SERVER_VERSION,
1940 | "stata_available": stata_available
1941 | }
1942 |
1943 | # Endpoint to serve graph images
1944 | # Hidden from OpenAPI schema so it won't be exposed to LLMs via MCP
1945 | @app.get("/graphs/{graph_name}", include_in_schema=False)
1946 | async def get_graph(graph_name: str):
1947 | """Serve a graph image file"""
1948 | try:
1949 | # Construct the path to the graph file
1950 | if extension_path:
1951 | graphs_dir = os.path.join(extension_path, 'graphs')
1952 | else:
1953 | graphs_dir = os.path.join(tempfile.gettempdir(), 'stata_mcp_graphs')
1954 |
1955 | # Support both with and without .png extension
1956 | if not graph_name.endswith('.png'):
1957 | graph_name = f"{graph_name}.png"
1958 |
1959 | graph_path = os.path.join(graphs_dir, graph_name)
1960 |
1961 | # Check if file exists
1962 | if not os.path.exists(graph_path):
1963 | return Response(
1964 | content=f"Graph not found: {graph_name}",
1965 | status_code=404,
1966 | media_type="text/plain"
1967 | )
1968 |
1969 | # Read and return the image file
1970 | with open(graph_path, 'rb') as f:
1971 | image_data = f.read()
1972 |
1973 | return Response(content=image_data, media_type="image/png")
1974 |
1975 | except Exception as e:
1976 | logging.error(f"Error serving graph {graph_name}: {str(e)}")
1977 | return Response(
1978 | content=f"Error serving graph: {str(e)}",
1979 | status_code=500
1980 | )
1981 |
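# Illustrative client sketch (not part of the original file): downloading an
# exported graph through the endpoint above. The graph name is whatever was
# reported in the "GRAPHS DETECTED" block of the do-file output. The host/port
# are assumptions.
def _example_download_graph(graph_name: str, dest_dir: str = ".",
                            base_url: str = "http://localhost:4000") -> str:
    import httpx
    resp = httpx.get(f"{base_url}/graphs/{graph_name}", timeout=30.0)
    resp.raise_for_status()
    filename = graph_name if graph_name.endswith(".png") else f"{graph_name}.png"
    out_path = os.path.join(dest_dir, filename)
    with open(out_path, "wb") as fh:
        fh.write(resp.content)
    return out_path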
1982 | @app.post("/clear_history", include_in_schema=False)
1983 | async def clear_history_endpoint():
1984 | """Clear the command history"""
1985 | global command_history
1986 | try:
1987 | count = len(command_history)
1988 | command_history = []
1989 | logging.info(f"Cleared command history ({count} items)")
1990 | return {"status": "success", "message": f"Cleared {count} items from history"}
1991 | except Exception as e:
1992 | logging.error(f"Error clearing history: {str(e)}")
1993 | return {"status": "error", "message": str(e)}
1994 |
1995 | @app.get("/view_data", include_in_schema=False)
1996 | async def view_data_endpoint(if_condition: str = None):
1997 | """Get current Stata data as a pandas DataFrame and return as JSON
1998 |
1999 | Args:
2000 | if_condition: Optional Stata if condition (e.g., "price > 5000 & mpg < 30")
2001 | """
2002 | global stata_available, stata
2003 |
2004 | try:
2005 | if not stata_available or stata is None:
2006 | logging.error("Stata is not available")
2007 | return Response(
2008 | content=json.dumps({
2009 | "status": "error",
2010 | "message": "Stata is not initialized"
2011 | }),
2012 | media_type="application/json",
2013 | status_code=500
2014 | )
2015 |
2016 | # Apply if condition if provided
2017 | if if_condition:
2018 | logging.info(f"Applying filter: if {if_condition}")
2019 | try:
2020 | # Get full data first
2021 | df = stata.pdataframe_from_data()
2022 |
2023 | if df is None or df.empty:
2024 | raise Exception("No data currently loaded in Stata")
2025 |
2026 | # Use Stata to create a filter marker variable
2027 | try:
2028 | import sfi
2029 |
2030 | # First, check if variable already exists and drop it
2031 | try:
2032 | stata.run("capture drop _filter_marker", inline=False, echo=False)
2033 | except:
2034 | pass
2035 |
2036 | # Generate marker for rows that match the condition
2037 | gen_cmd = f"quietly generate byte _filter_marker = ({if_condition})"
2038 | logging.debug(f"Running filter command: {gen_cmd}")
2039 |
2040 | try:
2041 | stata.run(gen_cmd, inline=False, echo=False)
2042 | logging.debug(f"Generate command executed successfully")
2043 | except SystemError as se:
2044 | logging.error(f"SystemError in generate command: {str(se)}")
2045 | raise Exception(f"Invalid condition syntax: {if_condition}")
2046 | except Exception as e:
2047 | logging.error(f"Exception in generate command: {type(e).__name__}: {str(e)}")
2048 | raise Exception(f"Error creating filter: {str(e)}")
2049 |
2050 | # Get the marker variable values using SFI
2051 | n_obs = sfi.Data.getObsTotal()
2052 | logging.debug(f"Total observations: {n_obs}")
2053 |
2054 | # Get the variable index for _filter_marker
2055 | var_index = sfi.Data.getVarIndex('_filter_marker')
2056 | logging.debug(f"Filter marker variable index: {var_index}")
2057 |
2058 | if var_index < 0:
2059 | raise Exception("Failed to create filter marker variable")
2060 |
2061 | # Read the filter values for all observations
2062 | # NOTE: sfi.Data.get() returns nested lists like [[1]] or [[0]]
2063 | # We need to extract the actual value
2064 | filter_mask = []
2065 | for i in range(n_obs):
2066 | val = sfi.Data.get('_filter_marker', i)
2067 | # Extract the actual value from nested list structure
2068 | if isinstance(val, list) and len(val) > 0:
2069 | if isinstance(val[0], list) and len(val[0]) > 0:
2070 | actual_val = val[0][0]
2071 | else:
2072 | actual_val = val[0]
2073 | else:
2074 | actual_val = val
2075 | filter_mask.append(actual_val == 1)
2076 |
2077 | # Debug: Log first few values and count
2078 | true_count = sum(filter_mask)
2079 | if n_obs > 0:
2080 | sample_vals = [sfi.Data.get('_filter_marker', i) for i in range(min(5, n_obs))]
2081 | logging.debug(f"First 5 marker values (raw): {sample_vals}")
2082 | logging.debug(f"Filter mask true count: {true_count} out of {n_obs}")
2083 |
2084 | # Drop the temporary marker
2085 | stata.run("quietly drop _filter_marker", inline=False, echo=False)
2086 |
2087 | # Filter the DataFrame using the mask
2088 | df = df[filter_mask].reset_index(drop=True)
2089 | logging.info(f"Filtered data: {len(df)} rows match condition (out of {n_obs} total)")
2090 |
2091 | except Exception as stata_err:
2092 | # Clean up if there's an error
2093 | try:
2094 | stata.run("capture drop _filter_marker", inline=False, echo=False)
2095 | except:
2096 | pass
2097 | logging.error(f"Filter processing error: {type(stata_err).__name__}: {str(stata_err)}")
2098 | raise Exception(f"{str(stata_err)}")
2099 |
2100 | except Exception as filter_err:
2101 | logging.error(f"Filter error: {str(filter_err)}")
2102 | return Response(
2103 | content=json.dumps({
2104 | "status": "error",
2105 | "message": f"Filter error: {str(filter_err)}"
2106 | }),
2107 | media_type="application/json",
2108 | status_code=400
2109 | )
2110 | else:
2111 | # Get data as pandas DataFrame without filtering
2112 | logging.info("Getting data from Stata using pdataframe_from_data()")
2113 | df = stata.pdataframe_from_data()
2114 |
2115 | # Check if data is empty
2116 | if df is None or df.empty:
2117 | logging.info("No data currently loaded in Stata")
2118 | return Response(
2119 | content=json.dumps({
2120 | "status": "success",
2121 | "message": "No data currently loaded",
2122 | "data": [],
2123 | "columns": [],
2124 | "rows": 0
2125 | }),
2126 | media_type="application/json"
2127 | )
2128 |
2129 | # Get data info
2130 | rows, cols = df.shape
2131 | logging.info(f"Data retrieved: {rows} observations, {cols} variables")
2132 |
2133 | # Convert DataFrame to JSON format
2134 | # Replace NaN with None for proper JSON serialization
2135 | df_clean = df.replace({float('nan'): None})
2136 |
2137 | # Convert to list of lists for better performance
2138 | data_values = df_clean.values.tolist()
2139 | column_names = df_clean.columns.tolist()
2140 |
2141 | # Get data types for each column
2142 | dtypes = {col: str(df[col].dtype) for col in df.columns}
2143 |
2144 | return Response(
2145 | content=json.dumps({
2146 | "status": "success",
2147 | "data": data_values,
2148 | "columns": column_names,
2149 | "dtypes": dtypes,
2150 | "rows": int(rows),
2151 | "index": df.index.tolist()
2152 | }),
2153 | media_type="application/json"
2154 | )
2155 |
2156 | except Exception as e:
2157 | error_msg = f"Error getting data: {str(e)}"
2158 | logging.error(error_msg)
2159 | logging.error(traceback.format_exc())
2160 | return Response(
2161 | content=json.dumps({
2162 | "status": "error",
2163 | "message": error_msg
2164 | }),
2165 | media_type="application/json",
2166 | status_code=500
2167 | )
2168 |
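# Illustrative client sketch (not part of the original file): the /view_data
# response can be reassembled into a pandas DataFrame from its "columns" and
# "data" fields. pandas availability and the host/port are assumptions.
def _example_fetch_data(if_condition: Optional[str] = None,
                        base_url: str = "http://localhost:4000"):
    import httpx
    import pandas as pd
    params = {"if_condition": if_condition} if if_condition else {}
    resp = httpx.get(f"{base_url}/view_data", params=params, timeout=120.0)
    payload = resp.json()
    if payload.get("status") != "success":
        raise RuntimeError(payload.get("message", "view_data failed"))
    return pd.DataFrame(payload.get("data", []), columns=payload.get("columns", []))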
2169 | @app.get("/interactive", include_in_schema=False)
2170 | async def interactive_window(file: str = None, code: str = None):
2171 | """Serve the interactive Stata window as a full webpage"""
2172 | # If a file path or code is provided, we'll auto-execute it on page load
2173 | auto_run_file = file if file else ""
2174 | auto_run_code = code if code else ""
2175 |
2176 | # Use regular string and insert the file path separately to avoid f-string conflicts
2177 | html_content = """
2178 | <!DOCTYPE html>
2179 | <html lang="en">
2180 | <head>
2181 | <meta charset="UTF-8">
2182 | <meta name="viewport" content="width=device-width, initial-scale=1.0">
2183 | <title>Stata Interactive Window</title>
2184 | <style>
2185 | * { margin: 0; padding: 0; box-sizing: border-box; }
2186 | body {
2187 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
2188 | background: #1e1e1e;
2189 | color: #d4d4d4;
2190 | height: 100vh;
2191 | display: flex;
2192 | flex-direction: column;
2193 | }
2194 | .main-container {
2195 | display: flex;
2196 | flex: 1;
2197 | overflow: hidden;
2198 | }
2199 | .left-panel {
2200 | flex: 1;
2201 | display: flex;
2202 | flex-direction: column;
2203 | border-right: 1px solid #3e3e42;
2204 | overflow: hidden;
2205 | }
2206 | .output-section {
2207 | flex: 1;
2208 | overflow-y: auto;
2209 | padding: 20px;
2210 | }
2211 | .output-cell {
2212 | border-left: 3px solid #007acc;
2213 | padding-left: 15px;
2214 | margin-bottom: 20px;
2215 | background: #252526;
2216 | padding: 15px;
2217 | border-radius: 4px;
2218 | }
2219 | .command-line {
2220 | color: #4fc1ff;
2221 | font-weight: bold;
2222 | margin-bottom: 10px;
2223 | font-family: 'Consolas', 'Monaco', monospace;
2224 | }
2225 | .command-output {
2226 | font-family: 'Consolas', 'Monaco', monospace;
2227 | white-space: pre-wrap;
2228 | font-size: 13px;
2229 | line-height: 1.5;
2230 | }
2231 | .input-section {
2232 | border-top: 1px solid #3e3e42;
2233 | padding: 20px;
2234 | background: #252526;
2235 | }
2236 | .input-container {
2237 | display: flex;
2238 | gap: 10px;
2239 | }
2240 | #command-input {
2241 | flex: 1;
2242 | background: #3c3c3c;
2243 | border: 1px solid #6c6c6c;
2244 | color: #d4d4d4;
2245 | padding: 12px 15px;
2246 | font-family: 'Consolas', 'Monaco', monospace;
2247 | font-size: 14px;
2248 | border-radius: 4px;
2249 | }
2250 | #command-input:focus {
2251 | outline: none;
2252 | border-color: #007acc;
2253 | }
2254 | #run-button {
2255 | background: #0e639c;
2256 | color: white;
2257 | border: none;
2258 | padding: 12px 30px;
2259 | font-weight: 600;
2260 | cursor: pointer;
2261 | border-radius: 4px;
2262 | transition: background 0.2s;
2263 | }
2264 | #run-button:hover {
2265 | background: #1177bb;
2266 | }
2267 | #run-button:disabled {
2268 | background: #555;
2269 | cursor: not-allowed;
2270 | }
2271 | .right-panel {
2272 | width: 40%;
2273 | overflow-y: auto;
2274 | padding: 20px;
2275 | background: #1e1e1e;
2276 | }
2277 | .graphs-title {
2278 | font-size: 20px;
2279 | font-weight: 600;
2280 | margin-bottom: 20px;
2281 | color: #ffffff;
2282 | }
2283 | .graph-card {
2284 | background: #252526;
2285 | border: 1px solid #3e3e42;
2286 | border-radius: 8px;
2287 | padding: 20px;
2288 | margin-bottom: 20px;
2289 | }
2290 | .graph-card h3 {
2291 | margin-bottom: 15px;
2292 | color: #ffffff;
2293 | }
2294 | .graph-card img {
2295 | width: 100%;
2296 | height: auto;
2297 | border-radius: 4px;
2298 | }
2299 | .error {
2300 | background: #5a1d1d;
2301 | border-left: 3px solid #f48771;
2302 | padding: 15px;
2303 | border-radius: 4px;
2304 | margin-bottom: 20px;
2305 | }
2306 | .hint {
2307 | color: #858585;
2308 | font-size: 12px;
2309 | margin-top: 8px;
2310 | }
2311 | .no-graphs {
2312 | color: #858585;
2313 | font-style: italic;
2314 | text-align: center;
2315 | padding: 40px;
2316 | }
2317 | </style>
2318 | </head>
2319 | <body>
2320 | <div class="main-container">
2321 | <div class="left-panel">
2322 | <div class="output-section" id="output-container"></div>
2323 |
2324 | <div class="input-section">
2325 | <div class="input-container">
2326 | <input type="text" id="command-input"
2327 | placeholder="Enter Stata command (e.g., summarize, scatter y x, regress y x)..."
2328 | autocomplete="off" />
2329 | <button id="run-button">Run</button>
2330 | </div>
2331 | <div class="hint">Press Enter to execute • Ctrl+L to clear output</div>
2332 | </div>
2333 | </div>
2334 |
2335 | <div class="right-panel">
2336 | <div class="graphs-title">Graphs</div>
2337 | <div id="graphs-container">
2338 | <div class="no-graphs">No graphs yet. Run commands to generate graphs.</div>
2339 | </div>
2340 | </div>
2341 | </div>
2342 |
2343 | <script>
2344 | const commandInput = document.getElementById('command-input');
2345 | const runButton = document.getElementById('run-button');
2346 | const outputContainer = document.getElementById('output-container');
2347 | const graphsContainer = document.getElementById('graphs-container');
2348 |
2349 | runButton.addEventListener('click', executeCommand);
2350 | commandInput.addEventListener('keypress', (e) => {
2351 | if (e.key === 'Enter') executeCommand();
2352 | });
2353 |
2354 | document.addEventListener('keydown', async (e) => {
2355 | if (e.ctrlKey && e.key === 'l') {
2356 | e.preventDefault();
2357 | // Clear text output visually
2358 | outputContainer.innerHTML = '';
2359 | // Clear graphs visually
2360 | graphsContainer.innerHTML = '<div class="no-graphs">No graphs yet. Run commands to generate graphs.</div>';
2361 | // Clear server-side command history so it doesn't come back
2362 | try {
2363 | const response = await fetch('/clear_history', {
2364 | method: 'POST',
2365 | headers: { 'Content-Type': 'application/json' }
2366 | });
2367 | const data = await response.json();
2368 | console.log('History cleared:', data.message);
2369 | } catch (err) {
2370 | console.error('Error clearing history:', err);
2371 | }
2372 | }
2373 | });
2374 |
2375 | async function executeCommand() {
2376 | const command = commandInput.value.trim();
2377 | if (!command) return;
2378 |
2379 | runButton.disabled = true;
2380 | runButton.textContent = 'Running...';
2381 |
2382 | try {
2383 | const response = await fetch('/v1/tools', {
2384 | method: 'POST',
2385 | headers: { 'Content-Type': 'application/json' },
2386 | body: JSON.stringify({
2387 | tool: 'run_selection',
2388 | parameters: { selection: command }
2389 | })
2390 | });
2391 |
2392 | const data = await response.json();
2393 |
2394 | if (data.status === 'success') {
2395 | addOutputCell(command, data.result);
2396 | updateGraphs(data.result);
2397 | } else {
2398 | addError(data.message || 'Command failed');
2399 | }
2400 | } catch (error) {
2401 | addError(error.message);
2402 | }
2403 |
2404 | runButton.disabled = false;
2405 | runButton.textContent = 'Run';
2406 | commandInput.value = '';
2407 | commandInput.focus();
2408 | }
2409 |
2410 | function addOutputCell(command, output) {
2411 | const cell = document.createElement('div');
2412 | cell.className = 'output-cell';
2413 | cell.innerHTML = `
2414 | <div class="command-line">> ${escapeHtml(command)}</div>
2415 | <div class="command-output">${escapeHtml(output)}</div>
2416 | `;
2417 | outputContainer.appendChild(cell);
2418 | outputContainer.scrollTop = outputContainer.scrollHeight;
2419 | }
2420 |
2421 | function addError(message) {
2422 | const error = document.createElement('div');
2423 | error.className = 'error';
2424 | error.textContent = 'Error: ' + message;
2425 | outputContainer.appendChild(error);
2426 | outputContainer.scrollTop = outputContainer.scrollHeight;
2427 | }
2428 |
2429 | function updateGraphs(output) {
2430 | // Updated regex to capture optional command: • name: path [CMD: command]
2431 | // Use [^\\n\\[] to stop at newlines or opening bracket
2432 | const graphRegex = /• ([^:]+): ([^\\n\\[]+)(?:\\[CMD: ([^\\]]+)\\])?/g;
2433 | const matches = [...output.matchAll(graphRegex)];
2434 |
2435 | if (matches.length > 0) {
2436 | // Remove "no graphs" message if it exists
2437 | const noGraphsMsg = graphsContainer.querySelector('.no-graphs');
2438 | if (noGraphsMsg) {
2439 | graphsContainer.innerHTML = '';
2440 | }
2441 |
2442 | // Add or update each graph
2443 | matches.forEach(match => {
2444 | const name = match[1].trim();
2445 | const path = match[2].trim();
2446 | const command = match[3] ? match[3].trim() : null;
2447 |
2448 | // Check if graph already exists
2449 | const existingGraph = graphsContainer.querySelector(`[data-graph-name="${name}"]`);
2450 | if (existingGraph) {
2451 | // Update existing graph - force reload by adding timestamp
2452 | updateGraph(existingGraph, name, `/graphs/${encodeURIComponent(name)}`, command);
2453 | } else {
2454 | // Add new graph
2455 | addGraph(name, `/graphs/${encodeURIComponent(name)}`, command);
2456 | }
2457 | });
2458 | }
2459 | }
2460 |
2461 | function updateGraph(existingCard, name, url, command) {
2462 | // Force reload by adding timestamp to bypass cache
2463 | const timestamp = new Date().getTime();
2464 | const urlWithTimestamp = `${url}?t=${timestamp}`;
2465 |
2466 | const commandHtml = command ? `<div style="color: #858585; font-size: 12px; margin-bottom: 8px; font-family: 'Courier New', monospace; background: #1a1a1a; padding: 6px; border-radius: 3px; border-left: 3px solid #4a9eff;">$ ${escapeHtml(command)}</div>` : '';
2467 | existingCard.innerHTML = `
2468 | <h3>${escapeHtml(name)}</h3>
2469 | ${commandHtml}
2470 | <img src="${urlWithTimestamp}" alt="${escapeHtml(name)}"
2471 | onerror="this.parentElement.innerHTML='<p style=\\'color:#f48771\\'>Failed to load graph</p>'">
2472 | `;
2473 | }
2474 |
2475 | function addGraph(name, url, command) {
2476 | const card = document.createElement('div');
2477 | card.className = 'graph-card';
2478 | card.setAttribute('data-graph-name', name);
2479 | const commandHtml = command ? `<div style="color: #858585; font-size: 12px; margin-bottom: 8px; font-family: 'Courier New', monospace; background: #1a1a1a; padding: 6px; border-radius: 3px; border-left: 3px solid #4a9eff;">$ ${escapeHtml(command)}</div>` : '';
2480 | card.innerHTML = `
2481 | <h3>${escapeHtml(name)}</h3>
2482 | ${commandHtml}
2483 | <img src="${url}" alt="${escapeHtml(name)}"
2484 | onerror="this.parentElement.innerHTML='<p style=\\'color:#f48771\\'>Failed to load graph</p>'">
2485 | `;
2486 | graphsContainer.appendChild(card);
2487 | }
2488 |
2489 | function escapeHtml(text) {
2490 | const div = document.createElement('div');
2491 | div.textContent = text;
2492 | return div.innerHTML;
2493 | }
2494 |
2495 | // Auto-execute file or code if provided in URL parameter
2496 | const urlParams = new URLSearchParams(window.location.search);
2497 | const autoRunFile = urlParams.get('file');
2498 | const autoRunCode = urlParams.get('code');
2499 |
2500 | if (autoRunFile) {
2501 | console.log('Auto-running file from URL parameter:', autoRunFile);
2502 | // Run the file on page load
2503 | fetch('/v1/tools', {
2504 | method: 'POST',
2505 | headers: { 'Content-Type': 'application/json' },
2506 | body: JSON.stringify({
2507 | tool: 'run_file',
2508 | parameters: { file_path: autoRunFile }
2509 | })
2510 | })
2511 | .then(response => response.json())
2512 | .then(data => {
2513 | if (data.status === 'success') {
2514 | addOutputCell('Running file: ' + autoRunFile, data.result);
2515 | updateGraphs(data.result);
2516 | } else {
2517 | addError(data.message || 'Failed to run file');
2518 | }
2519 | })
2520 | .catch(error => {
2521 | addError('Error running file: ' + error.message);
2522 | });
2523 | } else if (autoRunCode) {
2524 | console.log('Auto-running code from URL parameter');
2525 | // Run the selected code on page load
2526 | fetch('/v1/tools', {
2527 | method: 'POST',
2528 | headers: { 'Content-Type': 'application/json' },
2529 | body: JSON.stringify({
2530 | tool: 'run_selection',
2531 | parameters: { selection: autoRunCode }
2532 | })
2533 | })
2534 | .then(response => response.json())
2535 | .then(data => {
2536 | if (data.status === 'success') {
2537 | addOutputCell('Running selection', data.result);
2538 | updateGraphs(data.result);
2539 | } else {
2540 | addError(data.message || 'Failed to run code');
2541 | }
2542 | })
2543 | .catch(error => {
2544 | addError('Error running code: ' + error.message);
2545 | });
2546 | }
2547 |
2548 | commandInput.focus();
2549 | </script>
2550 | </body>
2551 | </html>
2552 | """
2553 |     # Legacy no-op: the page reads the 'file'/'code' query parameters client-side, and AUTO_RUN_FILE_PLACEHOLDER does not appear in the HTML above
2554 | if auto_run_file:
2555 | # Escape the file path for JavaScript string
2556 | escaped_file = auto_run_file.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
2557 | html_content = html_content.replace('AUTO_RUN_FILE_PLACEHOLDER', escaped_file)
2558 |
2559 | return Response(content=html_content, media_type="text/html")
2560 |
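# Illustrative sketch (not part of the original file): opening the interactive
# window in a browser, optionally auto-running a .do file via the `file` query
# parameter that the page reads client-side. The host/port are assumptions.
def _example_open_interactive(do_file: Optional[str] = None,
                              base_url: str = "http://localhost:4000") -> None:
    import webbrowser
    from urllib.parse import urlencode
    url = f"{base_url}/interactive"
    if do_file:
        url += "?" + urlencode({"file": do_file})
    webbrowser.open(url)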
2561 |
2562 | def main():
2563 | """Main function to set up and run the server"""
2564 | try:
2565 | # Get Stata path from arguments
2566 | parser = argparse.ArgumentParser(description='Stata MCP Server')
2567 | parser.add_argument('--stata-path', type=str, help='Path to Stata installation')
2568 | parser.add_argument('--port', type=int, default=4000, help='Port to run MCP server on')
2569 | parser.add_argument('--host', type=str, default='localhost', help='Host to bind the server to')
2570 | parser.add_argument('--log-level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
2571 | default='INFO', help='Logging level')
2572 | parser.add_argument('--force-port', action='store_true', help='Force the specified port, even if it requires killing processes')
2573 | parser.add_argument('--log-file', type=str, help='Path to log file (default: stata_mcp_server.log in current directory)')
2574 | parser.add_argument('--stata-edition', type=str, choices=['mp', 'se', 'be'], default='mp',
2575 | help='Stata edition to use (mp, se, be) - default: mp')
2576 | parser.add_argument('--log-file-location', type=str, choices=['extension', 'workspace', 'custom'], default='extension',
2577 | help='Location for .do file logs (extension, workspace, custom) - default: extension')
2578 | parser.add_argument('--custom-log-directory', type=str, default='',
2579 | help='Custom directory for .do file logs (when location is custom)')
2580 |
2581 | # Special handling when running as a module
2582 | if is_running_as_module:
2583 | print(f"Command line arguments when running as module: {sys.argv}")
2584 | # When run as a module, the first arg won't be the script path
2585 | args_to_parse = sys.argv[1:]
2586 | else:
2587 | # Regular mode - arg 0 is script path
2588 | #print(f"[MCP Server] Original command line arguments: {sys.argv}")
2589 | args_to_parse = sys.argv
2590 |
2591 | # Skip if an argument is a duplicate script path (e.g., on Windows with shell:true)
2592 | clean_args = []
2593 | script_path_found = False
2594 |
2595 | for arg in args_to_parse:
2596 | # Skip duplicate script paths, but keep the first one (sys.argv[0])
2597 | if arg.endswith('stata_mcp_server.py'):
2598 | if script_path_found and arg != sys.argv[0]:
2599 | logging.debug(f"Skipping duplicate script path: {arg}")
2600 | continue
2601 | script_path_found = True
2602 |
2603 | clean_args.append(arg)
2604 |
2605 | args_to_parse = clean_args
2606 |
2607 | # Process commands for Stata path with spaces
2608 | fixed_args = []
2609 | i = 0
2610 | while i < len(args_to_parse):
2611 | arg = args_to_parse[i]
2612 |
2613 | if arg == '--stata-path' and i + 1 < len(args_to_parse):
2614 | # The next argument might be a path that got split
2615 | stata_path = args_to_parse[i + 1]
2616 |
2617 | # Check if this is a quoted path
2618 | if (stata_path.startswith('"') and not stata_path.endswith('"')) or (stata_path.startswith("'") and not stata_path.endswith("'")):
2619 | # Look for the rest of the path in subsequent arguments
2620 | i += 2 # Move past '--stata-path' and the first part
2621 |
2622 | # Get the quote character (single or double)
2623 | quote_char = stata_path[0]
2624 | path_parts = [stata_path[1:]] # Remove the starting quote
2625 |
2626 | # Collect all parts until we find the end quote
2627 | while i < len(args_to_parse):
2628 | current = args_to_parse[i]
2629 | if current.endswith(quote_char):
2630 | # Found the end quote
2631 | path_parts.append(current[:-1]) # Remove the ending quote
2632 | break
2633 | else:
2634 | path_parts.append(current)
2635 | i += 1
2636 |
2637 | # Join all parts to form the complete path
2638 | complete_path = " ".join(path_parts)
2639 | fixed_args.append('--stata-path')
2640 | fixed_args.append(complete_path)
2641 | else:
2642 | # Normal path handling (either without quotes or with properly matched quotes)
2643 | fixed_args.append(arg)
2644 | fixed_args.append(stata_path)
2645 | i += 2
2646 | else:
2647 | # For all other arguments, add them as-is
2648 | fixed_args.append(arg)
2649 | i += 1
2650 |
2651 | # Print debug info
2652 | print(f"Command line arguments: {fixed_args}")
2653 |
2654 | # Use the fixed arguments
2655 | args = parser.parse_args(fixed_args[1:] if fixed_args and not is_running_as_module else fixed_args)
2656 | print(f"Parsed arguments: stata_path={args.stata_path}, port={args.port}")
2657 |
2658 | # Check if args.stata_path accidentally captured other arguments
2659 | if args.stata_path and ' --' in args.stata_path:
2660 | # The stata_path might have captured other arguments
2661 | parts = args.stata_path.split(' --')
2662 | # The first part is the actual stata_path
2663 | stata_path = parts[0].strip()
2664 | print(f"WARNING: Detected merged arguments in Stata path. Fixing: {args.stata_path} -> {stata_path}")
2665 | logging.warning(f"Fixed merged arguments in Stata path: {args.stata_path} -> {stata_path}")
2666 | args.stata_path = stata_path
2667 |
2668 | # If Stata path was enclosed in quotes, remove them
2669 | if args.stata_path:
2670 | args.stata_path = args.stata_path.strip('"\'')
2671 | logging.debug(f"Cleaned Stata path: {args.stata_path}")
2672 |
2673 | # Configure log file
2674 | log_file = args.log_file or 'stata_mcp_server.log'
2675 | log_dir = os.path.dirname(log_file)
2676 |
2677 | # Create log directory if needed
2678 | if log_dir and not os.path.exists(log_dir):
2679 | try:
2680 | os.makedirs(log_dir, exist_ok=True)
2681 | print(f"Created log directory: {log_dir}")
2682 | except Exception as e:
2683 | print(f"ERROR: Failed to create log directory {log_dir}: {str(e)}")
2684 | # Continue anyway, the file handler creation will fail if needed
2685 |
2686 | # Always print where we're trying to log
2687 | print(f"Logging to: {os.path.abspath(log_file)}")
2688 |
2689 | # Remove existing handlers
2690 | for handler in logging.getLogger().handlers[:]:
2691 | logging.getLogger().removeHandler(handler)
2692 |
2693 | # Add file handler
2694 | try:
2695 | file_handler = logging.FileHandler(log_file, mode='a', encoding='utf-8')
2696 | file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
2697 | logging.getLogger().addHandler(file_handler)
2698 | print(f"Successfully configured log file: {os.path.abspath(log_file)}")
2699 | except Exception as log_error:
2700 | print(f"ERROR: Failed to configure log file {log_file}: {str(log_error)}")
2701 | # Continue with console logging only
2702 |
2703 | # Re-add console handler
2704 | logging.getLogger().addHandler(console_handler)
2705 |
2706 | # Set log level
2707 | log_level = getattr(logging, args.log_level)
2708 | logging.getLogger().setLevel(log_level)
2709 |
2710 | # Set Stata edition
2711 | global stata_edition, log_file_location, custom_log_directory, extension_path
2712 | stata_edition = args.stata_edition.lower()
2713 | log_file_location = args.log_file_location
2714 | custom_log_directory = args.custom_log_directory
2715 |
2716 | # Try to determine extension path from the log file path
2717 | if args.log_file:
2718 | # If log file is in a logs subdirectory, the parent of that is the extension path
2719 | log_file_dir = os.path.dirname(os.path.abspath(args.log_file))
2720 | if log_file_dir.endswith('logs'):
2721 | extension_path = os.path.dirname(log_file_dir)
2722 | else:
2723 | extension_path = log_file_dir
2724 |
2725 | logging.info(f"Using Stata {stata_edition.upper()} edition")
2726 | logging.info(f"Log file location setting: {log_file_location}")
2727 | if custom_log_directory:
2728 | logging.info(f"Custom log directory: {custom_log_directory}")
2729 | if extension_path:
2730 | logging.info(f"Extension path: {extension_path}")
2731 |
2732 | # Log startup information
2733 | logging.info(f"Log initialized at {os.path.abspath(log_file)}")
2734 | logging.info(f"Log level set to {args.log_level}")
2735 | logging.info(f"Platform: {platform.system()} {platform.release()}")
2736 | logging.info(f"Python version: {sys.version}")
2737 | logging.info(f"Working directory: {os.getcwd()}")
2738 |
2739 | # Set Stata path
2740 | global STATA_PATH
2741 | if args.stata_path:
2742 | # Strip quotes if present
2743 | STATA_PATH = args.stata_path.strip('"\'')
2744 | else:
2745 | STATA_PATH = os.environ.get('STATA_PATH')
2746 | if not STATA_PATH:
2747 | if platform.system() == 'Darwin': # macOS
2748 | STATA_PATH = '/Applications/Stata'
2749 | elif platform.system() == 'Windows':
2750 | # Try common Windows paths
2751 | potential_paths = [
2752 | 'C:\\Program Files\\Stata18',
2753 | 'C:\\Program Files\\Stata17',
2754 | 'C:\\Program Files\\Stata16',
2755 | 'C:\\Program Files (x86)\\Stata18',
2756 | 'C:\\Program Files (x86)\\Stata17',
2757 | 'C:\\Program Files (x86)\\Stata16'
2758 | ]
2759 | for path in potential_paths:
2760 | if os.path.exists(path):
2761 | STATA_PATH = path
2762 | break
2763 | if not STATA_PATH:
2764 | STATA_PATH = 'C:\\Program Files\\Stata18' # Default if none found
2765 | else: # Linux
2766 | STATA_PATH = '/usr/local/stata'
2767 |
2768 | logging.info(f"Using Stata path: {STATA_PATH}")
2769 | if not os.path.exists(STATA_PATH):
2770 | logging.error(f"Stata path does not exist: {STATA_PATH}")
2771 | print(f"ERROR: Stata path does not exist: {STATA_PATH}")
2772 | sys.exit(1)
2773 |
2774 | # Check if the requested port is available
2775 | port = args.port
2776 |
2777 | if args.force_port:
2778 | # Kill any existing process on the port
2779 | kill_process_on_port(port)
2780 | else:
2781 | # Always kill processes on port 4000
2782 | if port == 4000:
2783 | logging.info(f"Ensuring port 4000 is available by terminating any existing processes")
2784 | kill_process_on_port(port)
2785 | else:
2786 | # For other ports, check if available
2787 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
2788 | s.settimeout(1)
2789 | result = s.connect_ex(('localhost', port))
2790 | if result == 0: # Port is in use
2791 | logging.warning(f"Port {port} is already in use")
2792 | # Kill the process on the port instead of finding a new one
2793 | logging.info(f"Attempting to kill process using port {port}")
2794 | kill_process_on_port(port)
2795 |
2796 | # Try to initialize Stata
2797 | try_init_stata(STATA_PATH)
2798 |
2799 | # Create and mount the MCP server
2800 | # Only expose run_selection and run_file to LLMs
2801 |     # Other endpoints remain accessible via direct HTTP calls from the VS Code extension
2802 | # Configure HTTP client with ASGI transport and extended timeout for long-running Stata operations
2803 | http_client = httpx.AsyncClient(
2804 | transport=httpx.ASGITransport(app=app, raise_app_exceptions=False),
2805 | base_url="http://apiserver",
2806 | timeout=1200.0 # 20 minutes timeout for long Stata operations
2807 | )
2808 |
2809 | mcp = FastApiMCP(
2810 | app,
2811 | name=SERVER_NAME,
2812 | description="This server provides tools for running Stata commands and scripts. Use stata_run_selection for running code snippets and stata_run_file for executing .do files.",
2813 | http_client=http_client,
2814 | exclude_operations=[
2815 | "call_tool_v1_tools_post", # Legacy VS Code extension endpoint
2816 | "health_check_health_get", # Health check endpoint
2817 | "view_data_endpoint_view_data_get", # Data viewer endpoint (VS Code only)
2818 | "get_graph_graphs_graph_name_get", # Graph serving endpoint (VS Code only)
2819 | "clear_history_endpoint_clear_history_post", # History clearing (VS Code only)
2820 | "interactive_window_interactive_get", # Interactive window (VS Code only)
2821 | "stata_run_file_stream_endpoint_run_file_stream_get" # SSE streaming endpoint (HTTP clients only)
2822 | ]
2823 | )
2824 |
2825 | # Mount SSE transport at /mcp for backward compatibility
2826 | mcp.mount(mount_path="/mcp", transport="sse")
2827 |
2828 | # ========================================================================
2829 | # HTTP (Streamable) Transport - Separate Server Instance
2830 | # ========================================================================
2831 | # Create a SEPARATE MCP server instance for HTTP to avoid session conflicts
2832 | # This ensures notifications go to the correct transport
2833 | from mcp.server import Server as MCPServer
2834 | from mcp.server.streamable_http_manager import StreamableHTTPSessionManager
2835 | from starlette.responses import StreamingResponse as StarletteStreamingResponse
2836 |
2837 | logging.info("Creating separate MCP server instance for HTTP transport...")
2838 | http_mcp_server = MCPServer(SERVER_NAME)
2839 |
2840 | # Register list_tools handler to expose the same tools
2841 | @http_mcp_server.list_tools()
2842 | async def list_tools_http():
2843 |         """List the available tools, mirroring those exposed by the main fastapi_mcp server"""
2844 |         # The definitions below mirror the schemas fastapi_mcp generates for the two exposed endpoints
2845 | import mcp.types as types
2846 |
2847 | tools_list = []
2848 | # stata_run_selection tool
2849 | tools_list.append(types.Tool(
2850 | name="stata_run_selection",
2851 | description="Stata Run Selection Endpoint\n\nRun selected Stata code and return the output\n\n### Responses:\n\n**200**: Successful Response (Success Response)",
2852 | inputSchema={
2853 | "type": "object",
2854 | "properties": {
2855 | "selection": {"type": "string", "title": "selection"}
2856 | },
2857 | "title": "stata_run_selectionArguments",
2858 | "required": ["selection"]
2859 | }
2860 | ))
2861 | # stata_run_file tool
2862 | tools_list.append(types.Tool(
2863 | name="stata_run_file",
2864 | description="Stata Run File Endpoint\n\nRun a Stata .do file and return the output (MCP-compatible endpoint)\n\nArgs:\n file_path: Path to the .do file\n timeout: Timeout in seconds (default: 600 seconds / 10 minutes)\n\nReturns:\n Response with plain text output\n\n### Responses:\n\n**200**: Successful Response (Success Response)",
2865 | inputSchema={
2866 | "type": "object",
2867 | "properties": {
2868 | "file_path": {"type": "string", "title": "file_path"},
2869 | "timeout": {"type": "integer", "default": 600, "title": "timeout"}
2870 | },
2871 | "title": "stata_run_fileArguments",
2872 | "required": ["file_path"]
2873 | }
2874 | ))
2875 | return tools_list
2876 |
2877 | # Register call_tool handler to execute tools with HTTP server's context
2878 | @http_mcp_server.call_tool()
2879 | async def call_tool_http(name: str, arguments: dict) -> list:
2880 | """Execute tools using HTTP server's own context for proper notification routing"""
2881 | import mcp.types as types
2882 |
2883 | logging.debug(f"HTTP server executing tool: {name}")
2884 |
2885 | # Call the fastapi_mcp's execute method, which has the streaming wrapper
2886 | # The streaming wrapper will check http_mcp_server.request_context (which is set by StreamableHTTPSessionManager)
2887 | result = await mcp._execute_api_tool(
2888 | client=http_client,
2889 | tool_name=name,
2890 | arguments=arguments,
2891 |             operation_map=mcp.operation_map,  # maps tool names to their underlying API operations
2892 | http_request_info=None
2893 | )
2894 |
2895 | return result
2896 |
2897 | logging.debug("Registered tool handlers with HTTP server")
2898 |
2899 | # Create HTTP session manager with dedicated server
2900 | http_session_manager = StreamableHTTPSessionManager(
2901 | app=http_mcp_server, # Use dedicated HTTP server, not shared
2902 | event_store=None,
2903 | json_response=False, # Use SSE format for responses
2904 | stateless=False, # Maintain session state
2905 | )
2906 | logging.info("HTTP transport configured with dedicated MCP server")
2907 |
2908 | # Create a custom Response class that properly handles ASGI streaming
2909 | class ASGIPassthroughResponse(StarletteStreamingResponse):
2910 | """Response that passes through ASGI calls without buffering"""
2911 | def __init__(self, asgi_handler, scope, receive):
2912 |             # Initialize the parent class with dummy (empty) content; this sets up
2913 |             # required attributes such as background, headers, and media type
2914 | super().__init__(content=iter([]), media_type="text/event-stream")
2915 |
2916 | # Store our ASGI handler
2917 | self.asgi_handler = asgi_handler
2918 | self.scope_data = scope
2919 | self.receive_func = receive
2920 |
2921 | async def __call__(self, scope, receive, send):
2922 | """Handle ASGI request/response cycle"""
2923 | # Call the ASGI handler directly with the provided send callback
2924 | # This allows SSE events to be sent immediately without buffering
2925 | await self.asgi_handler(self.scope_data, self.receive_func, send)
2926 |
2927 | @app.api_route(
2928 | "/mcp-streamable",
2929 | methods=["GET", "POST", "DELETE"],
2930 | include_in_schema=False,
2931 | operation_id="mcp_http_streamable"
2932 | )
2933 | async def handle_mcp_streamable(request: Request):
2934 | """Handle MCP Streamable HTTP requests with proper ASGI passthrough"""
2935 | # Return a response that directly passes through to the ASGI handler
2936 | # This avoids any buffering by FastAPI/Starlette
2937 | return ASGIPassthroughResponse(
2938 | asgi_handler=http_session_manager.handle_request,
2939 | scope=request.scope,
2940 | receive=request.receive
2941 | )
2942 |
2943 | # Store the session manager for startup/shutdown
2944 | app.state.http_session_manager = http_session_manager
2945 | app.state.http_session_manager_cm = None
2946 |
2947 | # Define startup handler for the HTTP session manager
2948 | async def _start_http_session_manager():
2949 | """Start the HTTP session manager task group"""
2950 | try:
2951 | logging.info("Starting StreamableHTTP session manager...")
2952 | # Enter the context manager
2953 | app.state.http_session_manager_cm = http_session_manager.run()
2954 | await app.state.http_session_manager_cm.__aenter__()
2955 | logging.info("✓ StreamableHTTP session manager started successfully")
2956 | except Exception as e:
2957 | logging.error(f"Failed to start StreamableHTTP session manager: {e}", exc_info=True)
2958 | raise
2959 |
2960 | # Define shutdown handler for the HTTP session manager
2961 | async def _stop_http_session_manager():
2962 | """Stop the HTTP session manager"""
2963 | if app.state.http_session_manager_cm:
2964 | try:
2965 | logging.info("Stopping StreamableHTTP session manager...")
2966 | await app.state.http_session_manager_cm.__aexit__(None, None, None)
2967 | logging.info("✓ StreamableHTTP session manager stopped")
2968 | except Exception as e:
2969 | logging.error(f"Error stopping HTTP session manager: {e}", exc_info=True)
2970 |
2971 | # Store handlers on app.state for the lifespan manager to call
2972 | app.state._http_session_manager_starter = _start_http_session_manager
2973 | app.state._http_session_manager_stopper = _stop_http_session_manager
2974 | logging.debug("HTTP session manager startup/shutdown handlers registered with lifespan")
2975 |
2976 | # Store reference
2977 | mcp._http_transport = http_session_manager
2978 | logging.info("MCP HTTP Streamable transport mounted at /mcp-streamable with TRUE SSE streaming (ASGI direct)")
2979 |
2980 | LOG_LEVEL_RANK = {
2981 | "debug": 0,
2982 | "info": 1,
2983 | "notice": 2,
2984 | "warning": 3,
2985 | "error": 4,
2986 | "critical": 5,
2987 | "alert": 6,
2988 | "emergency": 7,
2989 | }
2990 | DEFAULT_LOG_LEVEL = "notice"
2991 |
2992 | @mcp.server.set_logging_level()
2993 | async def handle_set_logging_level(level: str):
2994 | """Persist client-requested log level for the current session."""
2995 | try:
2996 | ctx = mcp.server.request_context
2997 | except LookupError:
2998 | logging.debug("logging/setLevel received outside of request context")
2999 | return
3000 |
3001 | session = getattr(ctx, "session", None)
3002 | if session is not None:
3003 | setattr(session, "_stata_log_level", (level or "info").lower())
3004 | logging.debug(f"Set MCP log level for session to {level}")
3005 |
3006 | # Enhance stata_run_file with MCP-native streaming updates
3007 | original_execute = mcp._execute_api_tool
3008 |
3009 | async def execute_with_streaming(*call_args, **call_kwargs):
3010 | """Wrap tool execution to stream progress for long-running Stata jobs."""
3011 | if not call_args:
3012 | raise TypeError("execute_with_streaming requires bound 'self'")
3013 |
3014 | bound_self = call_args[0]
3015 | original_args = call_args[1:]
3016 | original_kwargs = dict(call_kwargs)
3017 |
3018 | # Extract known keyword arguments
3019 | working_kwargs = dict(call_kwargs)
3020 | client = working_kwargs.pop("client", None)
3021 | tool_name = working_kwargs.pop("tool_name", None)
3022 | arguments = working_kwargs.pop("arguments", None)
3023 | operation_map = working_kwargs.pop("operation_map", None)
3024 | http_request_info = working_kwargs.pop("http_request_info", None)
3025 |
3026 |         # Log and discard unexpected kwargs to stay forward-compatible
3027 | for extra_key in list(working_kwargs.keys()):
3028 | extra_val = working_kwargs.pop(extra_key, None)
3029 | logging.debug(f"Ignoring unexpected MCP execute kwarg: {extra_key}={extra_val!r}")
3030 |
3031 | remaining = list(original_args)
3032 |
3033 | # Fill from positional args if any are missing
3034 | if client is None and remaining:
3035 | client = remaining.pop(0)
3036 | if tool_name is None and remaining:
3037 | tool_name = remaining.pop(0)
3038 | if arguments is None and remaining:
3039 | arguments = remaining.pop(0)
3040 | if operation_map is None and remaining:
3041 | operation_map = remaining.pop(0)
3042 | if http_request_info is None and remaining:
3043 | http_request_info = remaining.pop(0)
3044 |
3045 |         # If this is not our tool, or required data is missing, fall back to the original implementation
3046 | if (
3047 | tool_name != "stata_run_file"
3048 | or client is None
3049 | or operation_map is None
3050 | ):
3051 | return await original_execute(*original_args, **original_kwargs)
3052 |
3053 | arguments_dict = dict(arguments or {})
3054 |
3055 | # Try to get request context from either HTTP or SSE server
3056 | # IMPORTANT: Check HTTP first! If we check SSE first, we might get stale SSE context
3057 | # even when the request came through HTTP.
3058 | ctx = None
3059 | server_type = "unknown"
3060 | try:
3061 | ctx = http_mcp_server.request_context
3062 | server_type = "HTTP"
3063 | logging.debug(f"Using HTTP server request context: {ctx}")
3064 | except (LookupError, NameError):
3065 | # HTTP server has no context, try SSE server
3066 | try:
3067 | ctx = bound_self.server.request_context
3068 | server_type = "SSE"
3069 | logging.debug(f"Using SSE server request context: {ctx}")
3070 | except LookupError:
3071 | logging.debug("No MCP request context available; skipping streaming wrapper")
3072 | return await original_execute(
3073 | client=client,
3074 | tool_name=tool_name,
3075 | arguments=arguments_dict,
3076 | operation_map=operation_map,
3077 | http_request_info=http_request_info,
3078 | )
3079 |
3080 | session = getattr(ctx, "session", None)
3081 | request_id = getattr(ctx, "request_id", None)
3082 | progress_token = getattr(getattr(ctx, "meta", None), "progressToken", None)
3083 |
3084 | # DEBUG: Log session information
3085 | logging.info(f"✓ Streaming enabled via {server_type} server - Tool: {tool_name}")
3086 | if session:
3087 | session_attrs = [attr for attr in dir(session) if not attr.startswith('__')]
3088 | logging.debug(f"Session type: {type(session)}, Attributes: {session_attrs[:10]}")
3089 | session_id = getattr(session, "_session_id", getattr(session, "session_id", getattr(session, "id", None)))
3090 | else:
3091 | session_id = None
3092 | logging.debug(f"Tool execution - Server: {server_type}, Session ID: {session_id}, Request ID: {request_id}, Progress Token: {progress_token}")
3093 |
3094 | if session is None:
3095 | logging.debug("MCP session not available; falling back to default execution")
3096 | return await original_execute(
3097 | client=client,
3098 | tool_name=tool_name,
3099 | arguments=arguments_dict,
3100 | operation_map=operation_map,
3101 | http_request_info=http_request_info,
3102 | )
3103 |
3104 | if not hasattr(session, "_stata_log_level"):
3105 | setattr(session, "_stata_log_level", DEFAULT_LOG_LEVEL)
3106 |
3107 | file_path = arguments_dict.get("file_path", "")
3108 |
3109 | try:
3110 | timeout = int(arguments_dict.get("timeout", 600))
3111 | except (TypeError, ValueError):
3112 | timeout = 600
3113 |
3114 | resolved_path, resolution_candidates = resolve_do_file_path(file_path)
3115 | effective_path = resolved_path or os.path.abspath(file_path)
3116 | base_name = os.path.splitext(os.path.basename(effective_path))[0]
3117 | log_file_path = get_log_file_path(effective_path, base_name)
3118 |
3119 | logging.info(f"📡 MCP streaming enabled for {os.path.basename(file_path)}")
3120 | logging.debug(f"MCP log streaming monitoring: {log_file_path}")
3121 | if not resolved_path:
3122 | logging.debug(f"Resolution attempts: {resolution_candidates}")
3123 |
3124 | import asyncio as _asyncio
3125 | import time as _time
3126 |
3127 | async def send_log(level: str, message: str):
3128 | level = (level or "info").lower()
3129 | session_level = getattr(session, "_stata_log_level", DEFAULT_LOG_LEVEL)
3130 | if LOG_LEVEL_RANK.get(level, 0) < LOG_LEVEL_RANK.get(session_level, LOG_LEVEL_RANK[DEFAULT_LOG_LEVEL]):
3131 | return
3132 | logging.debug(f"MCP streaming log [{level}] (session level {session_level}): {message}")
3133 | try:
3134 | await session.send_log_message(
3135 | level=level,
3136 | data=message,
3137 | logger="stata-mcp",
3138 | related_request_id=request_id,
3139 | )
3140 | except Exception as send_exc: # noqa: BLE001
3141 | logging.debug(f"Unable to send MCP log message: {send_exc}")
3142 |
3143 | async def send_progress(elapsed: float, message: str | None = None):
3144 | if progress_token is None:
3145 | return
3146 | try:
3147 | await session.send_progress_notification(
3148 | progress_token=progress_token,
3149 | progress=elapsed,
3150 | total=timeout,
3151 | message=message,
3152 | related_request_id=request_id,
3153 | )
3154 | except Exception as send_exc: # noqa: BLE001
3155 | logging.debug(f"Unable to send MCP progress notification: {send_exc}")
3156 |
3157 | task = _asyncio.create_task(
3158 | original_execute(
3159 | client=client,
3160 | tool_name=tool_name,
3161 | arguments=arguments_dict,
3162 | operation_map=operation_map,
3163 | http_request_info=http_request_info,
3164 | )
3165 | )
3166 |
3167 | start_time = _time.time()
3168 | stream_interval = 5
3169 | poll_interval = 2
3170 | last_stream = 0.0
3171 | last_offset = 0
3172 |
3173 | start_message = f"▶️ Starting Stata execution: {os.path.basename(effective_path)}"
3174 | await send_log("notice", start_message)
3175 | await send_progress(0.0, start_message)
3176 |
3177 | try:
3178 | while not task.done():
3179 | await _asyncio.sleep(poll_interval)
3180 | now = _time.time()
3181 | elapsed = now - start_time
3182 |
3183 | if now - last_stream >= stream_interval:
3184 | progress_msg = f"⏱️ {elapsed:.0f}s elapsed / {timeout}s timeout"
3185 | await send_progress(elapsed, progress_msg)
3186 |
3187 | if os.path.exists(log_file_path):
3188 | await send_log(
3189 | "notice",
3190 | f"{progress_msg}\n\n(📁 Inspecting Stata log for new output...)",
3191 | )
3192 | try:
3193 | with open(log_file_path, "r", encoding="utf-8", errors="replace") as log_file:
3194 | log_file.seek(last_offset)
3195 | new_content = log_file.read()
3196 | last_offset = log_file.tell()
3197 |
3198 | snippet = ""
3199 | if new_content.strip():
3200 | lines = new_content.strip().splitlines()
3201 | snippet = "\n".join(lines[-3:])
3202 |
3203 |
3204 | if snippet:
3205 | progress_msg = f"{progress_msg}\n\n📝 Recent output:\n{snippet}"
3206 |
3207 | await send_log("notice", progress_msg)
3208 | except Exception as read_exc: # noqa: BLE001
3209 | logging.debug(f"Error reading log for streaming: {read_exc}")
3210 | await send_log(
3211 | "notice",
3212 | f"{progress_msg} (waiting for output...)",
3213 | )
3214 | else:
3215 | await send_log(
3216 | "notice",
3217 | f"{progress_msg} (initializing...)",
3218 | )
3219 |
3220 | last_stream = now
3221 |
3222 | result = await task
3223 | total_time = _time.time() - start_time
3224 | await send_log("notice", f"✅ Execution completed in {total_time:.1f}s")
3225 | return result
3226 | except Exception as exc:
3227 | logging.error(f"❌ Error during MCP streaming: {exc}", exc_info=True)
3228 | await send_log("error", f"Error during execution: {exc}")
3229 | raise
3230 |
3231 | import types as _types
3232 |
3233 | mcp._execute_api_tool = _types.MethodType(execute_with_streaming, mcp)
3234 | logging.info("📡 MCP streaming wrapper installed for stata_run_file")
3235 |
3236 | # Mark MCP as initialized (will also be set in startup event)
3237 | global mcp_initialized
3238 | mcp_initialized = True
3239 | logging.info("MCP server mounted and initialized")
3240 |
3241 | try:
3242 | # Start the server
3243 | logging.info(f"Starting Stata MCP Server on {args.host}:{port}")
3244 | logging.info(f"Stata available: {stata_available}")
3245 |
3246 | # Print to stdout as well to ensure visibility
3247 | if platform.system() == 'Windows':
3248 |             # On Windows, skip the startup message entirely if another instance was detected,
3249 |             # since that information has already been printed above
3250 | if not stata_banner_displayed:
3251 | print(f"INITIALIZATION SUCCESS: Stata MCP Server starting on {args.host}:{port}")
3252 | print(f"Stata available: {stata_available}")
3253 | print(f"Log file: {os.path.abspath(log_file)}")
3254 | else:
3255 | # Normal behavior for macOS/Linux
3256 | print(f"INITIALIZATION SUCCESS: Stata MCP Server starting on {args.host}:{port}")
3257 | print(f"Stata available: {stata_available}")
3258 | print(f"Log file: {os.path.abspath(log_file)}")
3259 |
3260 | import uvicorn
3261 | uvicorn.run(
3262 | app,
3263 | host=args.host,
3264 | port=port,
3265 |                 log_level="warning", # Suppress uvicorn info noise; warnings and errors still surface
3266 | access_log=False # Disable access logs
3267 | )
3268 |
3269 | except Exception as e:
3270 | logging.error(f"Server error: {str(e)}")
3271 | traceback.print_exc()
3272 | sys.exit(1)
3273 |
3274 | except Exception as e:
3275 | logging.error(f"Error in main function: {str(e)}")
3276 | traceback.print_exc()
3277 | sys.exit(1)
3278 |
3279 | if __name__ == "__main__":
3280 | main()
3281 |
```
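
For quick reference, here is a minimal client sketch for the SSE transport mounted at `/mcp` above: it lists the exposed tools and invokes `stata_run_file`. This is an illustration only, assuming the official `mcp` Python SDK client helpers (`sse_client`, `ClientSession`); the host, port, and `.do` file path are placeholders.

```python
# Hypothetical client sketch (not part of this repository): connect to the
# SSE transport at /mcp, list the exposed tools, and run a .do file.
# The host, port, and file path are placeholder assumptions.
import asyncio

from mcp import ClientSession
from mcp.client.sse import sse_client


async def run_do_file() -> None:
    async with sse_client("http://localhost:4000/mcp") as (read_stream, write_stream):
        async with ClientSession(read_stream, write_stream) as session:
            await session.initialize()

            tools = await session.list_tools()
            print([tool.name for tool in tools.tools])
            # Expected to include: stata_run_selection, stata_run_file

            result = await session.call_tool(
                "stata_run_file",
                {"file_path": "/path/to/analysis.do", "timeout": 600},
            )
            for block in result.content:
                print(getattr(block, "text", block))


if __name__ == "__main__":
    asyncio.run(run_do_file())
```

While the tool runs, the streaming wrapper in `main()` sends `notifications/message` and `notifications/progress` roughly every five seconds; whether and how those surface on the client depends on the SDK version and the notification handlers registered on the session.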
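
The Streamable HTTP transport mounted at `/mcp-streamable` can be exercised the same way. The sketch below assumes the SDK's `streamablehttp_client` helper, which yields a third element (a session-id accessor) that is ignored here; treat the import path and tuple shape as assumptions rather than documented behavior of this server.

```python
# Hypothetical sketch for the /mcp-streamable transport; assumes the
# streamablehttp_client helper from the mcp Python SDK.
import asyncio

from mcp import ClientSession
from mcp.client.streamable_http import streamablehttp_client


async def list_tools_streamable() -> None:
    async with streamablehttp_client("http://localhost:4000/mcp-streamable") as (
        read_stream,
        write_stream,
        _get_session_id,  # unused here
    ):
        async with ClientSession(read_stream, write_stream) as session:
            await session.initialize()
            tools = await session.list_tools()
            print([tool.name for tool in tools.tools])


if __name__ == "__main__":
    asyncio.run(list_tools_streamable())
```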
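
The operations excluded from the MCP tool list remain plain FastAPI routes intended for the VS Code extension. As a small, hedged example, the path below is inferred from the `health_check_health_get` operation id rather than documented on this page:

```python
# Liveness probe against the route behind health_check_health_get.
# The /health path and port 4000 are inferred assumptions.
import httpx

response = httpx.get("http://localhost:4000/health", timeout=5.0)
response.raise_for_status()
print(response.status_code, response.text)
```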