This is page 3 of 4. Use http://codebase.md/sedwardstx/demomcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .gitignore
├── .mcp.json
├── check_server.py
├── CLAUDE.md
├── config
│ └── default.yml
├── docs
│ ├── api_reference.md
│ ├── demo-recording
│ │ └── MCPDemo.gif
│ ├── example-context-docs
│ │ ├── mcp-ai-agent-architecture.md
│ │ ├── mcp-ai-agent-dev-task.md
│ │ └── mcp-ai-agent-prd.md
│ └── getting_started.md
├── LICENSE
├── main_tcp.py
├── main.py
├── mcp_tcp_client.py
├── pyproject.toml
├── QUICK_START.md
├── README.md
├── scripts
│ └── test_server.py
├── setup.py
├── src
│ └── mcp_log_analyzer
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ └── server.py
│ ├── config
│ │ ├── __init__.py
│ │ └── settings.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── config.py
│ │ ├── models.py
│ │ └── state_manager.py
│ ├── mcp_server
│ │ ├── __init__.py
│ │ ├── models
│ │ │ ├── __init__.py
│ │ │ └── schemas.py
│ │ ├── prompts
│ │ │ ├── __init__.py
│ │ │ ├── linux_testing_prompt.py
│ │ │ ├── log_management_prompt.py
│ │ │ ├── mcp_assets_overview_prompt.py
│ │ │ ├── network_testing_prompt.py
│ │ │ ├── process_monitoring_prompt.py
│ │ │ └── windows_testing_prompt.py
│ │ ├── resources
│ │ │ ├── __init__.py
│ │ │ ├── linux_resources.py
│ │ │ ├── logs_resources.py
│ │ │ ├── network_resources.py
│ │ │ ├── process_resources.py
│ │ │ └── windows_resources.py
│ │ ├── server.py
│ │ └── tools
│ │ ├── __init__.py
│ │ ├── health_check_tools.py
│ │ ├── linux_test_tools.py
│ │ ├── log_management_tools.py
│ │ ├── network_test_tools.py
│ │ ├── process_test_tools.py
│ │ └── windows_test_tools.py
│ ├── parsers
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── csv_parser.py
│ │ ├── etl_cached_parser.py
│ │ ├── etl_large_file_parser.py
│ │ ├── etl_parser.py
│ │ ├── etl_windows_parser.py
│ │ └── evt_parser.py
│ └── tcp_proxy.py
├── TCP_PROXY_README.md
├── tcp_proxy.py
├── tcp_server.py
├── test_server.py
├── test_tcp_proxy.py
├── test_windows_setup.py
└── tests
├── test_base_parser.py
├── test_mcp_server.py
├── test_tool_utils.py
└── test_utils.py
```
# Files
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/parsers/etl_large_file_parser.py:
--------------------------------------------------------------------------------
```python
1 | """Enhanced ETL parser for large files with streaming support."""
2 |
import asyncio
import contextlib
import csv
import logging
import os
import platform
import shutil
import subprocess
import tempfile
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Union
from uuid import uuid4

from ..core.models import LogRecord, LogSource, LogType
from .base import BaseParser
17 |
18 | logger = logging.getLogger(__name__)
19 |
20 |
class EtlLargeFileParser(BaseParser):
    """Enhanced ETL parser with support for large files using streaming.

    Windows .etl trace files are binary; this parser shells out to
    tracerpt.exe to convert them to CSV and then streams records out of
    the CSV while tracerpt may still be writing it, so multi-hundred-MB
    traces can be paged through without loading everything into memory.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialize ETL large file parser.

        Args:
            config: Parser configuration. Recognized keys:
                ``chunk_size`` (records per chunk, default 1000) and
                ``verbose`` (log per-row conversion failures).
        """
        super().__init__(config)
        self.chunk_size = self.config.get("chunk_size", 1000)  # Records per chunk
        self.tracerpt_path = self._find_tracerpt()
        # Created lazily on first parse; removed again in __del__.
        self.temp_dir: Optional[str] = None

    def _find_tracerpt(self) -> Optional[str]:
        """Locate tracerpt.exe.

        Returns:
            Absolute path to tracerpt.exe, or None when not running on
            Windows or the tool cannot be found.
        """
        if platform.system() != "Windows":
            return None

        # Common locations for tracerpt.exe.
        possible_paths = [
            r"C:\Windows\System32\tracerpt.exe",
            r"C:\Windows\SysWOW64\tracerpt.exe",
        ]
        for path in possible_paths:
            if os.path.exists(path):
                return path

        # Fall back to a PATH search. shutil.which replaces the previous
        # "where tracerpt.exe" subprocess call and its bare except.
        return shutil.which("tracerpt.exe")

    def is_available(self) -> bool:
        """Check if ETL parsing is available (tracerpt.exe was found)."""
        return self.tracerpt_path is not None

    def parse_file_streaming(
        self, source: LogSource, file_path: Union[str, Path],
        limit: int = 1000, offset: int = 0
    ) -> Iterator[LogRecord]:
        """Parse ETL file with streaming to handle large files.

        Args:
            source: The log source information.
            file_path: Path to the ETL file.
            limit: Maximum number of records to return.
            offset: Number of records to skip.

        Yields:
            LogRecord objects parsed from the ETL file.

        Raises:
            RuntimeError: If tracerpt.exe is unavailable, fails, or
                exceeds the 10-minute processing budget.
            FileNotFoundError: If the ETL file does not exist.
        """
        if not self.is_available():
            raise RuntimeError(
                "Windows ETL parsing is not available. tracerpt.exe not found."
            )

        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"ETL file not found: {file_path}")

        file_size_mb = path.stat().st_size / (1024 * 1024)
        logger.info(f"Processing ETL file: {file_size_mb:.1f} MB")

        # Persistent temp directory, shared across calls, removed in __del__.
        if self.temp_dir is None:
            self.temp_dir = tempfile.mkdtemp(prefix="etl_parser_")
        output_file = os.path.join(self.temp_dir, f"etl_{uuid4()}.csv")

        try:
            cmd = [
                self.tracerpt_path,
                str(path),
                "-o", output_file,
                "-of", "CSV",
                "-y",   # Overwrite without prompting
                "-lr",  # Less restrictive; attempt to process badly-formed events
            ]

            if file_size_mb > 500:  # Warn up front for very large inputs
                logger.warning(
                    f"Large ETL file ({file_size_mb:.1f} MB), processing may take time"
                )

            logger.info("Starting tracerpt conversion...")
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )

            start_time = time.time()
            max_wait_time = 600  # 10 minutes maximum

            logger.info(
                f"Waiting for tracerpt.exe to process {file_size_mb:.1f} MB file..."
            )

            # Give tracerpt a moment to start, then fail fast if it already died.
            # communicate() is only called on the failure path so it is never
            # invoked twice on the same process.
            time.sleep(2)
            if process.poll() is not None and process.returncode != 0:
                _, stderr = process.communicate()
                raise RuntimeError(
                    f"tracerpt failed immediately with code {process.returncode}: {stderr}"
                )

            self._wait_for_output(process, output_file, start_time, max_wait_time)

            if os.path.exists(output_file):
                yield from self._stream_csv(process, output_file, source, limit, offset)

            self._await_completion(process, start_time, max_wait_time)

        finally:
            # Clean up the per-call CSV even when the generator is closed early.
            if os.path.exists(output_file):
                with contextlib.suppress(OSError):
                    os.remove(output_file)

    def _wait_for_output(
        self,
        process: "subprocess.Popen[str]",
        output_file: str,
        start_time: float,
        max_wait_time: int,
    ) -> None:
        """Block until tracerpt creates its CSV output file.

        Raises:
            RuntimeError: On tracerpt failure, timeout, or a clean exit
                that produced no output file.
        """
        check_interval = 5  # Seconds between polls
        last_log_time = start_time

        while not os.path.exists(output_file):
            current_time = time.time()
            elapsed = current_time - start_time

            # Log progress every 30 seconds so long conversions stay visible.
            if current_time - last_log_time >= 30:
                logger.info(f"tracerpt.exe still running... ({elapsed:.0f}s elapsed)")
                last_log_time = current_time

            if elapsed > max_wait_time:
                process.terminate()
                raise RuntimeError(f"tracerpt timed out after {max_wait_time} seconds")

            if process.poll() is not None:
                _, stderr = process.communicate()
                if process.returncode != 0:
                    raise RuntimeError(
                        f"tracerpt failed with code {process.returncode}: {stderr}"
                    )
                # Process completed but no output file.
                if not os.path.exists(output_file):
                    raise RuntimeError("tracerpt completed but produced no output file")
                break

            time.sleep(check_interval)

    def _stream_csv(
        self,
        process: "subprocess.Popen[str]",
        output_file: str,
        source: LogSource,
        limit: int,
        offset: int,
    ) -> Iterator[LogRecord]:
        """Yield LogRecords from the CSV while tracerpt may still be writing it."""
        logger.info("CSV file created, starting to read records...")

        # Wait until at least the header row has been flushed.
        while os.path.getsize(output_file) == 0 and process.poll() is None:
            time.sleep(0.5)

        records_yielded = 0
        records_skipped = 0

        with open(output_file, 'r', encoding='utf-8', errors='ignore') as f:
            header_line = f.readline()
            if not header_line:
                # Header not flushed yet; give tracerpt a second and retry once.
                time.sleep(1)
                f.seek(0)
                header_line = f.readline()
            if not header_line:
                return

            # DictReader over the remainder of the file, keyed by the header row.
            csv_reader = csv.DictReader(f, fieldnames=None)
            csv_reader.fieldnames = next(csv.reader([header_line]))

            for row_num, row in enumerate(csv_reader):
                if row_num > 0 and row_num % 1000 == 0:
                    logger.info(f"Processed {row_num} records from CSV...")

                # Honour the caller-requested offset before yielding anything.
                if records_skipped < offset:
                    records_skipped += 1
                    continue

                log_record = self._convert_csv_row(source, row)
                if log_record:
                    yield log_record
                    records_yielded += 1

                if records_yielded >= limit:
                    logger.info(f"Reached limit of {limit} records")
                    # No point letting tracerpt keep converting what we won't read.
                    if process.poll() is None:
                        logger.info("Terminating tracerpt as we have enough records")
                        process.terminate()
                    break

                # Periodically note if tracerpt died with an error mid-stream.
                if row_num % 100 == 0 and process.poll() is not None:
                    if process.returncode != 0:
                        logger.warning(f"tracerpt ended with code {process.returncode}")

    def _await_completion(
        self,
        process: "subprocess.Popen[str]",
        start_time: float,
        max_wait_time: int,
    ) -> None:
        """Wait for tracerpt to finish, terminating it if the budget runs out."""
        if process.poll() is not None:
            return

        remaining_time = max_wait_time - (time.time() - start_time)
        if remaining_time > 0:
            logger.info(
                f"Waiting for tracerpt to complete (up to {remaining_time:.0f}s remaining)..."
            )
            try:
                process.wait(timeout=remaining_time)
                logger.info(
                    f"tracerpt completed successfully after {time.time() - start_time:.0f}s"
                )
            except subprocess.TimeoutExpired:
                logger.warning(f"tracerpt timed out after {max_wait_time}s, terminating...")
                process.terminate()
                process.wait(timeout=5)  # Give it 5 seconds to terminate
        else:
            logger.warning("Maximum wait time exceeded, terminating tracerpt...")
            process.terminate()
            process.wait(timeout=5)

    def _convert_csv_row(
        self, source: LogSource, row: Dict[str, str]
    ) -> Optional[LogRecord]:
        """Convert a CSV row from tracerpt to a LogRecord.

        Args:
            source: The log source information.
            row: CSV row dictionary.

        Returns:
            LogRecord, or None if conversion fails (best-effort parsing:
            malformed rows are skipped, not fatal).
        """
        # Common tracerpt CSV columns mapped to normalized record fields.
        field_mappings = {
            "Event Name": "event_name",
            "Type": "event_type",
            "Event ID": "event_id",
            "Version": "version",
            "Channel": "channel",
            "Level": "level",
            "Task": "task",
            "Opcode": "opcode",
            "Keyword": "keywords",
            "PID": "process_id",
            "TID": "thread_id",
            "Processor Number": "processor",
            "Provider Name": "provider_name",
            "Provider ID": "provider_id",
            "Message": "message",
            "Process Name": "process_name",
        }

        try:
            record_data = {
                record_field: row[csv_field]
                for csv_field, record_field in field_mappings.items()
                if csv_field in row and row[csv_field]
            }

            timestamp = self._parse_timestamp(row.get("Clock-Time"))

            # Include any additional fields with normalized snake_case keys.
            for key, value in row.items():
                if key not in field_mappings and value:
                    clean_key = key.lower().replace(' ', '_').replace('-', '_')
                    record_data[clean_key] = value

            return LogRecord(
                source_id=source.id,
                timestamp=timestamp,
                data=record_data,
                raw_content=None
            )

        except Exception as e:
            if self.config.get("verbose", False):
                logger.error(f"Failed to convert CSV row: {e}")
            return None

    @staticmethod
    def _parse_timestamp(value: Optional[str]) -> Optional[datetime]:
        """Parse a tracerpt Clock-Time value; return None if unparseable."""
        if not value:
            return None
        # Formats tracerpt is known to emit, tried in order.
        for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S", "%m/%d/%Y %H:%M:%S.%f"):
            try:
                return datetime.strptime(value, fmt)
            except ValueError:
                continue
        return None

    def parse_file(
        self, source: LogSource, file_path: Union[str, Path]
    ) -> Iterator[LogRecord]:
        """Parse ETL log records from a file.

        Args:
            source: The log source information.
            file_path: Path to the ETL file.

        Yields:
            LogRecord objects parsed from the ETL file.
        """
        # Use the streaming parser for all files; 10000 is a generous cap.
        yield from self.parse_file_streaming(source, file_path, limit=10000)

    def parse(
        self, path: str, filters: Optional[Dict[str, Any]] = None,
        start_time: Optional[datetime] = None, end_time: Optional[datetime] = None,
        limit: int = 1000, offset: int = 0
    ) -> List[LogRecord]:
        """Parse ETL file with filtering and pagination.

        Args:
            path: Path to the ETL file.
            filters: Optional filters to apply.
            start_time: Optional start time filter.
            end_time: Optional end time filter.
            limit: Maximum number of records to return.
            offset: Number of records to skip.

        Returns:
            List of LogRecord objects.
        """
        # Temporary source record; this entry point is path-based.
        temp_source = LogSource(
            name="temp_etl",
            type=LogType.ETL,
            path=path,
            metadata={}
        )

        records: List[LogRecord] = []

        # Over-fetch by `offset` so pagination can be applied after filtering.
        for record in self.parse_file_streaming(temp_source, path, limit=limit + offset):
            # Time-window filters (records without a timestamp pass through).
            if start_time and record.timestamp and record.timestamp < start_time:
                continue
            if end_time and record.timestamp and record.timestamp > end_time:
                continue

            if filters and not self._match_filters(record, filters):
                continue

            records.append(record)
            if len(records) >= limit + offset:
                break

        # Apply offset/limit pagination on the filtered result.
        return records[offset:offset + limit]

    def _match_filters(self, record: LogRecord, filters: Dict[str, Any]) -> bool:
        """Check if a record matches the provided filters.

        Args:
            record: The log record to check.
            filters: Mapping of record-data key to expected value; a list
                value means "match any of these".

        Returns:
            True if record matches all filters.
        """
        for key, expected in filters.items():
            actual = record.data.get(key)
            if isinstance(expected, list):
                if actual not in expected:
                    return False
            elif actual != expected:
                return False
        return True

    def parse_content(self, source: LogSource, content: str) -> Iterator[LogRecord]:
        """Parse ETL log records from content string.

        Note: ETL files are binary and cannot be parsed from string content.

        Args:
            source: The log source information.
            content: String content (not supported for ETL).

        Raises:
            NotImplementedError: ETL files must be parsed from file.
        """
        raise NotImplementedError(
            "ETL files are binary and must be parsed from file, not string content"
        )

    def validate_file(self, file_path: Union[str, Path]) -> bool:
        """Validate if the file can be parsed by this parser.

        Args:
            file_path: Path to validate.

        Returns:
            True if the path looks like a readable ETL file and
            tracerpt.exe is available.
        """
        path = Path(file_path)

        # Must carry the .etl extension.
        if not str(path).lower().endswith('.etl'):
            return False

        # Must exist and be a regular file.
        if not path.exists() or not path.is_file():
            return False

        # Conversion requires tracerpt.exe.
        return self.is_available()

    def __del__(self):
        """Best-effort removal of the temp directory on garbage collection."""
        try:
            # getattr: __init__ may not have completed when __del__ runs.
            temp_dir = getattr(self, "temp_dir", None)
            if temp_dir and os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
        except Exception:
            # Never raise from a destructor (may run at interpreter shutdown).
            pass
```
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/mcp_server/prompts/linux_testing_prompt.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Linux system testing and diagnostics prompts for the MCP Log Analyzer server.
3 | """
4 |
5 | from typing import Optional
6 | from mcp.server import FastMCP
7 |
8 |
9 | def register_linux_testing_prompts(mcp: FastMCP):
10 | """Register all Linux testing prompts."""
11 |
    @mcp.prompt(
        title="Linux Log Access Test",
        description="Guide for testing access to Linux log files and systemd journal"
    )
    async def linux_log_access_test() -> str:
        """
        Return a static markdown guide for the test_linux_log_access tool.

        Registered as an MCP prompt. Takes no arguments; explains what the
        tool checks (log files, journal, commands, permissions) and how to
        interpret and act on its results.
        """
        return """
# 🔍 Linux Log Access Test

## Tool: test_linux_log_access

### Purpose
Tests access to common log files in /var/log/ and systemd journal accessibility.

### What It Tests
- **Traditional log files**: /var/log/syslog, messages, auth.log, kern.log
- **systemd journal**: Journal accessibility and permissions
- **System commands**: ss, netstat, systemctl availability
- **File permissions**: Read access and file sizes

### Usage
```
Tool: test_linux_log_access
```

### Interpreting Results
- **Accessible files**: Can be queried and analyzed
- **Permission denied**: Need elevated privileges or group membership
- **File not found**: May indicate different distribution or log rotation
- **Journal access**: Check systemd-journal group membership

### Next Steps
- Add user to systemd-journal group for journal access
- Use sudo for system log access if needed
- Check distribution-specific log locations
"""
50 |
51 | @mcp.prompt(
52 | title="systemd Journal Query Guide",
53 | description="How to query and filter systemd journal logs"
54 | )
55 | async def systemd_journal_query(
56 | priority: Optional[str] = None
57 | ) -> str:
58 | """
59 | Guide for querying systemd journal.
60 |
61 | Args:
62 | priority: Optional priority level filter (err, warning, info, etc.)
63 | """
64 |
65 | base_guide = """
66 | # 📊 systemd Journal Query Guide
67 |
68 | ## Tool: query_systemd_journal
69 |
70 | ### Purpose
71 | Query systemd journal with specific filters for targeted log analysis.
72 |
73 | ### Parameters
74 | - **service_name**: Specific service to query (e.g., "nginx", "sshd")
75 | - **priority**: Log level (emerg, alert, crit, err, warning, notice, info, debug)
76 | - **time_duration**: Time range ("30m", "2h", "1d", etc.)
77 | - **max_lines**: Result limit (default: 100)
78 |
79 | ### Usage Examples
80 | ```
81 | # Service errors
82 | Tool: query_systemd_journal
83 | Parameters: service_name="nginx", priority="err", time_duration="1h"
84 |
85 | # Recent critical events
86 | Tool: query_systemd_journal
87 | Parameters: priority="crit", time_duration="24h"
88 |
89 | # SSH authentication logs
90 | Tool: query_systemd_journal
91 | Parameters: service_name="sshd", time_duration="6h", max_lines=200
92 | ```
93 | """
94 |
95 | priority_guide = {
96 | "err": """
97 | ### Error Priority Focus
98 | - **Use for**: Service failures, application errors
99 | - **Common patterns**: "Failed to start", "Main process exited"
100 | - **Time range**: Start with 1-6 hours, expand if needed
101 | """,
102 | "warning": """
103 | ### Warning Priority Focus
104 | - **Use for**: Performance issues, non-critical failures
105 | - **Common patterns**: Resource warnings, configuration issues
106 | - **Time range**: 6-24 hours typically sufficient
107 | """,
108 | "info": """
109 | ### Info Priority Focus
110 | - **Use for**: Normal operations, service status
111 | - **Common patterns**: "Started", "Stopped", "Reloading"
112 | - **Time range**: Keep short (1-2 hours) due to volume
113 | """
114 | }
115 |
116 | if priority and priority.lower() in priority_guide:
117 | base_guide += priority_guide[priority.lower()]
118 |
119 | base_guide += """
120 | ### Priority Levels Reference
121 | - **emerg (0)**: System unusable
122 | - **alert (1)**: Immediate action required
123 | - **crit (2)**: Critical conditions
124 | - **err (3)**: Error conditions
125 | - **warning (4)**: Warning conditions
126 | - **notice (5)**: Normal but significant
127 | - **info (6)**: Informational messages
128 | - **debug (7)**: Debug-level messages
129 | """
130 |
131 | return base_guide
132 |
    @mcp.prompt(
        title="Linux Service Analysis",
        description="Guide for analyzing Linux service status and health"
    )
    async def linux_service_analysis() -> str:
        """
        Return a static markdown guide for the analyze_linux_services tool.

        Registered as an MCP prompt. Takes no arguments; documents the
        tool's parameters, example invocations, service states, and a
        troubleshooting checklist for failed services.
        """
        return """
# 🛠️ Linux Service Analysis Guide

## Tool: analyze_linux_services

### Purpose
Lists and analyzes systemd services, identifying failed services and recent issues.

### Parameters
- **service_pattern**: Filter services by name pattern (optional)
- **include_failed**: Include failed services in analysis (default: true)

### Usage Examples
```
# Check all failed services
Tool: analyze_linux_services
Parameters: include_failed=true

# Analyze web services
Tool: analyze_linux_services
Parameters: service_pattern="nginx|apache"

# Check database services
Tool: analyze_linux_services
Parameters: service_pattern="mysql|postgres|mongodb"
```

### What It Provides
✅ Active and failed service lists
✅ Recent logs for failed services
✅ Service health assessment
✅ Service dependencies and issues

### Common Service States
- **active (running)**: Service operating normally
- **active (exited)**: One-shot service completed
- **inactive (dead)**: Service not running
- **failed**: Service failed to start or crashed

### Troubleshooting Failed Services
1. Check service logs for error details
2. Verify configuration files
3. Check service dependencies
4. Review system resources
5. Test service restart capability
"""
187 |
    @mcp.prompt(
        title="Linux System Overview",
        description="Get comprehensive Linux system health information"
    )
    async def linux_system_overview() -> str:
        """
        Return a static markdown guide for the get_linux_system_overview tool.

        Registered as an MCP prompt. Takes no arguments; describes the
        information the tool reports, how to read its health indicators,
        and which follow-up tools to run next.
        """
        return """
# 🖥️ Linux System Overview

## Tool: get_linux_system_overview

### Purpose
Provides comprehensive system information and health status for Linux systems.

### Usage
```
Tool: get_linux_system_overview
```

### Information Provided
- **System Info**: Hostname, uptime, kernel version
- **Distribution**: OS name and version
- **Resources**: CPU, memory, disk usage
- **Critical Errors**: Recent error logs
- **Service Status**: Failed service count

### Health Indicators
✅ **Healthy**: No failed services, low resource usage
⚠️ **Warning**: 1-2 failed services, moderate resource usage
❌ **Critical**: Multiple failures, high resource usage

### Follow-up Actions
Based on the overview:
1. Investigate failed services with analyze_linux_services
2. Check specific errors with query_systemd_journal
3. Monitor resource usage trends
4. Plan maintenance if needed
"""
228 |
    @mcp.prompt(
        title="Linux Boot Troubleshooting",
        description="Diagnose Linux boot and startup issues"
    )
    async def linux_boot_troubleshooting() -> str:
        """
        Return a static markdown walkthrough for diagnosing boot issues.

        Registered as an MCP prompt. Takes no arguments; lays out a
        step-by-step investigation using query_systemd_journal and
        analyze_linux_services, plus common boot failure patterns.
        """
        return """
# 🚀 Linux Boot Troubleshooting

## Diagnosing Boot Issues

### Step 1: Check Boot Messages
```
Tool: query_systemd_journal
Parameters: time_duration="2h", max_lines=500
```
Look for systemd and kernel messages during boot.

### Step 2: Identify Failed Services
```
Tool: analyze_linux_services
Parameters: include_failed=true
```
Services that fail during boot often indicate issues.

### Step 3: Check Critical Errors
```
Tool: query_systemd_journal
Parameters: priority="err", time_duration="1h"
```
Focus on error messages from boot time.

### Common Boot Problems

#### Service Dependency Failures
- **Symptom**: "Dependency failed for..."
- **Check**: Service order and requirements
- **Fix**: Resolve dependent service issues first

#### Hardware Initialization
- **Symptom**: Kernel errors, driver failures
- **Check**: dmesg output, kernel logs
- **Fix**: Update drivers, check hardware

#### Filesystem Issues
- **Symptom**: Mount failures, read-only root
- **Check**: fstab entries, disk errors
- **Fix**: fsck, correct mount options

### Boot Performance
- Use `systemd-analyze` for boot timing
- Check for slow services
- Optimize service startup order
- Disable unnecessary boot services
"""
286 |
    @mcp.prompt(
        title="Linux Security Monitoring",
        description="Monitor authentication and security events on Linux"
    )
    async def linux_security_monitoring() -> str:
        """
        Return a static markdown guide for monitoring security events.

        Registered as an MCP prompt. Takes no arguments; shows journal
        queries for SSH, failed authentication, and sudo activity, plus
        log patterns that indicate suspicious behavior.
        """
        return """
# 🔒 Linux Security Monitoring

## Authentication Monitoring

### Step 1: SSH Login Activity
```
Tool: query_systemd_journal
Parameters: service_name="sshd", time_duration="24h"
```

### Step 2: Failed Authentication
```
Tool: query_systemd_journal
Parameters: priority="warning", time_duration="6h"
```
Look for "Failed password" or "authentication failure".

### Step 3: Sudo Usage
```
Tool: query_systemd_journal
Parameters: service_name="sudo", time_duration="24h"
```

## Security Patterns to Watch

### Failed Login Attempts
- Multiple failures from same IP
- Attempts on non-existent users
- Rapid retry patterns
- Unusual login times

### Privilege Escalation
- sudo usage by new users
- Unexpected root processes
- Service account activities
- Permission changes

### System Modifications
- Package installations
- Configuration changes
- New user accounts
- Service modifications

## Security Event Examples
- **"Failed password for"**: SSH authentication failure
- **"Accepted publickey"**: Successful SSH key auth
- **"session opened for user root"**: Root access
- **"COMMAND="**: Sudo command execution
"""
345 |
    @mcp.prompt(
        title="Linux Performance Issues",
        description="Diagnose Linux system performance problems"
    )
    async def linux_performance_issues() -> str:
        """
        Return a static markdown guide for diagnosing performance problems.

        Registered as an MCP prompt. Takes no arguments; walks through
        journal queries for memory, disk, and service issues and lists
        common bottleneck categories with suggested remedies.
        """
        return """
# 📊 Linux Performance Issues

## Investigating Performance Problems

### Step 1: Check System Warnings
```
Tool: query_systemd_journal
Parameters: priority="warning", time_duration="6h"
```
Look for resource-related warnings.

### Step 2: Memory Issues
Search for OOM (Out of Memory) events:
```
Tool: query_systemd_journal
Parameters: time_duration="24h", max_lines=200
```
Look for "Out of memory" or "killed process".

### Step 3: Disk Space Problems
```
Tool: query_systemd_journal
Parameters: priority="err", time_duration="12h"
```
Search for "No space left on device".

### Step 4: Service Performance
```
Tool: analyze_linux_services
Parameters: include_failed=true
```
Check for services with performance issues.

## Common Performance Issues

### High Memory Usage
- **Symptoms**: OOM killer activations
- **Investigation**: Check process memory usage
- **Solutions**: Add swap, optimize applications

### Disk I/O Bottlenecks
- **Symptoms**: Slow response, high wait times
- **Investigation**: iostat, iotop results
- **Solutions**: Optimize I/O patterns, upgrade storage

### CPU Saturation
- **Symptoms**: High load average, slow processing
- **Investigation**: Check CPU-intensive processes
- **Solutions**: Optimize code, add CPU resources

### Network Issues
- **Symptoms**: Connection timeouts, packet loss
- **Investigation**: Network service logs
- **Solutions**: Check bandwidth, optimize network
"""
410 |
411 | @mcp.prompt(
412 | title="Linux Service Management",
413 | description="Managing and troubleshooting specific Linux services"
414 | )
415 | async def linux_service_management(
416 | service_type: Optional[str] = None
417 | ) -> str:
418 | """
419 | Guide for managing specific Linux services.
420 |
421 | Args:
422 | service_type: Type of service (web, database, system, etc.)
423 | """
424 |
425 | base_guide = """
426 | # 🛠️ Linux Service Management
427 |
428 | ## Managing systemd Services
429 |
430 | ### Check Service Status
431 | ```
432 | Tool: analyze_linux_services
433 | Parameters: service_pattern="service-name"
434 | ```
435 |
436 | ### View Service Logs
437 | ```
438 | Tool: query_systemd_journal
439 | Parameters: service_name="service-name", time_duration="1h"
440 | ```
441 |
442 | ### Common Service Operations
443 | - **Start**: systemctl start service-name
444 | - **Stop**: systemctl stop service-name
445 | - **Restart**: systemctl restart service-name
446 | - **Enable**: systemctl enable service-name
447 | - **Status**: systemctl status service-name
448 | """
449 |
450 | service_guides = {
451 | "web": """
452 | ## Web Server Services
453 |
454 | ### Nginx
455 | ```
456 | Tool: query_systemd_journal
457 | Parameters: service_name="nginx", priority="err"
458 | ```
459 | Common issues: Port conflicts, configuration errors
460 |
461 | ### Apache
462 | ```
463 | Tool: query_systemd_journal
464 | Parameters: service_name="apache2", priority="err"
465 | ```
466 | Common issues: Module conflicts, .htaccess errors
467 | """,
468 | "database": """
469 | ## Database Services
470 |
471 | ### MySQL/MariaDB
472 | ```
473 | Tool: query_systemd_journal
474 | Parameters: service_name="mysql", priority="err"
475 | ```
476 | Common issues: Connection limits, disk space
477 |
478 | ### PostgreSQL
479 | ```
480 | Tool: query_systemd_journal
481 | Parameters: service_name="postgresql", priority="err"
482 | ```
483 | Common issues: Shared memory, connection pooling
484 | """,
485 | "system": """
486 | ## System Services
487 |
488 | ### SSH
489 | ```
490 | Tool: query_systemd_journal
491 | Parameters: service_name="sshd", time_duration="6h"
492 | ```
493 | Monitor: Authentication attempts, configuration
494 |
495 | ### Cron
496 | ```
497 | Tool: query_systemd_journal
498 | Parameters: service_name="cron", time_duration="24h"
499 | ```
500 | Monitor: Job execution, failures
501 | """
502 | }
503 |
504 | if service_type and service_type.lower() in service_guides:
505 | base_guide += service_guides[service_type.lower()]
506 |
507 | base_guide += """
508 | ## Service Troubleshooting Steps
509 | 1. Check service status and recent logs
510 | 2. Verify configuration files
511 | 3. Check service dependencies
512 | 4. Review resource availability
513 | 5. Test service functionality
514 | 6. Monitor after restart
515 | """
516 |
517 | return base_guide
518 |
    @mcp.prompt(
        title="Linux Log Patterns Reference",
        description="Common Linux log patterns and their meanings"
    )
    async def linux_log_patterns() -> str:
        """Return a static reference guide of common Linux log patterns.

        Covers service-management, security, system, network, and
        performance log messages and what each pattern means. Static
        prompt text only; performs no log analysis itself.
        """
        return """
# 📖 Linux Log Patterns Reference

## Service Management Patterns

### Successful Operations
- **"Started [Service]"**: Service startup success
- **"Reloaded [Service]"**: Configuration reload
- **"Listening on"**: Service accepting connections
- **"Reached target"**: systemd target achieved

### Service Failures
- **"Failed to start"**: Startup failure
- **"Main process exited"**: Service crash
- **"Dependency failed"**: Required service unavailable
- **"Timed out"**: Service startup timeout
- **"code=exited, status=1"**: Exit with error

## Security Patterns

### Authentication
- **"Failed password for"**: Login failure
- **"Accepted publickey"**: SSH key success
- **"session opened"**: User session start
- **"session closed"**: User session end
- **"COMMAND="**: Sudo command execution

### Security Events
- **"authentication failure"**: PAM auth fail
- **"Connection closed by"**: Dropped connection
- **"Invalid user"**: Non-existent user login
- **"Connection reset"**: Network interruption

## System Events

### Boot/Shutdown
- **"Booting Linux"**: Kernel boot start
- **"Started Session"**: User session start
- **"Reached target Multi-User"**: Boot complete
- **"Stopped target"**: Shutdown initiated

### Resource Issues
- **"Out of memory"**: OOM killer activated
- **"No space left"**: Disk full
- **"Too many open files"**: File descriptor limit
- **"Cannot allocate memory"**: Memory exhaustion

## Network Patterns

### Connection Events
- **"link is up"**: Network interface active
- **"link is down"**: Network interface inactive
- **"DHCPREQUEST"**: IP address request
- **"DHCPACK"**: IP address assigned

### Network Errors
- **"Name or service not known"**: DNS failure
- **"Connection refused"**: Service not listening
- **"Network is unreachable"**: Routing issue
- **"Connection timed out"**: No response

## Performance Indicators

### Warning Signs
- **"took too long"**: Slow operation
- **"degraded"**: Performance issue
- **"high load"**: System overload
- **"throttling"**: Rate limiting active

### Critical Issues
- **"segfault"**: Memory violation
- **"core dumped"**: Process crash
- **"kernel panic"**: System crash
- **"hung task"**: Process stuck
"""
602 |
    @mcp.prompt(
        title="Linux Distribution Differences",
        description="Guide for log locations across different Linux distributions"
    )
    async def linux_distribution_guide() -> str:
        """Return a static guide to distribution-specific log locations.

        Covers Debian/Ubuntu, RHEL-family, Arch, and SUSE log paths plus
        systemd adoption notes. Static prompt text only; performs no
        distribution detection itself.
        """
        return """
# 🐧 Linux Distribution Differences

## Log File Locations

### Debian/Ubuntu
- **System logs**: /var/log/syslog
- **Auth logs**: /var/log/auth.log
- **Kernel**: /var/log/kern.log
- **Package manager**: /var/log/dpkg.log

### RHEL/CentOS/Fedora
- **System logs**: /var/log/messages
- **Auth logs**: /var/log/secure
- **Kernel**: /var/log/messages
- **Package manager**: /var/log/yum.log

### Arch Linux
- **Primary logging**: systemd journal only
- **Persistent logs**: /var/log/journal/
- **Package manager**: /var/log/pacman.log

### SUSE
- **System logs**: /var/log/messages
- **Auth logs**: /var/log/messages
- **Package manager**: /var/log/zypper.log

## systemd Adoption

### Full systemd
- Ubuntu 16.04+
- Debian 8+
- RHEL/CentOS 7+
- Fedora 15+
- Arch Linux
- openSUSE

### SysV Init or Other
- Older distributions
- Some embedded systems
- Specialized distributions

## Best Practices
1. Check for systemd first (systemctl available)
2. Fall back to traditional logs if needed
3. Use distribution detection for paths
4. Handle both logging systems when possible
"""
659 |
    @mcp.prompt(
        title="Linux Emergency Diagnostics",
        description="Quick diagnostics for Linux system emergencies"
    )
    async def linux_emergency_diagnostics() -> str:
        """Return a static emergency-diagnostics runbook for Linux.

        Lists phased triage steps, common failure scenarios, recovery
        actions, and a quick command reference. Static prompt text only;
        performs no diagnostics itself.
        """
        return """
# 🚨 Linux Emergency Diagnostics

## Critical System Failure

### Phase 1: Initial Assessment (< 2 minutes)
```
Tool: get_linux_system_overview
```
Get immediate system status.

### Phase 2: Service Status (2-5 minutes)
```
Tool: analyze_linux_services
Parameters: include_failed=true
```
Identify all failed services.

### Phase 3: Recent Errors (5-10 minutes)
```
Tool: query_systemd_journal
Parameters: priority="err", time_duration="1h"
```
Find recent critical errors.

## Emergency Scenarios

### System Won't Boot
1. Check journal from rescue mode
2. Look for kernel panic messages
3. Verify filesystem integrity
4. Check hardware initialization

### All Services Failing
1. Check system resources (disk, memory)
2. Verify systemd functionality
3. Check for dependency loops
4. Review recent system changes

### Performance Crisis
1. Check for OOM killer activity
2. Look for disk full errors
3. Monitor CPU/memory usage
4. Identify resource hogs

### Security Breach
1. Check authentication logs immediately
2. Look for privilege escalations
3. Monitor network connections
4. Review system modifications

## Recovery Actions

### Service Recovery
- Restart failed services systematically
- Check service dependencies first
- Monitor logs during restart
- Verify functionality after start

### Resource Recovery
- Free disk space (logs, temp files)
- Kill memory-intensive processes
- Clear system caches if needed
- Add swap space temporarily

### Access Recovery
- Reset service configurations
- Restore from known-good backups
- Check file permissions
- Verify network connectivity

## Critical Commands Reference
- **Journal since boot**: journalctl -b
- **Follow live logs**: journalctl -f
- **System status**: systemctl status
- **Failed services**: systemctl --failed
- **Resource usage**: top, htop, free, df
"""
```
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/mcp_server/tools/windows_test_tools.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Windows Event Log testing and diagnostic MCP tools.
3 | """
4 |
from typing import Any, Dict, Optional
6 |
7 | from mcp.server import FastMCP
8 | from pydantic import BaseModel, Field
9 |
10 |
class WindowsEventLogTestRequest(BaseModel):
    """Request model for testing Windows Event Log access.

    Used by the diagnostic tools that probe a single named log.
    """

    # Standard Windows logs are "System", "Application" and "Security".
    log_name: str = Field(
        "System", description="Event log name to test (System, Application, Security)"
    )
    # Upper bound on how many recent entries a tool will read.
    max_entries: int = Field(10, description="Maximum number of entries to fetch")
18 |
19 |
class WindowsEventLogQueryRequest(BaseModel):
    """Request model for querying Windows Event Logs.

    All filter fields are optional; a filter left as ``None`` matches
    every event.
    """

    log_name: str = Field("System", description="Event log name to query")
    # Declared Optional[...] so the None default is a valid value: a plain
    # `int`/`str` annotation with a None default is rejected by pydantic v2
    # and only worked by accident under v1.
    event_id: Optional[int] = Field(None, description="Specific Event ID to filter by")
    level: Optional[str] = Field(
        None, description="Event level (Error, Warning, Information)"
    )
    time_duration: str = Field(
        "1h", description="Time duration (e.g., '30m', '2h', '1d')"
    )
    max_entries: int = Field(50, description="Maximum number of entries to return")
30 |
31 |
32 | def register_windows_test_tools(mcp: FastMCP):
33 | """Register all Windows testing tools with the MCP server."""
34 |
35 | @mcp.tool()
36 | async def test_windows_event_log_access() -> Dict[str, Any]:
37 | """
38 | Test Windows Event Log access and permissions.
39 |
40 | This tool checks if the system can access Windows Event Logs
41 | and provides diagnostic information about available logs.
42 | """
43 | import platform
44 |
45 | if platform.system() != "Windows":
46 | return {
47 | "status": "unavailable",
48 | "message": "Windows Event Logs are only available on Windows systems",
49 | "platform": platform.system(),
50 | }
51 |
52 | try:
53 | import win32evtlog
54 |
55 | # Test access to common event logs
56 | test_results = {}
57 | common_logs = ["System", "Application", "Security"]
58 |
59 | for log_name in common_logs:
60 | try:
61 | hand = win32evtlog.OpenEventLog(None, log_name)
62 | win32evtlog.CloseEventLog(hand)
63 | test_results[log_name] = {"accessible": True, "error": None}
64 | except Exception as e:
65 | test_results[log_name] = {"accessible": False, "error": str(e)}
66 |
67 | return {
68 | "status": "available",
69 | "message": "Windows Event Log access test completed",
70 | "log_access": test_results,
71 | "pywin32_available": True,
72 | }
73 |
74 | except ImportError:
75 | return {
76 | "status": "missing_dependencies",
77 | "message": "pywin32 package is required for Windows Event Log access",
78 | "pywin32_available": False,
79 | }
80 | except Exception as e:
81 | return {
82 | "status": "error",
83 | "message": f"Error testing Windows Event Log access: {str(e)}",
84 | }
85 |
86 | @mcp.tool()
87 | async def get_windows_event_log_info(
88 | request: WindowsEventLogTestRequest,
89 | ) -> Dict[str, Any]:
90 | """
91 | Get detailed information about a specific Windows Event Log.
92 |
93 | This tool provides metadata and recent entries from the specified
94 | Windows Event Log for diagnostic purposes.
95 | """
96 | import platform
97 |
98 | if platform.system() != "Windows":
99 | return {"error": "This tool is only available on Windows systems"}
100 |
101 | try:
102 | import win32evtlog
103 | import win32evtlogutil
104 | from win32con import EVENTLOG_BACKWARDS_READ, EVENTLOG_SEQUENTIAL_READ
105 |
106 | hand = win32evtlog.OpenEventLog(None, request.log_name)
107 |
108 | # Get log information
109 | try:
110 | num_records = win32evtlog.GetNumberOfEventLogRecords(hand)
111 | oldest_record = win32evtlog.GetOldestEventLogRecord(hand)
112 | info = (oldest_record, num_records)
113 | except:
114 | info = None
115 |
116 | # Get recent entries
117 | flags = EVENTLOG_BACKWARDS_READ | EVENTLOG_SEQUENTIAL_READ
118 |
119 | entries = []
120 | count = 0
121 |
122 | while count < request.max_entries:
123 | events = win32evtlog.ReadEventLog(hand, flags, 0)
124 | if not events:
125 | break # No more events to read
126 |
127 | for event in events:
128 | if count >= request.max_entries:
129 | break
130 |
131 | try:
132 | message = win32evtlogutil.SafeFormatMessage(
133 | event, request.log_name
134 | )
135 | except:
136 | message = "(Unable to format message)"
137 |
138 | entries.append(
139 | {
140 | "event_id": event.EventID
141 | & 0xFFFFFFFF, # Convert to unsigned
142 | "time_generated": str(event.TimeGenerated),
143 | "source_name": event.SourceName,
144 | "event_type": event.EventType,
145 | "message_preview": message[:200] if message else "",
146 | }
147 | )
148 | count += 1
149 |
150 | win32evtlog.CloseEventLog(hand)
151 |
152 | return {
153 | "log_name": request.log_name,
154 | "log_info": {
155 | "oldest_record_number": info[0] if info else "Unknown",
156 | "total_records": info[1] if info else "Unknown",
157 | },
158 | "recent_entries": entries,
159 | "entries_retrieved": len(entries),
160 | "max_requested": request.max_entries,
161 | }
162 |
163 | except ImportError:
164 | return {"error": "pywin32 package is required for Windows Event Log access"}
165 | except Exception as e:
166 | return {"error": f"Error accessing Windows Event Log: {str(e)}"}
167 |
168 | @mcp.tool()
169 | async def query_windows_events_by_criteria(
170 | request: WindowsEventLogQueryRequest,
171 | ) -> Dict[str, Any]:
172 | """
173 | Query Windows Event Logs with specific criteria.
174 |
175 | This tool allows filtering Windows Event Logs by Event ID,
176 | level, and time range for targeted analysis.
177 | """
178 | import platform
179 |
180 | if platform.system() != "Windows":
181 | return {"error": "This tool is only available on Windows systems"}
182 |
183 | try:
184 | import win32evtlog
185 | import win32evtlogutil
186 | from win32con import EVENTLOG_BACKWARDS_READ, EVENTLOG_SEQUENTIAL_READ
187 | import xml.etree.ElementTree as ET
188 | from datetime import datetime
189 |
190 | from ..server import parse_time_param
191 |
192 | # Parse time duration
193 | if request.time_duration:
194 | start_time = parse_time_param(request.time_duration)
195 | else:
196 | start_time = None
197 |
198 | matching_events = []
199 | count = 0
200 | total_checked = 0
201 | level_map = {1: "Error", 2: "Warning", 4: "Information"}
202 |
203 | # Check if this is a custom Application and Services log
204 | if "/" in request.log_name or "\\" in request.log_name:
205 | # Use newer EvtQuery API for custom logs
206 | try:
207 | query_flags = (
208 | win32evtlog.EvtQueryChannelPath
209 | | win32evtlog.EvtQueryReverseDirection
210 | )
211 |
212 | # Build XPath query
213 | conditions = []
214 | if start_time:
215 | start_ms = int(start_time.timestamp() * 1000)
216 | conditions.append(f"TimeCreated[@SystemTime >= '{start_ms}']")
217 | if request.event_id:
218 | conditions.append(f"EventID={request.event_id}")
219 | if request.level:
220 | level_num = {"error": 2, "warning": 3, "information": 4}.get(
221 | request.level.lower(), 0
222 | )
223 | if level_num:
224 | conditions.append(f"Level={level_num}")
225 |
226 | xpath_query = "*"
227 | if conditions:
228 | xpath_query = f"*[System[{' and '.join(conditions)}]]"
229 |
230 | query_handle = win32evtlog.EvtQuery(
231 | request.log_name, query_flags, xpath_query
232 | )
233 |
234 | while count < request.max_entries:
235 | events = win32evtlog.EvtNext(query_handle, 10)
236 | if not events:
237 | break
238 |
239 | for event in events:
240 | total_checked += 1
241 |
242 | # Render event as XML
243 | xml_content = win32evtlog.EvtRender(
244 | event, win32evtlog.EvtRenderEventXml
245 | )
246 |
247 | # Parse XML to extract event data
248 | root = ET.fromstring(xml_content)
249 | system = root.find(".//System")
250 |
251 | event_id = (
252 | int(system.find("EventID").text)
253 | if system.find("EventID") is not None
254 | else 0
255 | )
256 | event_id = event_id & 0xFFFFFFFF
257 |
258 | provider = system.find("Provider")
259 | source_name = (
260 | provider.get("Name", "Unknown")
261 | if provider is not None
262 | else "Unknown"
263 | )
264 |
265 | time_created = system.find("TimeCreated")
266 | if time_created is not None:
267 | time_str = time_created.get(
268 | "SystemTime", str(datetime.now())
269 | )
270 | else:
271 | time_str = str(datetime.now())
272 |
273 | level = system.find("Level")
274 | event_type = int(level.text) if level is not None else 4
275 |
276 | # Extract message
277 | message = ""
278 | event_data = root.find(".//EventData")
279 | if event_data is not None:
280 | data_items = []
281 | for data in event_data:
282 | name = data.get("Name", "")
283 | value = data.text or ""
284 | if name:
285 | data_items.append(f"{name}: {value}")
286 | message = "; ".join(data_items)
287 |
288 | matching_events.append(
289 | {
290 | "event_id": event_id,
291 | "time_generated": time_str,
292 | "source_name": source_name,
293 | "event_type": event_type,
294 | "level": level_map.get(event_type, "Unknown"),
295 | "message": message[:500] if message else "",
296 | }
297 | )
298 |
299 | count += 1
300 | win32evtlog.EvtClose(event)
301 |
302 | if count >= request.max_entries:
303 | break
304 |
305 | win32evtlog.EvtClose(query_handle)
306 |
307 | except Exception as e:
308 | return {"error": f"Error querying custom event log: {str(e)}"}
309 | else:
310 | # Use legacy API for standard logs
311 | hand = win32evtlog.OpenEventLog(None, request.log_name)
312 | flags = EVENTLOG_BACKWARDS_READ | EVENTLOG_SEQUENTIAL_READ
313 |
314 | # Continue reading until we have enough matching events or no more events
315 | while count < request.max_entries:
316 | events = win32evtlog.ReadEventLog(hand, flags, 0)
317 | if not events:
318 | break # No more events to read
319 |
320 | for event in events:
321 | total_checked += 1
322 |
323 | # Check time filter
324 | if start_time and event.TimeGenerated < start_time:
325 | continue
326 |
327 | # Check Event ID filter
328 | # Handle both signed and unsigned Event ID comparisons
329 | if request.event_id:
330 | # Convert to unsigned 32-bit for comparison
331 | event_id_unsigned = event.EventID & 0xFFFFFFFF
332 | if (
333 | event_id_unsigned != request.event_id
334 | and event.EventID != request.event_id
335 | ):
336 | continue
337 |
338 | # Check level filter (simplified mapping)
339 | if request.level:
340 | event_level = level_map.get(event.EventType, "Unknown")
341 | if event_level.lower() != request.level.lower():
342 | continue
343 |
344 | # Event matches all criteria
345 | try:
346 | message = win32evtlogutil.SafeFormatMessage(
347 | event, request.log_name
348 | )
349 | except:
350 | message = "(Unable to format message)"
351 |
352 | matching_events.append(
353 | {
354 | "event_id": event.EventID
355 | & 0xFFFFFFFF, # Convert to unsigned
356 | "time_generated": str(event.TimeGenerated),
357 | "source_name": event.SourceName,
358 | "event_type": event.EventType,
359 | "level": level_map.get(event.EventType, "Unknown"),
360 | "message": message[:500] if message else "",
361 | }
362 | )
363 |
364 | count += 1
365 | if count >= request.max_entries:
366 | break
367 |
368 | win32evtlog.CloseEventLog(hand)
369 |
370 | return {
371 | "log_name": request.log_name,
372 | "query_criteria": {
373 | "event_id": request.event_id,
374 | "level": request.level,
375 | "time_duration": request.time_duration,
376 | "start_time": str(start_time) if start_time else None,
377 | },
378 | "matching_events": matching_events,
379 | "total_matches": len(matching_events),
380 | "total_events_checked": total_checked,
381 | "max_requested": request.max_entries,
382 | }
383 |
384 | except ImportError:
385 | return {"error": "pywin32 package is required for Windows Event Log access"}
386 | except Exception as e:
387 | return {"error": f"Error querying Windows Event Logs: {str(e)}"}
388 |
389 | @mcp.tool()
390 | async def get_windows_system_health() -> Dict[str, Any]:
391 | """
392 | Get Windows system health overview from Event Logs.
393 |
394 | This tool analyzes recent System and Application event logs
395 | to provide a quick health assessment of the Windows system.
396 | """
397 | import platform
398 |
399 | if platform.system() != "Windows":
400 | return {"error": "This tool is only available on Windows systems"}
401 |
402 | try:
403 | from datetime import datetime, timedelta
404 |
405 | import win32evtlog
406 | import win32evtlogutil
407 | from win32con import EVENTLOG_BACKWARDS_READ, EVENTLOG_SEQUENTIAL_READ
408 |
409 | # Check last 24 hours
410 | start_time = datetime.now() - timedelta(hours=24)
411 |
412 | health_summary = {"errors": 0, "warnings": 0, "critical_events": []}
413 |
414 | for log_name in ["System", "Application"]:
415 | try:
416 | hand = win32evtlog.OpenEventLog(None, log_name)
417 | flags = EVENTLOG_BACKWARDS_READ | EVENTLOG_SEQUENTIAL_READ
418 |
419 | log_errors = 0
420 | log_warnings = 0
421 | done_reading = False
422 |
423 | while not done_reading:
424 | events = win32evtlog.ReadEventLog(hand, flags, 0)
425 | if not events:
426 | break # No more events to read
427 |
428 | for event in events:
429 | if event.TimeGenerated < start_time:
430 | done_reading = True
431 | break
432 |
433 | if event.EventType == 1: # Error
434 | log_errors += 1
435 | if log_errors <= 5: # Capture first 5 errors
436 | try:
437 | message = win32evtlogutil.SafeFormatMessage(
438 | event, log_name
439 | )
440 | except:
441 | message = "Unable to format message"
442 |
443 | health_summary["critical_events"].append(
444 | {
445 | "log": log_name,
446 | "type": "Error",
447 | "event_id": event.EventID
448 | & 0xFFFFFFFF, # Convert to unsigned
449 | "source": event.SourceName,
450 | "time": str(event.TimeGenerated),
451 | "message": message[:200],
452 | }
453 | )
454 |
455 | elif event.EventType == 2: # Warning
456 | log_warnings += 1
457 |
458 | health_summary["errors"] += log_errors
459 | health_summary["warnings"] += log_warnings
460 |
461 | win32evtlog.CloseEventLog(hand)
462 |
463 | except Exception as e:
464 | health_summary[f"{log_name}_error"] = str(e)
465 |
466 | # Determine overall health status
467 | if health_summary["errors"] == 0 and health_summary["warnings"] < 5:
468 | status = "healthy"
469 | elif health_summary["errors"] < 3 and health_summary["warnings"] < 20:
470 | status = "fair"
471 | else:
472 | status = "concerning"
473 |
474 | return {
475 | "time_period": "Last 24 hours",
476 | "overall_status": status,
477 | "summary": {
478 | "total_errors": health_summary["errors"],
479 | "total_warnings": health_summary["warnings"],
480 | },
481 | "critical_events": health_summary["critical_events"],
482 | "timestamp": str(datetime.now()),
483 | }
484 |
485 | except ImportError:
486 | return {"error": "pywin32 package is required for Windows Event Log access"}
487 | except Exception as e:
488 | return {"error": f"Error analyzing Windows system health: {str(e)}"}
489 |
```
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/mcp_server/prompts/process_monitoring_prompt.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Process monitoring and system resource prompts for the MCP Log Analyzer server.
3 | """
4 |
5 | from typing import Optional
6 | from mcp.server import FastMCP
7 |
8 |
9 | def register_process_monitoring_prompts(mcp: FastMCP):
10 | """Register all process monitoring prompts."""
11 |
    @mcp.prompt(
        title="Test System Resources",
        description="Guide for testing system resource monitoring capabilities"
    )
    async def test_system_resources() -> str:
        """Return usage guidance for the test_system_resources_access tool.

        Static prompt text; performs no monitoring itself.
        """
        return """
# 🔍 Test System Resources

## Tool: test_system_resources_access

### Purpose
Tests CPU, memory, disk, network, and process monitoring capabilities.

### What It Tests
- **CPU**: Core count, usage, frequency
- **Memory**: Total, available, usage percentage
- **Disk**: Space usage, I/O counters
- **Network**: Traffic statistics, connections
- **Processes**: Enumeration and basic info

### Usage
```
Tool: test_system_resources_access
```

### Interpreting Results
- **psutil version**: Library version for compatibility
- **Accessible resources**: What can be monitored
- **Current values**: Baseline measurements
- **Platform notes**: OS-specific capabilities

### Next Steps
- Use specialized monitoring tools for details
- Set up regular monitoring if needed
- Note any access limitations
- Plan resource tracking strategy
"""
52 |
    @mcp.prompt(
        title="System Performance Analysis",
        description="Guide for analyzing current system performance"
    )
    async def system_performance_guide() -> str:
        """Return usage guidance for the analyze_system_performance tool.

        Static prompt text describing parameters, metrics, and health
        status meanings; performs no analysis itself.
        """
        return """
# 📊 System Performance Analysis

## Tool: analyze_system_performance

### Purpose
Provides comprehensive analysis of system performance metrics.

### Parameters
- **include_network**: Include network statistics (default: true)
- **include_disk**: Include disk I/O statistics (default: true)
- **sample_interval**: Sampling interval in seconds (default: 1.0)

### Usage Examples
```
# Full system analysis
Tool: analyze_system_performance

# CPU and memory only (faster)
Tool: analyze_system_performance
Parameters: include_network=false, include_disk=false

# Extended sampling
Tool: analyze_system_performance
Parameters: sample_interval=5.0
```

### Performance Metrics

#### CPU Metrics
- **Usage percent**: Current utilization
- **Core count**: Physical and logical cores
- **Frequency**: Current/min/max MHz
- **Load average**: 1/5/15 minute (Unix)

#### Memory Metrics
- **Virtual memory**: Physical RAM usage
- **Swap memory**: Virtual memory usage
- **Available**: Memory for new processes
- **Percent used**: Overall utilization

#### Disk Metrics
- **Usage**: Total/used/free space
- **I/O counters**: Read/write operations
- **Throughput**: Bytes read/written

#### Network Metrics
- **Traffic**: Bytes sent/received
- **Packets**: Count sent/received
- **Connections**: Active connection count

### Health Status
- **Good**: Normal resource usage
- **Fair**: Elevated but manageable
- **Concerning**: Action needed
"""
117 |
118 | @mcp.prompt(
119 | title="Find Resource-Intensive Processes",
120 | description="Guide for identifying high CPU and memory consumers"
121 | )
122 | async def resource_intensive_processes(
123 | sort_by: Optional[str] = None
124 | ) -> str:
125 | """
126 | Find processes consuming significant resources.
127 |
128 | Args:
129 | sort_by: Sort criteria (cpu, memory, pid)
130 | """
131 |
132 | base_guide = """
133 | # 🔍 Find Resource-Intensive Processes
134 |
135 | ## Tool: find_resource_intensive_processes
136 |
137 | ### Purpose
138 | Identifies processes with high CPU or memory usage for troubleshooting.
139 |
140 | ### Parameters
141 | - **process_name**: Filter by name (optional)
142 | - **min_cpu_percent**: CPU threshold (default: 0.0)
143 | - **min_memory_percent**: Memory threshold (default: 0.0)
144 | - **max_results**: Result limit (default: 20)
145 | - **sort_by**: Sort order (cpu/memory/pid)
146 |
147 | ### Usage Examples
148 | ```
149 | # Top CPU consumers
150 | Tool: find_resource_intensive_processes
151 | Parameters: min_cpu_percent=10, sort_by="cpu"
152 |
153 | # Memory hogs (>5% memory)
154 | Tool: find_resource_intensive_processes
155 | Parameters: min_memory_percent=5, sort_by="memory"
156 |
157 | # Specific process analysis
158 | Tool: find_resource_intensive_processes
159 | Parameters: process_name="python", sort_by="memory"
160 | ```
161 | """
162 |
163 | sort_guides = {
164 | "cpu": """
165 | ### Sorting by CPU
166 | Best for identifying:
167 | - Runaway processes
168 | - CPU-intensive tasks
169 | - Performance bottlenecks
170 | - Hung applications
171 |
172 | Look for:
173 | - Consistently high CPU usage
174 | - Unexpected CPU consumers
175 | - Multiple instances of same process
176 | """,
177 | "memory": """
178 | ### Sorting by Memory
179 | Best for identifying:
180 | - Memory leaks
181 | - Resource-heavy applications
182 | - Cache/buffer usage
183 | - Potential OOM risks
184 |
185 | Look for:
186 | - Growing memory usage
187 | - Disproportionate memory use
188 | - Zombie processes
189 | """,
190 | "pid": """
191 | ### Sorting by PID
192 | Best for:
193 | - Chronological process view
194 | - Parent-child relationships
195 | - System vs user processes
196 | - Process lifecycle analysis
197 |
198 | Lower PIDs are typically system processes.
199 | """
200 | }
201 |
202 | if sort_by and sort_by.lower() in sort_guides:
203 | base_guide += sort_guides[sort_by.lower()]
204 |
205 | base_guide += """
206 | ### Process Information Provided
207 | - **PID**: Process identifier
208 | - **Name**: Process name
209 | - **CPU %**: Current CPU usage
210 | - **Memory %**: Memory usage percentage
211 | - **Memory MB**: Actual memory in MB
212 | - **Status**: Running/sleeping/zombie
213 | - **Command**: Full command line
214 | - **Create time**: Process start time
215 | """
216 |
217 | return base_guide
218 |
    @mcp.prompt(
        title="Monitor Process Health",
        description="Guide for monitoring specific process health and status"
    )
    async def monitor_process_health() -> str:
        """Return usage guidance for the monitor_process_health tool.

        Static prompt text describing parameters, health indicators, and
        follow-up actions; performs no monitoring itself.
        """
        return """
# 🎯 Monitor Process Health

## Tool: monitor_process_health

### Purpose
Monitors specific process health, resource usage, and potential issues.

### Parameters
- **process_name**: Name of process to monitor (required)

### Usage Examples
```
# Monitor web server
Tool: monitor_process_health
Parameters: process_name="nginx"

# Monitor database
Tool: monitor_process_health
Parameters: process_name="postgres"

# Monitor custom application
Tool: monitor_process_health
Parameters: process_name="myapp"
```

### Health Indicators

#### Healthy Process
✅ Stable CPU usage
✅ Consistent memory usage
✅ Normal connection count
✅ Appropriate age for service
✅ Status: running

#### Warning Signs
⚠️ High CPU spikes
⚠️ Growing memory usage
⚠️ Many connections
⚠️ Recent restarts
⚠️ Status: sleeping (if unexpected)

#### Critical Issues
❌ Excessive CPU usage
❌ Memory leak indicators
❌ Connection exhaustion
❌ Very recent start (crash?)
❌ Status: zombie

### Multiple Instances
Tool handles multiple processes with same name:
- Reports each instance separately
- Shows total resource usage
- Identifies newest/oldest instances

### Follow-up Actions
1. Check process logs for errors
2. Monitor trends over time
3. Compare with baseline values
4. Investigate recent restarts
5. Check system resources
"""
289 |
    @mcp.prompt(
        title="System Health Summary",
        description="Get comprehensive system health overview"
    )
    async def system_health_summary() -> str:
        """Return usage guidance for the get_system_health_summary tool.

        Static prompt text explaining the health score, reported fields,
        and common issues; performs no assessment itself.
        """
        return """
# 📊 System Health Summary

## Tool: get_system_health_summary

### Purpose
Provides overall system health assessment with resource usage and top consumers.

### Usage
```
Tool: get_system_health_summary
```

### Health Score Interpretation
- **80-100**: Excellent - System running smoothly
- **60-79**: Good - Minor resource usage
- **40-59**: Fair - Moderate load, monitor closely
- **20-39**: Poor - High resource usage
- **0-19**: Critical - Immediate action needed

### Information Provided

#### Resource Summary
- CPU usage percentage
- Memory usage percentage
- Disk usage percentage
- Total process count

#### Top Consumers
- Top 5 CPU processes
- Top 5 memory processes
- Resource usage details
- Process command lines

#### Health Assessment
- Overall status (excellent/good/fair/poor/critical)
- Identified issues
- Recommended actions
- Timestamp

### Common Issues Detected
- High CPU usage (>80%)
- High memory usage (>90%)
- Excessive disk usage (>95%)
- Too many processes (>1000)
- Resource exhaustion risks

### Using the Summary
1. Quick daily health checks
2. Baseline establishment
3. Trend monitoring
4. Capacity planning
5. Problem identification
"""
352 |
353 | @mcp.prompt(
354 | title="CPU Troubleshooting",
355 | description="Diagnose and resolve high CPU usage issues"
356 | )
357 | async def cpu_troubleshooting() -> str:
358 | """
359 | Guide for troubleshooting CPU-related issues.
360 | """
361 | return """
362 | # 🔥 CPU Troubleshooting Guide
363 |
364 | ## Diagnosing High CPU Usage
365 |
366 | ### Step 1: Identify CPU Consumers
367 | ```
368 | Tool: find_resource_intensive_processes
369 | Parameters: min_cpu_percent=20, sort_by="cpu"
370 | ```
371 |
372 | ### Step 2: Analyze System Performance
373 | ```
374 | Tool: analyze_system_performance
375 | Parameters: include_disk=false, include_network=false
376 | ```
377 |
378 | ### Step 3: Monitor Specific Process
379 | ```
380 | Tool: monitor_process_health
381 | Parameters: process_name="high-cpu-process"
382 | ```
383 |
384 | ## Common CPU Issues
385 |
386 | ### Runaway Process
387 | **Symptoms**: Single process at 100% CPU
388 | **Causes**:
389 | - Infinite loops
390 | - Busy waiting
391 | - Algorithm issues
392 |
393 | **Solutions**:
394 | - Restart the process
395 | - Debug application code
396 | - Apply CPU limits
397 |
398 | ### System Overload
399 | **Symptoms**: Multiple processes high CPU
400 | **Causes**:
401 | - Too many concurrent tasks
402 | - Insufficient CPU cores
403 | - Background jobs
404 |
405 | **Solutions**:
406 | - Reduce concurrent load
407 | - Schedule tasks off-peak
408 | - Upgrade CPU resources
409 |
410 | ### CPU Thrashing
411 | **Symptoms**: Rapid CPU spikes
412 | **Causes**:
413 | - Context switching
414 | - Memory pressure
415 | - I/O wait
416 |
417 | **Solutions**:
418 | - Reduce process count
419 | - Increase memory
420 | - Optimize I/O operations
421 |
422 | ## Investigation Checklist
423 | - [ ] Check top CPU consumers
424 | - [ ] Review process command lines
425 | - [ ] Monitor CPU trends
426 | - [ ] Check load average
427 | - [ ] Verify cooling/throttling
428 | - [ ] Review recent changes
429 | """
430 |
431 | @mcp.prompt(
432 | title="Memory Troubleshooting",
433 | description="Diagnose and resolve memory usage issues"
434 | )
435 | async def memory_troubleshooting() -> str:
436 | """
437 | Guide for troubleshooting memory-related issues.
438 | """
439 | return """
440 | # 💾 Memory Troubleshooting Guide
441 |
442 | ## Diagnosing High Memory Usage
443 |
444 | ### Step 1: Find Memory Consumers
445 | ```
446 | Tool: find_resource_intensive_processes
447 | Parameters: min_memory_percent=5, sort_by="memory"
448 | ```
449 |
450 | ### Step 2: Check System Memory
451 | ```
452 | Tool: analyze_system_performance
453 | Parameters: include_disk=false, include_network=false
454 | ```
455 |
456 | ### Step 3: Monitor for Leaks
457 | ```
458 | Tool: monitor_process_health
459 | Parameters: process_name="suspected-process"
460 | ```
461 | Watch for growing memory over time.
462 |
463 | ## Common Memory Issues
464 |
465 | ### Memory Leaks
466 | **Symptoms**:
467 | - Gradual memory increase
468 | - Never releases memory
469 | - Eventually crashes
470 |
471 | **Detection**:
472 | - Monitor process over hours/days
473 | - Check memory vs process age
474 | - Look for linear growth
475 |
476 | **Solutions**:
477 | - Restart process periodically
478 | - Fix application code
479 | - Implement memory limits
480 |
481 | ### Memory Exhaustion
482 | **Symptoms**:
483 | - System using 95%+ memory
484 | - Heavy swap usage
485 | - System slowdown
486 |
487 | **Solutions**:
488 | - Kill unnecessary processes
489 | - Add more RAM
490 | - Configure swap space
491 | - Optimize applications
492 |
493 | ### Cache/Buffer Usage
494 | **Symptoms**:
495 | - High memory usage
496 | - But available memory exists
497 | - System performs well
498 |
499 | **Note**: This is normal Linux behavior.
500 | Cache/buffers are released when needed.
501 |
502 | ## Memory Analysis Steps
503 | 1. Identify top consumers
504 | 2. Check for growth patterns
505 | 3. Monitor swap usage
506 | 4. Review OOM killer logs
507 | 5. Calculate actual free memory
508 | 6. Plan capacity upgrades
509 | """
510 |
511 | @mcp.prompt(
512 | title="Process Monitoring Best Practices",
513 | description="Best practices for effective process monitoring"
514 | )
515 | async def process_monitoring_practices() -> str:
516 | """
517 | Best practices guide for process monitoring.
518 | """
519 | return """
520 | # 📋 Process Monitoring Best Practices
521 |
522 | ## Establishing Baselines
523 |
524 | ### Initial Baseline
525 | 1. Run system health summary
526 | ```
527 | Tool: get_system_health_summary
528 | ```
529 | 2. Document normal values:
530 | - Typical CPU usage
531 | - Average memory usage
532 | - Normal process count
533 | - Standard disk usage
534 |
535 | ### Regular Monitoring
536 | - **Daily**: Quick health check
537 | - **Weekly**: Trend analysis
538 | - **Monthly**: Capacity review
539 |
540 | ## Monitoring Strategy
541 |
542 | ### Proactive Monitoring
543 | Set thresholds for alerts:
544 | - CPU > 80% for 5 minutes
545 | - Memory > 90%
546 | - Disk > 85%
547 | - Critical process not running
548 |
549 | ### Resource Tracking
550 | ```
551 | # Track specific application
552 | Tool: monitor_process_health
553 | Parameters: process_name="critical-app"
554 |
555 | # Find resource spikes
556 | Tool: find_resource_intensive_processes
557 | Parameters: min_cpu_percent=50
558 | ```
559 |
560 | ### Trend Analysis
561 | 1. Collect metrics over time
562 | 2. Identify patterns:
563 | - Peak usage hours
564 | - Growth trends
565 | - Recurring issues
566 | 3. Plan capacity accordingly
567 |
568 | ## Common Monitoring Tasks
569 |
570 | ### Daily Health Check
571 | ```
572 | 1. Tool: get_system_health_summary
573 | 2. Review any issues flagged
574 | 3. Check critical processes
575 | 4. Note unusual patterns
576 | ```
577 |
578 | ### Performance Investigation
579 | ```
580 | 1. Tool: analyze_system_performance
581 | 2. Tool: find_resource_intensive_processes
582 | 3. Deep dive on problem processes
583 | 4. Check system logs
584 | ```
585 |
586 | ### Capacity Planning
587 | ```
588 | 1. Track resource trends
589 | 2. Project growth rates
590 | 3. Identify bottlenecks
591 | 4. Plan upgrades
592 | ```
593 |
594 | ## Key Metrics to Track
595 | - **CPU**: Usage %, load average
596 | - **Memory**: Used %, swap usage
597 | - **Disk**: Space %, I/O rates
598 | - **Network**: Bandwidth, connections
599 | - **Processes**: Count, top consumers
600 |
601 | ## Documentation
602 | Maintain records of:
603 | - Normal baselines
604 | - Known issues
605 | - Growth trends
606 | - Remediation steps
607 | - Capacity plans
608 | """
609 |
610 | @mcp.prompt(
611 | title="Emergency Performance Response",
612 | description="Quick response guide for performance emergencies"
613 | )
614 | async def emergency_performance() -> str:
615 | """
616 | Emergency response for critical performance issues.
617 | """
618 | return """
619 | # 🚨 Emergency Performance Response
620 |
621 | ## System Unresponsive
622 |
623 | ### Immediate Actions (< 2 minutes)
624 | ```
625 | Tool: get_system_health_summary
626 | ```
627 | Quick assessment of system state.
628 |
629 | ### Find Culprits (2-5 minutes)
630 | ```
631 | Tool: find_resource_intensive_processes
632 | Parameters: min_cpu_percent=50, min_memory_percent=20
633 | ```
634 |
635 | ### Kill Problem Processes
636 | 1. Identify non-critical processes with high resource usage
637 | 2. Terminate gracefully if possible
638 | 3. Force kill if necessary
639 | 4. Monitor system response
640 |
641 | ## High CPU Emergency
642 |
643 | ### Quick Fix
644 | ```
645 | # Find top CPU users
646 | Tool: find_resource_intensive_processes
647 | Parameters: min_cpu_percent=30, sort_by="cpu"
648 | ```
649 |
650 | Actions:
651 | 1. Kill non-essential processes
652 | 2. Nice/renice CPU hogs
653 | 3. Disable background tasks
654 | 4. Check for runaway processes
655 |
656 | ## Memory Emergency
657 |
658 | ### Quick Fix
659 | ```
660 | # Find memory hogs
661 | Tool: find_resource_intensive_processes
662 | Parameters: min_memory_percent=10, sort_by="memory"
663 | ```
664 |
665 | Actions:
666 | 1. Kill largest non-critical process
667 | 2. Clear caches if possible
668 | 3. Add emergency swap
669 | 4. Restart memory-leaking services
670 |
671 | ## Disk Full Emergency
672 |
673 | ### Quick Actions
674 | 1. Find large files/directories
675 | 2. Clear logs and temp files
676 | 3. Remove old backups
677 | 4. Empty trash/recycle bin
678 | 5. Compress large files
679 |
680 | ## Network Saturation
681 |
682 | ### Quick Fix
683 | ```
684 | Tool: analyze_system_performance
685 | Parameters: include_network=true
686 | ```
687 |
688 | Actions:
689 | 1. Identify bandwidth hogs
690 | 2. Rate limit if possible
691 | 3. Block non-essential traffic
692 | 4. Check for DDoS/attacks
693 |
694 | ## Recovery Checklist
695 | - [ ] System responsive again?
696 | - [ ] Critical services running?
697 | - [ ] Resources below thresholds?
698 | - [ ] Root cause identified?
699 | - [ ] Temporary fixes documented?
700 | - [ ] Permanent fix planned?
701 | - [ ] Monitoring increased?
702 | """
703 |
704 | @mcp.prompt(
705 | title="Process Lifecycle Management",
706 | description="Understanding and managing process lifecycles"
707 | )
708 | async def process_lifecycle() -> str:
709 | """
710 | Guide for understanding process states and lifecycle.
711 | """
712 | return """
713 | # 🔄 Process Lifecycle Management
714 |
715 | ## Process States
716 |
717 | ### Running
718 | - Actively executing on CPU
719 | - Normal state for active processes
720 | - Should match expected workload
721 |
722 | ### Sleeping
723 | - Waiting for event/resource
724 | - Normal for idle processes
725 | - Check if unexpectedly sleeping
726 |
727 | ### Zombie
728 | - Process terminated
729 | - Parent hasn't collected status
730 | - Indicates parent process issue
731 | - Can't be killed directly
732 |
733 | ### Stopped
734 | - Suspended (SIGSTOP)
735 | - Debugging or job control
736 | - Can be resumed
737 |
738 | ## Monitoring Process Age
739 |
740 | ### Check Process Start Time
741 | ```
742 | Tool: monitor_process_health
743 | Parameters: process_name="service-name"
744 | ```
745 |
746 | ### Age Indicators
747 | - **Very new** (< 1 min): Just started or restarted
748 | - **Recent** (< 1 hour): May indicate crash/restart
749 | - **Stable** (> 1 day): Normal for services
750 | - **Very old**: Check for memory leaks
751 |
752 | ## Process Relationships
753 |
754 | ### Parent-Child
755 | - Parent spawns children
756 | - Children inherit resources
757 | - Orphans adopted by init
758 | - Zombies need parent action
759 |
760 | ### Process Groups
761 | - Related processes
762 | - Share signals
763 | - Common for services
764 | - Monitor as group
765 |
766 | ## Lifecycle Management
767 |
768 | ### Graceful Restart
769 | 1. Monitor current state
770 | 2. Send termination signal
771 | 3. Wait for cleanup
772 | 4. Start new instance
773 | 5. Verify functionality
774 |
775 | ### Resource Limits
776 | - CPU time limits
777 | - Memory limits
778 | - File descriptor limits
779 | - Process count limits
780 |
781 | ### Automatic Management
782 | - Systemd restart policies
783 | - Process supervisors
784 | - Health check scripts
785 | - Resource governors
786 |
787 | ## Common Issues
788 |
789 | ### Frequent Restarts
790 | - Check logs for crashes
791 | - Review resource limits
792 | - Verify dependencies
793 | - Check configuration
794 |
795 | ### Long-Running Processes
796 | - Monitor for memory leaks
797 | - Check file descriptor leaks
798 | - Verify log rotation
799 | - Plan periodic restarts
800 | """
801 |
802 | @mcp.prompt(
803 | title="System Resource Thresholds",
804 | description="Guidelines for setting resource monitoring thresholds"
805 | )
806 | async def resource_thresholds() -> str:
807 | """
808 | Guide for setting appropriate resource thresholds.
809 | """
810 | return """
811 | # 📏 System Resource Thresholds
812 |
813 | ## CPU Thresholds
814 |
815 | ### Usage Levels
816 | - **0-40%**: Low usage, optimal
817 | - **40-60%**: Moderate, normal
818 | - **60-80%**: High, monitor closely
819 | - **80-95%**: Very high, investigate
820 | - **95-100%**: Critical, take action
821 |
822 | ### Load Average (Unix/Linux)
823 | - **< 1.0 per core**: Good
824 | - **1.0-2.0 per core**: Busy
825 | - **> 2 per core**: Overloaded
826 |
827 | Example: 4-core system
828 | - Good: < 4.0
829 | - Busy: 4-8
830 | - Overloaded: > 8
831 |
832 | ## Memory Thresholds
833 |
834 | ### RAM Usage
835 | - **0-60%**: Healthy
836 | - **60-75%**: Normal
837 | - **75-85%**: Monitor
838 | - **85-95%**: Warning
839 | - **95-100%**: Critical
840 |
841 | ### Swap Usage
842 | - **0-20%**: Normal
843 | - **20-50%**: Monitor
844 | - **50-80%**: Performance impact
845 | - **> 80%**: Critical
846 |
847 | ## Disk Thresholds
848 |
849 | ### Space Usage
850 | - **0-70%**: Safe
851 | - **70-80%**: Plan cleanup
852 | - **80-90%**: Warning
853 | - **90-95%**: Critical
854 | - **> 95%**: Emergency
855 |
856 | ### I/O Metrics
857 | - Response time > 20ms: Investigate
858 | - Queue depth > 10: Bottleneck
859 | - Utilization > 80%: Saturated
860 |
861 | ## Process Thresholds
862 |
863 | ### Process Count
864 | - **< 200**: Light load
865 | - **200-500**: Normal
866 | - **500-1000**: Heavy
867 | - **> 1000**: Very heavy
868 |
869 | ### Per-Process Limits
870 | - CPU > 50%: Investigate
871 | - Memory > 10%: Monitor
872 | - Connections > 1000: Check
873 | - Threads > 500: Review
874 |
875 | ## Network Thresholds
876 |
877 | ### Bandwidth
878 | - **< 50%**: Good
879 | - **50-70%**: Normal
880 | - **70-85%**: High
881 | - **> 85%**: Saturated
882 |
883 | ### Connections
884 | - Depends on service type
885 | - Web server: 1000s normal
886 | - Database: 100s typical
887 | - Monitor for growth
888 |
889 | ## Setting Custom Thresholds
890 |
891 | Consider:
892 | 1. Baseline measurements
893 | 2. Application requirements
894 | 3. Peak vs average load
895 | 4. Business criticality
896 | 5. Hardware capabilities
897 |
898 | Adjust based on:
899 | - Historical data
900 | - Growth projections
901 | - SLA requirements
902 | - User experience
903 | """
```
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/mcp_server/prompts/windows_testing_prompt.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Windows testing and diagnostics prompts for the MCP Log Analyzer server.
3 | """
4 |
5 | from typing import Optional
6 | from mcp.server import FastMCP
7 |
8 |
9 | def register_windows_testing_prompts(mcp: FastMCP):
10 | """Register all Windows testing prompts."""
11 |
12 | @mcp.prompt(
13 | title="Test Windows Event Log Access",
14 | description="Guide for testing Windows Event Log access and permissions"
15 | )
16 | async def test_event_log_access() -> str:
17 | """
18 | Test Windows Event Log access capabilities.
19 | """
20 | return """
21 | # 🔍 Test Windows Event Log Access
22 |
23 | ## Tool: test_windows_event_log_access
24 |
25 | ### Purpose
26 | Tests access to Windows Event Logs and verifies permissions.
27 |
28 | ### What It Tests
29 | - **System Log**: Hardware, drivers, system services
30 | - **Application Log**: Application crashes and errors
31 | - **Security Log**: Authentication and audit events
32 | - **pywin32**: Package availability check
33 |
34 | ### Usage
35 | ```
36 | Tool: test_windows_event_log_access
37 | ```
38 |
39 | ### Interpreting Results
40 | - **Accessible**: Can read and query the log
41 | - **Access Denied**: Need administrator privileges
42 | - **pywin32 available**: Required for Event Log access
43 |
44 | ### Common Issues
45 | - Security log requires admin rights
46 | - pywin32 not installed: `pip install pywin32`
47 | - Need to run as administrator
48 |
49 | ### Next Steps
50 | - Install pywin32 if missing
51 | - Run with admin rights for Security log
52 | - Register accessible logs as sources
53 | """
54 |
55 | @mcp.prompt(
56 | title="Windows Event Log Information",
57 | description="Get detailed information about specific Windows Event Logs"
58 | )
59 | async def event_log_info_guide() -> str:
60 | """
61 | Guide for retrieving Windows Event Log information.
62 | """
63 | return """
64 | # 📊 Windows Event Log Information
65 |
66 | ## Tool: get_windows_event_log_info
67 |
68 | ### Purpose
69 | Retrieves metadata and recent entries from Windows Event Logs.
70 |
71 | ### Parameters
72 | - **log_name**: "System", "Application", or "Security"
73 | - **max_entries**: Number of recent entries (default: 10)
74 |
75 | ### Usage Examples
76 | ```
77 | # Get System log info
78 | Tool: get_windows_event_log_info
79 | Parameters: log_name="System", max_entries=20
80 |
81 | # Check Application events
82 | Tool: get_windows_event_log_info
83 | Parameters: log_name="Application", max_entries=50
84 |
85 | # Security events (requires admin)
86 | Tool: get_windows_event_log_info
87 | Parameters: log_name="Security", max_entries=10
88 | ```
89 |
90 | ### Information Returned
91 | - **Log metadata**: Size, record count, timestamps
92 | - **Recent entries**: Event ID, source, type, message
93 | - **Event details**: Formatted for readability
94 |
95 | ### Event Types
96 | - 1 = Error (Red)
97 | - 2 = Warning (Yellow)
98 | - 4 = Information (Blue)
99 | - 8 = Success Audit
100 | - 16 = Failure Audit
101 | """
102 |
103 | @mcp.prompt(
104 | title="Query Windows Events",
105 | description="Query Windows Event Logs with specific criteria"
106 | )
107 | async def query_windows_events(
108 | filter_type: Optional[str] = None
109 | ) -> str:
110 | """
111 | Guide for querying Windows events by criteria.
112 |
113 | Args:
114 | filter_type: Type of filter (event_id, level, time)
115 | """
116 |
117 | base_guide = """
118 | # 🔍 Query Windows Events by Criteria
119 |
120 | ## Tool: query_windows_events_by_criteria
121 |
122 | ### Purpose
123 | Query Windows Event Logs with powerful filtering options.
124 |
125 | ### Parameters
126 | - **log_name**: Target log (default: "System")
127 | - **event_id**: Specific Event ID to find
128 | - **level**: "Error", "Warning", or "Information"
129 | - **time_duration**: Time range (e.g., "30m", "2h", "1d")
130 | - **max_entries**: Result limit (default: 50)
131 |
132 | ### Usage Examples
133 | ```
134 | # Find all errors in last hour
135 | Tool: query_windows_events_by_criteria
136 | Parameters: level="Error", time_duration="1h"
137 |
138 | # Find specific Event ID
139 | Tool: query_windows_events_by_criteria
140 | Parameters: event_id=7001, time_duration="24h"
141 |
142 | # Application warnings
143 | Tool: query_windows_events_by_criteria
144 | Parameters: log_name="Application", level="Warning"
145 | ```
146 | """
147 |
148 | filter_guides = {
149 | "event_id": """
150 | ### Filtering by Event ID
151 | Common Event IDs to search:
152 | - **1074**: System shutdown/restart
153 | - **6005/6006**: Event Log start/stop
154 | - **7000-7034**: Service control events
155 | - **1000**: Application crashes
156 | - **4624/4625**: Logon success/failure
157 |
158 | Example:
159 | ```
160 | Tool: query_windows_events_by_criteria
161 | Parameters: event_id=7001, time_duration="6h"
162 | ```
163 | """,
164 | "level": """
165 | ### Filtering by Level
166 | Event severity levels:
167 | - **Error**: Critical failures
168 | - **Warning**: Potential issues
169 | - **Information**: Normal operations
170 |
171 | Example:
172 | ```
173 | Tool: query_windows_events_by_criteria
174 | Parameters: level="Error", time_duration="2h"
175 | ```
176 | """,
177 | "time": """
178 | ### Time-based Filtering
179 | Duration formats:
180 | - **Minutes**: "30m", "45m"
181 | - **Hours**: "1h", "6h", "12h"
182 | - **Days**: "1d", "7d", "30d"
183 |
184 | Example:
185 | ```
186 | Tool: query_windows_events_by_criteria
187 | Parameters: time_duration="4h", level="Error"
188 | ```
189 | """
190 | }
191 |
192 | if filter_type and filter_type.lower() in filter_guides:
193 | base_guide += filter_guides[filter_type.lower()]
194 |
195 | base_guide += """
196 | ### Query Strategy
197 | 1. Start with recent time ranges
198 | 2. Use specific Event IDs when known
199 | 3. Combine filters for precision
200 | 4. Expand search if needed
201 | """
202 |
203 | return base_guide
204 |
205 | @mcp.prompt(
206 | title="Windows System Health",
207 | description="Get Windows system health overview from Event Logs"
208 | )
209 | async def windows_system_health() -> str:
210 | """
211 | Guide for Windows system health assessment.
212 | """
213 | return """
214 | # 🎯 Windows System Health Overview
215 |
216 | ## Tool: get_windows_system_health
217 |
218 | ### Purpose
219 | Analyzes System and Application logs to assess Windows health.
220 |
221 | ### Usage
222 | ```
223 | Tool: get_windows_system_health
224 | ```
225 |
226 | ### Analysis Period
227 | - Last 24 hours of events
228 | - System and Application logs
229 | - Error and warning counts
230 | - Critical event identification
231 |
232 | ### Health Status Levels
233 | - **Healthy**: 0 errors, minimal warnings
234 | - **Fair**: <3 errors, <20 warnings
235 | - **Concerning**: 3+ errors or 20+ warnings
236 |
237 | ### Critical Events Shown
238 | - Service failures
239 | - Application crashes
240 | - System errors
241 | - Hardware issues
242 | - Driver problems
243 |
244 | ### Follow-up Actions
245 | Based on health status:
246 | 1. **Healthy**: Continue monitoring
247 | 2. **Fair**: Investigate warnings
248 | 3. **Concerning**: Address errors immediately
249 |
250 | ### Common Issues Found
251 | - Windows Update failures
252 | - Service startup problems
253 | - Application crashes
254 | - Driver errors
255 | - Hardware warnings
256 | """
257 |
258 | @mcp.prompt(
259 | title="Windows Service Troubleshooting",
260 | description="Diagnose Windows service failures and issues"
261 | )
262 | async def service_troubleshooting() -> str:
263 | """
264 | Guide for troubleshooting Windows services.
265 | """
266 | return """
267 | # 🛠️ Windows Service Troubleshooting
268 |
269 | ## Common Service Event IDs
270 |
271 | ### Service Failures
272 | - **7000**: Service failed to start (logon failure)
273 | - **7001**: Service depends on failed service
274 | - **7023**: Service terminated with error
275 | - **7024**: Service-specific error
276 | - **7031**: Service crashed unexpectedly
277 |
278 | ### Service Timeouts
279 | - **7009**: Connection timeout
280 | - **7011**: Response timeout
281 | - **7022**: Service hung on starting
282 |
283 | ## Diagnostic Steps
284 |
285 | ### Step 1: Find Service Errors
286 | ```
287 | Tool: query_windows_events_by_criteria
288 | Parameters: event_id=7000, time_duration="6h"
289 | ```
290 |
291 | ### Step 2: Check Dependencies
292 | ```
293 | Tool: query_windows_events_by_criteria
294 | Parameters: event_id=7001, time_duration="6h"
295 | ```
296 |
297 | ### Step 3: Review Service Crashes
298 | ```
299 | Tool: query_windows_events_by_criteria
300 | Parameters: event_id=7031, time_duration="24h"
301 | ```
302 |
303 | ## Common Causes
304 |
305 | ### Logon Failures (7000)
306 | - Incorrect service account password
307 | - Account locked or disabled
308 | - Insufficient permissions
309 |
310 | ### Dependency Issues (7001)
311 | - Required service not started
312 | - Circular dependencies
313 | - Network service unavailable
314 |
315 | ### Service Crashes (7031)
316 | - Application bugs
317 | - Resource exhaustion
318 | - Configuration errors
319 |
320 | ## Resolution Steps
321 | 1. Check service account credentials
322 | 2. Verify service dependencies
323 | 3. Review service configuration
324 | 4. Check system resources
325 | 5. Examine application logs
326 | """
327 |
328 | @mcp.prompt(
329 | title="Windows Application Crashes",
330 | description="Analyze application crashes and errors"
331 | )
332 | async def application_crashes() -> str:
333 | """
334 | Guide for investigating application crashes.
335 | """
336 | return """
337 | # 💥 Windows Application Crash Analysis
338 |
339 | ## Key Event IDs
340 |
341 | ### Application Errors
342 | - **1000**: Application crash/fault
343 | - **1001**: Windows Error Reporting
344 | - **1002**: Application hang
345 | - **1026**: .NET runtime error
346 |
347 | ## Investigation Steps
348 |
349 | ### Step 1: Find Recent Crashes
350 | ```
351 | Tool: query_windows_events_by_criteria
352 | Parameters: log_name="Application", event_id=1000, time_duration="24h"
353 | ```
354 |
355 | ### Step 2: Check Application Hangs
356 | ```
357 | Tool: query_windows_events_by_criteria
358 | Parameters: log_name="Application", event_id=1002, time_duration="24h"
359 | ```
360 |
361 | ### Step 3: Review Error Details
362 | ```
363 | Tool: get_windows_event_log_info
364 | Parameters: log_name="Application", max_entries=50
365 | ```
366 |
367 | ## Crash Information
368 |
369 | ### Event 1000 Details
370 | - Faulting application name
371 | - Faulting module (DLL/EXE)
372 | - Exception code
373 | - Fault offset
374 | - Process ID and path
375 |
376 | ### Common Exception Codes
377 | - **0xc0000005**: Access violation
378 | - **0xc0000409**: Stack buffer overrun
379 | - **0xc00000fd**: Stack overflow
380 | - **0x80000003**: Breakpoint
381 |
382 | ## Troubleshooting Steps
383 | 1. Identify crashing application
384 | 2. Check for patterns (time, frequency)
385 | 3. Review exception codes
386 | 4. Look for module conflicts
387 | 5. Check for updates/patches
388 | 6. Test in safe mode
389 | """
390 |
391 | @mcp.prompt(
392 | title="Windows Security Monitoring",
393 | description="Monitor Windows security events and authentication"
394 | )
395 | async def security_monitoring() -> str:
396 | """
397 | Guide for Windows security event monitoring.
398 | """
399 | return """
400 | # 🔒 Windows Security Monitoring
401 |
402 | ## Important Security Event IDs
403 |
404 | ### Logon Events
405 | - **4624**: Successful logon
406 | - **4625**: Failed logon attempt
407 | - **4634**: Account logoff
408 | - **4647**: User initiated logoff
409 |
410 | ### Account Management
411 | - **4720**: User account created
412 | - **4722**: User account enabled
413 | - **4725**: User account disabled
414 | - **4726**: User account deleted
415 | - **4740**: Account locked out
416 |
417 | ### Privilege Use
418 | - **4672**: Special privileges assigned
419 | - **4673**: Privileged service called
420 | - **4674**: Operation attempted on object
421 |
422 | ## Security Queries
423 |
424 | ### Failed Login Attempts
425 | ```
426 | Tool: query_windows_events_by_criteria
427 | Parameters: log_name="Security", event_id=4625, time_duration="2h"
428 | ```
429 |
430 | ### Account Lockouts
431 | ```
432 | Tool: query_windows_events_by_criteria
433 | Parameters: log_name="Security", event_id=4740, time_duration="24h"
434 | ```
435 |
436 | ### Successful Logins
437 | ```
438 | Tool: query_windows_events_by_criteria
439 | Parameters: log_name="Security", event_id=4624, time_duration="1h"
440 | ```
441 |
442 | ## Security Analysis
443 |
444 | ### Brute Force Detection
445 | - Multiple 4625 events
446 | - Same username, different IPs
447 | - Rapid attempts
448 |
449 | ### Suspicious Activity
450 | - Logins at unusual times
451 | - New user accounts created
452 | - Privilege escalations
453 | - Service account usage
454 |
455 | ### Monitoring Best Practices
456 | 1. Regular failed login reviews
457 | 2. Track account changes
458 | 3. Monitor privileged access
459 | 4. Check for patterns
460 | 5. Set up alerts
461 |
462 | **Note**: Security log requires administrator privileges.
463 | """
464 |
465 | @mcp.prompt(
466 | title="Windows Boot and Startup",
467 | description="Diagnose Windows boot and startup issues"
468 | )
469 | async def boot_startup_issues() -> str:
470 | """
471 | Guide for Windows boot and startup diagnostics.
472 | """
473 | return """
474 | # 🚀 Windows Boot and Startup Diagnostics
475 |
476 | ## Boot-Related Event IDs
477 |
478 | ### System Start/Stop
479 | - **6005**: Event Log service started (boot)
480 | - **6006**: Event Log service stopped (shutdown)
481 | - **6008**: Unexpected shutdown detected
482 | - **6009**: Processor information at boot
483 |
484 | ### Shutdown/Restart
485 | - **1074**: System shutdown by user/process
486 | - **1076**: Reason for shutdown
487 |
488 | ### Driver/Service Issues
489 | - **7026**: Boot-start driver failed
490 | - **7000**: Service failed at startup
491 |
492 | ## Diagnostic Queries
493 |
494 | ### Check Last Boot
495 | ```
496 | Tool: query_windows_events_by_criteria
497 | Parameters: event_id=6005, time_duration="24h"
498 | ```
499 |
500 | ### Unexpected Shutdowns
501 | ```
502 | Tool: query_windows_events_by_criteria
503 | Parameters: event_id=6008, time_duration="7d"
504 | ```
505 |
506 | ### Boot Driver Failures
507 | ```
508 | Tool: query_windows_events_by_criteria
509 | Parameters: event_id=7026, time_duration="24h"
510 | ```
511 |
512 | ### Service Startup Issues
513 | ```
514 | Tool: query_windows_events_by_criteria
515 | Parameters: event_id=7000, time_duration="2h"
516 | ```
517 |
518 | ## Boot Problem Analysis
519 |
520 | ### Slow Boot
521 | 1. Check service startup times
522 | 2. Look for driver failures
523 | 3. Review dependency chains
524 | 4. Check for timeout events
525 |
526 | ### Boot Loops
527 | 1. Check Event ID 6008 frequency
528 | 2. Look for critical errors before shutdown
529 | 3. Review hardware events
530 | 4. Check for driver conflicts
531 |
532 | ### Service Failures
533 | 1. Identify failing services
534 | 2. Check dependencies
535 | 3. Review service accounts
536 | 4. Verify system resources
537 |
538 | ## Boot Optimization
539 | - Disable unnecessary startup services
540 | - Update drivers
541 | - Check disk health
542 | - Review startup programs
543 | """
544 |
545 | @mcp.prompt(
546 | title="Windows Event ID Reference",
547 | description="Quick reference for common Windows Event IDs"
548 | )
549 | async def event_id_reference(
550 | category: Optional[str] = None
551 | ) -> str:
552 | """
553 | Windows Event ID reference guide.
554 |
555 | Args:
556 | category: Event category (system, service, security, etc.)
557 | """
558 |
559 | all_categories = {
560 | "system": """
561 | ## System Events
562 | - **1074**: System shutdown/restart initiated
563 | - **6005**: Event Log service started
564 | - **6006**: Event Log service stopped
565 | - **6008**: Unexpected system shutdown
566 | - **6009**: Processor information
567 | - **6013**: System uptime
568 | """,
569 | "service": """
570 | ## Service Control Manager
571 | - **7000**: Service failed to start
572 | - **7001**: Service dependency failure
573 | - **7009**: Connection timeout
574 | - **7011**: Service timeout
575 | - **7023**: Service terminated with error
576 | - **7024**: Service-specific error
577 | - **7026**: Boot driver failed
578 | - **7031**: Service crash
579 | - **7034**: Service crashed (no recovery)
580 | """,
581 | "application": """
582 | ## Application Events
583 | - **1000**: Application error/crash
584 | - **1001**: Windows Error Reporting
585 | - **1002**: Application hang
586 | - **1004**: Application recovery
587 | - **1026**: .NET runtime error
588 | """,
589 | "security": """
590 | ## Security Events (Admin Required)
591 | - **4624**: Successful logon
592 | - **4625**: Failed logon
593 | - **4634**: Logoff
594 | - **4672**: Special privileges
595 | - **4720**: User created
596 | - **4726**: User deleted
597 | - **4740**: Account locked
598 | """,
599 | "hardware": """
600 | ## Hardware Events
601 | - **7**: Disk bad block
602 | - **11**: Disk controller error
603 | - **15**: Disk not ready
604 | - **51**: Paging error
605 | - **129**: Disk reset
606 | """
607 | }
608 |
609 | result = "# 📖 Windows Event ID Reference\n\n"
610 |
611 | if category and category.lower() in all_categories:
612 | result += all_categories[category.lower()]
613 | else:
614 | result += "## Common Windows Event IDs by Category\n\n"
615 | for cat_content in all_categories.values():
616 | result += cat_content + "\n"
617 |
618 | result += """
619 | ## Using Event IDs
620 | 1. Note the Event ID from logs
621 | 2. Query for specific IDs
622 | 3. Check patterns and frequency
623 | 4. Cross-reference with time
624 | 5. Correlate related events
625 |
626 | ## Event Levels
627 | - **Error**: Critical failures
628 | - **Warning**: Potential issues
629 | - **Information**: Normal operations
630 | - **Success Audit**: Security success
631 | - **Failure Audit**: Security failure
632 | """
633 |
634 | return result
635 |
636 | @mcp.prompt(
637 | title="Windows Performance Issues",
638 | description="Diagnose Windows performance problems using Event Logs"
639 | )
640 | async def performance_issues() -> str:
641 | """
642 | Guide for Windows performance diagnostics.
643 | """
644 | return """
645 | # 📊 Windows Performance Diagnostics
646 |
647 | ## Performance-Related Events
648 |
649 | ### Resource Issues
650 | - **2004**: Resource exhaustion
651 | - **1001**: Windows Error Reporting activity (see Application Events above)
652 | - **100**: Component timeout
653 |
654 | ### Application Performance
655 | - **1002**: Application hang
656 | - **1530**: User Profile Service warning — registry handles left open (slow logoff)
657 |
658 | ## Investigation Steps
659 |
660 | ### Step 1: Check System Health
661 | ```
662 | Tool: get_windows_system_health
663 | ```
664 |
665 | ### Step 2: Find Application Hangs
666 | ```
667 | Tool: query_windows_events_by_criteria
668 | Parameters: log_name="Application", event_id=1002, time_duration="6h"
669 | ```
670 |
671 | ### Step 3: Look for Timeouts
672 | ```
673 | Tool: query_windows_events_by_criteria
674 | Parameters: event_id=7011, time_duration="6h"
675 | ```
676 |
677 | ### Step 4: Resource Warnings
678 | ```
679 | Tool: query_windows_events_by_criteria
680 | Parameters: level="Warning", time_duration="2h"
681 | ```
682 |
683 | ## Common Performance Issues
684 |
685 | ### High CPU Usage
686 | - Check for crashed services
687 | - Look for restart loops
688 | - Review application errors
689 |
690 | ### Memory Issues
691 | - Application crashes (1000)
692 | - Out of memory errors
693 | - Page file warnings
694 |
695 | ### Disk Problems
696 | - Event ID 7, 11, 51
697 | - Slow response warnings
698 | - I/O timeouts
699 |
700 | ### Network Issues
701 | - Connection timeouts
702 | - Service availability
703 | - DNS failures
704 |
705 | ## Performance Optimization
706 | 1. Identify problematic services
707 | 2. Check for memory leaks
708 | 3. Review disk errors
709 | 4. Monitor service restarts
710 | 5. Update drivers
711 | 6. Check for malware
712 | """
713 |
714 | @mcp.prompt(
715 | title="Windows Event Log Best Practices",
716 | description="Best practices for Windows Event Log monitoring"
717 | )
718 | async def event_log_practices() -> str:
719 | """
720 | Best practices for Event Log management.
721 | """
722 | return """
723 | # 📋 Windows Event Log Best Practices
724 |
725 | ## Regular Monitoring
726 |
727 | ### Daily Checks
728 | ```
729 | Tool: get_windows_system_health
730 | ```
731 | - Review health status
732 | - Check critical errors
733 | - Note new warnings
734 |
735 | ### Weekly Analysis
736 | - Service failure patterns
737 | - Application crash trends
738 | - Security audit review
739 | - Performance issues
740 |
741 | ## Effective Queries
742 |
743 | ### Start Specific
744 | ```
745 | # Known issue
746 | Tool: query_windows_events_by_criteria
747 | Parameters: event_id=7001, time_duration="24h"
748 | ```
749 |
750 | ### Then Broaden
751 | ```
752 | # General errors
753 | Tool: query_windows_events_by_criteria
754 | Parameters: level="Error", time_duration="6h"
755 | ```
756 |
757 | ## Key Event IDs to Monitor
758 |
759 | ### Critical System
760 | - 6008: Unexpected shutdown
761 | - 1074: System restart
762 | - 7031: Service crash
763 | - 41: Kernel power
764 |
765 | ### Security (if accessible)
766 | - 4625: Failed logins
767 | - 4740: Account lockouts
768 | - 4720: User creation
769 |
770 | ### Application Health
771 | - 1000: App crashes
772 | - 1002: App hangs
773 | - 1026: .NET errors
774 |
775 | ## Log Management
776 |
777 | ### Retention
778 | - System: 30-90 days
779 | - Application: 30 days
780 | - Security: 90-365 days
781 |
782 | ### Size Limits
783 | - Prevent logs from filling
784 | - Archive old events
785 | - Regular cleanup
786 |
787 | ## Automation Ideas
788 | 1. Schedule daily health checks
789 | 2. Alert on critical Event IDs
790 | 3. Weekly summary reports
791 | 4. Trend analysis
792 | 5. Correlation rules
793 |
794 | ## Documentation
795 | Track:
796 | - Recurring issues
797 | - Resolution steps
798 | - Event patterns
799 | - System changes
800 | """
801 |
802 | @mcp.prompt(
803 | title="Windows Emergency Diagnostics",
804 | description="Quick diagnostics for Windows emergencies"
805 | )
806 | async def emergency_diagnostics() -> str:
807 | """
808 | Emergency Windows diagnostic procedures.
809 | """
810 | return """
811 | # 🚨 Windows Emergency Diagnostics
812 |
813 | ## System Won't Boot
814 |
815 | ### Quick Checks
816 | 1. Boot to Safe Mode
817 | 2. Check Event Viewer for:
818 | ```
819 | Tool: query_windows_events_by_criteria
820 | Parameters: event_id=7026, time_duration="1h"
821 | ```
822 |
823 | 3. Service failures:
824 | ```
825 | Tool: query_windows_events_by_criteria
826 | Parameters: event_id=7000, time_duration="1h"
827 | ```
828 |
829 | ## Blue Screen (BSOD)
830 |
831 | ### After Reboot
832 | ```
833 | Tool: query_windows_events_by_criteria
834 | Parameters: event_id=41, time_duration="1h"
835 | ```
836 |
837 | Check for:
838 | - Kernel-Power events
839 | - Driver failures
840 | - Hardware errors
841 |
842 | ## Service Failures
843 |
844 | ### Critical Service Down
845 | ```
846 | # Find specific service
847 | Tool: query_windows_events_by_criteria
848 | Parameters: level="Error", time_duration="30m"
849 | ```
850 |
851 | Quick fixes:
852 | 1. Restart service
853 | 2. Check dependencies
854 | 3. Verify credentials
855 | 4. Review resources
856 |
857 | ## Performance Crisis
858 |
859 | ### System Slow
860 | ```
861 | Tool: get_windows_system_health
862 | ```
863 |
864 | Then check:
865 | - Application hangs (1002)
866 | - Service timeouts (7011)
867 | - Resource warnings
868 |
869 | ## Security Incident
870 |
871 | ### Suspected Breach
872 | ```
873 | # Failed logins
874 | Tool: query_windows_events_by_criteria
875 | Parameters: log_name="Security", event_id=4625
876 |
877 | # New accounts
878 | Parameters: log_name="Security", event_id=4720
879 | ```
880 |
881 | ## Recovery Checklist
882 | - [ ] System accessible?
883 | - [ ] Critical services running?
884 | - [ ] Recent errors identified?
885 | - [ ] Security verified?
886 | - [ ] Performance acceptable?
887 | - [ ] Root cause found?
888 | - [ ] Preventive measures?
889 | """
```
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/mcp_server/prompts/log_management_prompt.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Log Management prompts for the MCP Log Analyzer server.
3 | """
4 |
5 | from typing import Optional
6 | from mcp.server import FastMCP
7 |
8 |
9 | def register_log_management_prompts(mcp: FastMCP):
10 | """Register all log management prompts."""
11 |
12 | @mcp.prompt(
13 | title="Register Log Source",
14 | description="Guide for registering new log sources for analysis"
15 | )
16 | async def register_log_source_guide() -> str:
17 | """
18 | Guide for registering various types of log sources.
19 | """
20 | return """
21 | # 💾 Register Log Source Guide
22 |
23 | ## Tool: register_log_source
24 |
25 | ### Purpose
26 | Register a new log source for analysis. Supports various log formats.
27 |
28 | ### Parameters
29 | - **name**: Unique identifier for your log source
30 | - **source_type**: Type of log (evt, json, xml, csv, text)
31 | - **path**: File path or directory containing logs
32 | - **config**: Additional parser configuration (optional)
33 |
34 | ### Supported Log Types
35 | - **evt**: Windows Event Logs (System, Application, Security)
36 | - **json**: JSON-formatted application logs
37 | - **xml**: XML-structured logs
38 | - **csv**: Comma-separated value logs
39 | - **text**: Plain text logs (syslog, custom formats)
40 |
41 | ### Usage Examples
42 | ```
43 | # Windows System Events
44 | Tool: register_log_source
45 | Parameters: name="windows_system", source_type="evt", path="System"
46 |
47 | # JSON Application Logs
48 | Tool: register_log_source
49 | Parameters: name="app_logs", source_type="json", path="/var/log/myapp/app.json"
50 |
51 | # Text-based Syslog
52 | Tool: register_log_source
53 | Parameters: name="syslog", source_type="text", path="/var/log/syslog"
54 | ```
55 |
56 | ### Best Practices
57 | ✅ Use descriptive names for easy identification
58 | ✅ Organize by system and log type
59 | ✅ Verify file path exists before registering
60 | ✅ Set appropriate parser configurations
61 | """
62 |
63 | @mcp.prompt(
64 | title="Query Logs",
65 | description="Guide for querying and filtering registered log sources"
66 | )
67 | async def query_logs_guide(
68 | filter_type: Optional[str] = None
69 | ) -> str:
70 | """
71 | Guide for querying logs with various filters.
72 |
73 | Args:
74 | filter_type: Type of filter (time, level, content, etc.)
75 | """
76 |
77 | base_guide = """
78 | # 🔍 Query Logs Guide
79 |
80 | ## Tool: query_logs
81 |
82 | ### Purpose
83 | Query and filter logs from registered sources with powerful filtering options.
84 |
85 | ### Basic Parameters
86 | - **source_name**: Which registered source to query
87 | - **start_time/end_time**: Define time ranges
88 | - **limit/offset**: Paginate through results
89 | - **filters**: Apply specific criteria
90 |
91 | ### Usage Examples
92 | ```
93 | # Query recent errors
94 | Tool: query_logs
95 | Parameters: source_name="windows_system", filters={"level": "Error"}, start_time="1 hour ago"
96 |
97 | # Search for specific content
98 | Tool: query_logs
99 | Parameters: source_name="app_logs", filters={"message_contains": "database error"}
100 |
101 | # Paginate through results
102 | Tool: query_logs
103 | Parameters: source_name="syslog", limit=50, offset=100
104 | ```
105 | """
106 |
107 | filter_guides = {
108 | "time": """
109 | ### Time-based Filtering
110 | - **Absolute time**: "2024-01-15 10:00:00"
111 | - **Relative time**: "1 hour ago", "24 hours ago"
112 | - **Time ranges**: start_time and end_time
113 | - **Duration shortcuts**: "last_hours": 6
114 |
115 | Examples:
116 | ```
117 | # Last 24 hours
118 | start_time="24 hours ago"
119 |
120 | # Specific date range
121 | start_time="2024-01-15 00:00:00", end_time="2024-01-15 23:59:59"
122 | ```
123 | """,
124 | "level": """
125 | ### Level-based Filtering
126 | - **Error levels**: Error, Warning, Info, Debug
127 | - **Windows levels**: Error, Warning, Information
128 | - **Syslog priorities**: 0-7 (emerg to debug)
129 |
130 | Examples:
131 | ```
132 | filters={"level": "Error"}
133 | filters={"severity": "critical"}
134 | filters={"priority": [0, 1, 2]} # emerg, alert, crit
135 | ```
136 | """,
137 | "content": """
138 | ### Content Filtering
139 | - **Text search**: message_contains
140 | - **Regex patterns**: regex_pattern
141 | - **Field matching**: Exact field values
142 | - **Multiple criteria**: AND/OR conditions
143 |
144 | Examples:
145 | ```
146 | filters={"message_contains": "authentication failed"}
147 | filters={"regex_pattern": "error.*database.*timeout"}
148 | filters={"event_id": 7001, "source": "Service Control Manager"}
149 | ```
150 | """
151 | }
152 |
153 | if filter_type and filter_type.lower() in filter_guides:
154 | base_guide += filter_guides[filter_type.lower()]
155 |
156 | base_guide += """
157 | ### Performance Tips
158 | ✅ Use time ranges to limit data scope
159 | ✅ Apply specific filters to reduce noise
160 | ✅ Start with recent time periods
161 | ✅ Use pagination for large datasets
162 | """
163 |
164 | return base_guide
165 |
166 | @mcp.prompt(
167 | title="Analyze Logs",
168 | description="Guide for running log analysis (summary, pattern, anomaly)"
169 | )
170 | async def analyze_logs_guide(
171 | analysis_type: Optional[str] = None
172 | ) -> str:
173 | """
174 | Guide for different types of log analysis.
175 |
176 | Args:
177 | analysis_type: Type of analysis (summary, pattern, anomaly)
178 | """
179 |
180 | base_guide = """
181 | # 📊 Analyze Logs Guide
182 |
183 | ## Tool: analyze_logs
184 |
185 | ### Purpose
186 | Perform advanced analysis on logs to identify patterns, anomalies, and trends.
187 |
188 | ### Analysis Types
189 | - **summary**: General statistics and overview
190 | - **pattern**: Detect recurring patterns and frequencies
191 | - **anomaly**: Identify unusual or suspicious log entries
192 |
193 | ### Basic Usage
194 | ```
195 | Tool: analyze_logs
196 | Parameters: source_name="app_logs", analysis_type="summary"
197 | ```
198 | """
199 |
200 | analysis_guides = {
201 | "summary": """
202 | ### Summary Analysis
203 | Provides high-level overview and statistics.
204 |
205 | **What it shows:**
206 | - Total log count and time range
207 | - Error/Warning/Info distribution
208 | - Top sources and components
209 | - Peak activity periods
210 | - Message frequency analysis
211 |
212 | **Best for:**
213 | - Initial investigation
214 | - Health assessment
215 | - Capacity planning
216 | - Report generation
217 |
218 | **Example:**
219 | ```
220 | Tool: analyze_logs
221 | Parameters: source_name="windows_system", analysis_type="summary", time_duration="24h"
222 | ```
223 | """,
224 | "pattern": """
225 | ### Pattern Analysis
226 | Detects recurring patterns and correlations.
227 |
228 | **What it finds:**
229 | - Frequent error messages
230 | - Event sequences and correlations
231 | - Time-based patterns (hourly, daily)
232 | - Recurring issues
233 | - Common failure modes
234 |
235 | **Best for:**
236 | - Root cause analysis
237 | - Predictive maintenance
238 | - Identifying systematic issues
239 | - Performance optimization
240 |
241 | **Example:**
242 | ```
243 | Tool: analyze_logs
244 | Parameters: source_name="app_logs", analysis_type="pattern", filters={"level": "Error"}
245 | ```
246 | """,
247 | "anomaly": """
248 | ### Anomaly Detection
249 | Identifies unusual events and outliers.
250 |
251 | **What it detects:**
252 | - Unusual error spikes or drops
253 | - New error types not seen before
254 | - Unexpected source activity
255 | - Timing anomalies
256 | - Statistical outliers
257 |
258 | **Best for:**
259 | - Security monitoring
260 | - Early problem detection
261 | - Change detection
262 | - Incident investigation
263 |
264 | **Example:**
265 | ```
266 | Tool: analyze_logs
267 | Parameters: source_name="security_logs", analysis_type="anomaly", time_duration="48h"
268 | ```
269 | """
270 | }
271 |
272 | if analysis_type and analysis_type.lower() in analysis_guides:
273 | base_guide += analysis_guides[analysis_type.lower()]
274 | else:
275 | # Show all types if none specified
276 | for guide in analysis_guides.values():
277 | base_guide += guide
278 |
279 | base_guide += """
280 | ### Analysis Strategy
281 | 1. Start with summary for overview
282 | 2. Use pattern analysis for recurring issues
283 | 3. Apply anomaly detection for security
284 | 4. Combine analyses for comprehensive insights
285 | """
286 |
287 | return base_guide
288 |
289 | @mcp.prompt(
290 | title="Manage Log Sources",
291 | description="Guide for listing, viewing, and deleting log sources"
292 | )
293 | async def manage_log_sources() -> str:
294 | """
295 | Guide for managing registered log sources.
296 | """
297 | return """
298 | # 📋 Manage Log Sources
299 |
300 | ## Available Management Tools
301 |
302 | ### List All Sources
303 | ```
304 | Tool: list_log_sources
305 | ```
306 | Shows all registered log sources with:
307 | - Source names and types
308 | - File paths
309 | - Registration timestamps
310 | - Parser configurations
311 |
312 | ### Get Source Details
313 | ```
314 | Tool: get_log_source
315 | Parameters: name="source_name"
316 | ```
317 | Provides detailed information about a specific source:
318 | - Full configuration
319 | - Parser settings
320 | - Access status
321 | - Recent activity
322 |
323 | ### Delete Log Source
324 | ```
325 | Tool: delete_log_source
326 | Parameters: name="source_name"
327 | ```
328 | Removes a log source registration:
329 | - Cleans up configuration
330 | - Does not delete actual log files
331 | - Frees up the source name
332 |
333 | ## Management Best Practices
334 |
335 | ### Organization
336 | - Use naming conventions (system_component_type)
337 | - Group related sources logically
338 | - Document source purposes
339 | - Regular cleanup of unused sources
340 |
341 | ### Maintenance
342 | - Verify sources are still accessible
343 | - Update paths after log rotation
344 | - Remove obsolete sources
345 | - Monitor source performance
346 |
347 | ### Examples
348 | ```
349 | # List all sources to review
350 | Tool: list_log_sources
351 |
352 | # Check specific source status
353 | Tool: get_log_source
354 | Parameters: name="prod_app_logs"
355 |
356 | # Remove old test source
357 | Tool: delete_log_source
358 | Parameters: name="test_logs_old"
359 | ```
360 | """
361 |
362 | @mcp.prompt(
363 | title="Windows Event Log Setup",
364 | description="Guide for setting up Windows Event Log sources"
365 | )
366 | async def windows_event_setup() -> str:
367 | """
368 | Guide for Windows Event Log configuration.
369 | """
370 | return """
371 | # 🪟 Windows Event Log Setup
372 |
373 | ## Registering Windows Event Logs
374 |
375 | ### System Event Log
376 | ```
377 | Tool: register_log_source
378 | Parameters: name="windows_system", source_type="evt", path="System"
379 | ```
380 | **Contains**: Hardware, drivers, system services, kernel events
381 |
382 | ### Application Event Log
383 | ```
384 | Tool: register_log_source
385 | Parameters: name="windows_application", source_type="evt", path="Application"
386 | ```
387 | **Contains**: Application crashes, errors, informational events
388 |
389 | ### Security Event Log
390 | ```
391 | Tool: register_log_source
392 | Parameters: name="windows_security", source_type="evt", path="Security"
393 | ```
394 | **Contains**: Authentication, authorization, audit events
395 | **Note**: Requires administrator privileges
396 |
397 | ## Common Windows Queries
398 |
399 | ### Recent System Errors
400 | ```
401 | Tool: query_logs
402 | Parameters: source_name="windows_system", filters={"level": "Error"}, start_time="24 hours ago"
403 | ```
404 |
405 | ### Service Failures
406 | ```
407 | Tool: query_logs
408 | Parameters: source_name="windows_system", filters={"event_id": [7000, 7001, 7023]}
409 | ```
410 |
411 | ### Application Crashes
412 | ```
413 | Tool: query_logs
414 | Parameters: source_name="windows_application", filters={"event_id": 1000}
415 | ```
416 |
417 | ### Failed Logins (Security)
418 | ```
419 | Tool: query_logs
420 | Parameters: source_name="windows_security", filters={"event_id": 4625}
421 | ```
422 |
423 | ## Prerequisites
424 | - Windows operating system
425 | - pywin32 package installed
426 | - Administrator rights for Security log
427 | - Appropriate Event Log permissions
428 | """
429 |
430 | @mcp.prompt(
431 | title="Structured Log Setup",
432 | description="Guide for JSON, XML, and CSV log sources"
433 | )
434 | async def structured_log_setup(
435 | format_type: Optional[str] = None
436 | ) -> str:
437 | """
438 | Guide for structured log formats.
439 |
440 | Args:
441 | format_type: Log format type (json, xml, csv)
442 | """
443 |
444 | base_guide = """
445 | # 📄 Structured Log Setup
446 |
447 | ## Supported Structured Formats
448 | - **JSON**: JavaScript Object Notation logs
449 | - **XML**: Extensible Markup Language logs
450 | - **CSV**: Comma-Separated Values logs
451 | """
452 |
453 | format_guides = {
454 | "json": """
455 | ### JSON Log Configuration
456 | ```
457 | Tool: register_log_source
458 | Parameters:
459 | name="app_json_logs"
460 | source_type="json"
461 | path="/var/log/app/application.json"
462 | config={
463 | "timestamp_field": "timestamp",
464 | "level_field": "severity",
465 | "message_field": "message"
466 | }
467 | ```
468 |
469 | **Example JSON Format:**
470 | ```json
471 | {
472 | "timestamp": "2024-01-15T10:30:00Z",
473 | "severity": "ERROR",
474 | "message": "Database connection failed",
475 | "component": "database",
476 | "error_code": "DB_001"
477 | }
478 | ```
479 |
480 | **Query Example:**
481 | ```
482 | Tool: query_logs
483 | Parameters:
484 | source_name="app_json_logs"
485 | filters={"severity": "ERROR", "component": "database"}
486 | ```
487 | """,
488 | "xml": """
489 | ### XML Log Configuration
490 | ```
491 | Tool: register_log_source
492 | Parameters:
493 | name="app_xml_logs"
494 | source_type="xml"
495 | path="/var/log/app/events.xml"
496 | config={
497 | "root_element": "events",
498 | "event_element": "event",
499 | "timestamp_path": "event/timestamp",
500 | "level_path": "event/level"
501 | }
502 | ```
503 |
504 | **Example XML Format:**
505 | ```xml
506 | <events>
507 | <event>
508 | <timestamp>2024-01-15T10:30:00Z</timestamp>
509 | <level>ERROR</level>
510 | <message>Service initialization failed</message>
511 | <source>ServiceManager</source>
512 | </event>
513 | </events>
514 | ```
515 |
516 | **Query Example:**
517 | ```
518 | Tool: query_logs
519 | Parameters:
520 | source_name="app_xml_logs"
521 | filters={"level": "ERROR", "source": "ServiceManager"}
522 | ```
523 | """,
524 | "csv": """
525 | ### CSV Log Configuration
526 | ```
527 | Tool: register_log_source
528 | Parameters:
529 | name="app_csv_logs"
530 | source_type="csv"
531 | path="/var/log/app/metrics.csv"
532 | config={
533 | "delimiter": ",",
534 | "has_header": true,
535 | "timestamp_column": 0,
536 | "level_column": 2,
537 | "message_column": 3
538 | }
539 | ```
540 |
541 | **Example CSV Format:**
542 | ```csv
543 | timestamp,host,level,message,duration_ms
544 | 2024-01-15T10:30:00Z,server01,ERROR,Request timeout,5023
545 | 2024-01-15T10:30:01Z,server01,INFO,Request processed,245
546 | ```
547 |
548 | **Query Example:**
549 | ```
550 | Tool: query_logs
551 | Parameters:
552 | source_name="app_csv_logs"
553 | filters={"level": "ERROR", "host": "server01"}
554 | ```
555 | """
556 | }
557 |
558 | if format_type and format_type.lower() in format_guides:
559 | base_guide += format_guides[format_type.lower()]
560 | else:
561 | for guide in format_guides.values():
562 | base_guide += guide
563 |
564 | base_guide += """
565 | ## Configuration Best Practices
566 | ✅ Specify field mappings clearly
567 | ✅ Use consistent timestamp formats
568 | ✅ Validate log format before registering
569 | ✅ Test queries after registration
570 | ✅ Document custom field meanings
571 | """
572 |
573 | return base_guide
574 |
575 | @mcp.prompt(
576 | title="Text Log Setup",
577 | description="Guide for plain text and custom format logs"
578 | )
579 | async def text_log_setup() -> str:
580 | """
581 | Guide for text-based log sources.
582 | """
583 | return """
584 | # 📝 Text Log Setup
585 |
586 | ## Registering Text Logs
587 |
588 | ### Basic Text Log
589 | ```
590 | Tool: register_log_source
591 | Parameters:
592 | name="syslog"
593 | source_type="text"
594 | path="/var/log/syslog"
595 | ```
596 |
597 | ### Custom Format Configuration
598 | ```
599 | Tool: register_log_source
600 | Parameters:
601 | name="custom_app_log"
602 | source_type="text"
603 | path="/var/log/app/custom.log"
604 | config={
605 | "pattern": "(?P<timestamp>\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) \\[(?P<level>\\w+)\\] (?P<message>.*)",
606 | "timestamp_format": "%Y-%m-%d %H:%M:%S"
607 | }
608 | ```
609 |
610 | ## Common Text Log Formats
611 |
612 | ### Syslog Format
613 | ```
614 | Jan 15 10:30:45 hostname service[1234]: Error message here
615 | ```
616 | **Config**: Built-in syslog parser
617 |
618 | ### Apache/Nginx Access Logs
619 | ```
620 | 192.168.1.1 - - [15/Jan/2024:10:30:45 +0000] "GET /api/data HTTP/1.1" 500 1234
621 | ```
622 | **Config**: Use pattern matching for fields
623 |
624 | ### Application Logs
625 | ```
626 | 2024-01-15 10:30:45 [ERROR] [database] Connection pool exhausted
627 | ```
628 | **Config**: Define custom regex pattern
629 |
630 | ## Query Examples
631 |
632 | ### Search by Content
633 | ```
634 | Tool: query_logs
635 | Parameters:
636 | source_name="syslog"
637 | filters={"message_contains": "authentication failed"}
638 | ```
639 |
640 | ### Filter by Pattern
641 | ```
642 | Tool: query_logs
643 | Parameters:
644 | source_name="custom_app_log"
645 | filters={"regex_pattern": "ERROR.*database.*timeout"}
646 | ```
647 |
648 | ## Parsing Tips
649 | ✅ Test regex patterns before registering
650 | ✅ Use named capture groups for fields
651 | ✅ Handle multi-line log entries
652 | ✅ Consider log rotation handling
653 | ✅ Validate timestamp parsing
654 | """
655 |
656 | @mcp.prompt(
657 | title="Log Analysis Workflow",
658 | description="Step-by-step workflow for comprehensive log analysis"
659 | )
660 | async def log_analysis_workflow() -> str:
661 | """
662 | Complete workflow for log analysis tasks.
663 | """
664 | return """
665 | # 🔄 Log Analysis Workflow
666 |
667 | ## Step-by-Step Analysis Process
668 |
669 | ### 1. Setup Phase
670 | ```
671 | # Register your log sources
672 | Tool: register_log_source
673 | Parameters: [appropriate for your log type]
674 |
675 | # Verify registration
676 | Tool: list_log_sources
677 | ```
678 |
679 | ### 2. Initial Assessment
680 | ```
681 | # Get overview with summary analysis
682 | Tool: analyze_logs
683 | Parameters: source_name="your_source", analysis_type="summary"
684 |
685 | # Check recent errors
686 | Tool: query_logs
687 | Parameters: source_name="your_source", filters={"level": "Error"}, start_time="6 hours ago"
688 | ```
689 |
690 | ### 3. Deep Dive Investigation
691 | ```
692 | # Find patterns in errors
693 | Tool: analyze_logs
694 | Parameters: source_name="your_source", analysis_type="pattern", filters={"level": "Error"}
695 |
696 | # Search for specific issues
697 | Tool: query_logs
698 | Parameters: source_name="your_source", filters={"message_contains": "specific error"}
699 | ```
700 |
701 | ### 4. Anomaly Detection
702 | ```
703 | # Check for unusual activity
704 | Tool: analyze_logs
705 | Parameters: source_name="your_source", analysis_type="anomaly", time_duration="48h"
706 | ```
707 |
708 | ### 5. Reporting
709 | - Document findings from summary analysis
710 | - List identified patterns and frequencies
711 | - Note any anomalies detected
712 | - Provide recommendations
713 |
714 | ## Common Analysis Scenarios
715 |
716 | ### Performance Investigation
717 | 1. Register application logs
718 | 2. Query for performance warnings
719 | 3. Analyze patterns in slow operations
720 | 4. Identify peak problem times
721 |
722 | ### Security Audit
723 | 1. Register security/auth logs
724 | 2. Search for failed authentications
725 | 3. Detect anomalous access patterns
726 | 4. Review privilege escalations
727 |
728 | ### Error Troubleshooting
729 | 1. Register relevant log sources
730 | 2. Filter by error level
731 | 3. Analyze error patterns
732 | 4. Correlate with system events
733 |
734 | ### Capacity Planning
735 | 1. Analyze usage patterns over time
736 | 2. Identify growth trends
737 | 3. Find resource bottlenecks
738 | 4. Project future needs
739 |
740 | ## Best Practices
741 | ✅ Always start with summary analysis
742 | ✅ Use time-based filters to focus investigation
743 | ✅ Combine multiple analysis types
744 | ✅ Document your findings
745 | ✅ Clean up test sources when done
746 | """
747 |
748 | @mcp.prompt(
749 | title="Log Troubleshooting",
750 | description="Troubleshooting common log analysis issues"
751 | )
752 | async def log_troubleshooting() -> str:
753 | """
754 | Troubleshooting guide for common issues.
755 | """
756 | return """
757 | # 🔧 Log Troubleshooting Guide
758 |
759 | ## Registration Issues
760 |
761 | ### "Log source already exists"
762 | **Solution:**
763 | 1. List existing sources: `list_log_sources`
764 | 2. Delete if needed: `delete_log_source`
765 | 3. Choose different name
766 |
767 | ### "File not found"
768 | **Solution:**
769 | 1. Verify file path is correct
770 | 2. Check file permissions
771 | 3. Ensure path is absolute, not relative
772 | 4. Test file access with system tools
773 |
774 | ### "Unsupported source type"
775 | **Solution:**
776 | - Valid types: evt, json, xml, csv, text
777 | - Check spelling and case
778 | - Use "text" for custom formats
779 |
780 | ## Query Issues
781 |
782 | ### "No logs returned"
783 | **Possible causes:**
784 | 1. Time range too restrictive
785 | 2. Filters excluding all data
786 | 3. Log source empty in time range
787 | 4. Parsing errors
788 |
789 | **Solutions:**
790 | - Remove filters and try again
791 | - Expand time range
792 | - Check source has recent data
793 | - Verify log format matches parser
794 |
795 | ### "Query timeout"
796 | **Solutions:**
797 | 1. Reduce time range
798 | 2. Add more specific filters
799 | 3. Use pagination (limit/offset)
800 | 4. Query smaller time windows
801 |
802 | ### "Invalid filter format"
803 | **Solutions:**
804 | - Check filter field names
805 | - Verify filter syntax
806 | - Use correct data types
807 | - Test filters incrementally
808 |
809 | ## Analysis Issues
810 |
811 | ### "Analysis returns empty"
812 | **Check:**
813 | 1. Log source contains data
814 | 2. Time range includes logs
815 | 3. Filters not too restrictive
816 | 4. Analysis type is valid
817 |
818 | ### "Pattern analysis finds nothing"
819 | **Solutions:**
820 | - Increase time range for more data
821 | - Ensure logs have patterns to find
822 | - Check log format consistency
823 | - Try different filter criteria
824 |
825 | ### "Anomaly detection not working"
826 | **Requirements:**
827 | - Sufficient historical data
828 | - Consistent log format
829 | - Baseline period available
830 | - Varied log content
831 |
832 | ## Performance Issues
833 |
834 | ### Slow Queries
835 | - Use specific time ranges
836 | - Apply filters early
837 | - Limit result count
838 | - Index frequently searched fields
839 |
840 | ### Large Log Files
841 | - Implement log rotation
842 | - Archive old logs
843 | - Use time-based queries
844 | - Consider partitioning
845 |
846 | ### Memory Issues
847 | - Process in smaller chunks
848 | - Use streaming where possible
849 | - Limit concurrent queries
850 | - Monitor resource usage
851 |
852 | ## Platform-Specific Issues
853 |
854 | ### Windows
855 | - **pywin32 missing**: Install with pip
856 | - **Access denied**: Need admin rights
857 | - **Security log**: Requires elevation
858 |
859 | ### Linux
860 | - **Permission denied**: Check file permissions
861 | - **Log rotation**: Handle rotated files
862 | - **Different paths**: Check distribution
863 |
864 | ## Quick Fixes Checklist
865 | - [ ] Verify log source is registered
866 | - [ ] Check file permissions and access
867 | - [ ] Validate time ranges in queries
868 | - [ ] Test with minimal filters first
869 | - [ ] Ensure proper log format
870 | - [ ] Check system resources
871 | - [ ] Review error messages carefully
872 | """
```
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/mcp_server/tools/process_test_tools.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Process monitoring and system resource testing MCP tools.
3 | """
4 |
from typing import Any, Dict, List, Optional

import psutil
from mcp.server import FastMCP
from pydantic import BaseModel, Field
10 |
11 |
class ProcessAnalysisRequest(BaseModel):
    """Request model for process analysis.

    Filter and ordering options used when selecting processes to report.
    """

    # FIX: the original annotated this `str` while defaulting it to None;
    # Optional[str] matches the actual contract (None = match any process).
    process_name: Optional[str] = Field(
        None, description="Specific process name to analyze"
    )
    min_cpu_percent: float = Field(0.0, description="Minimum CPU usage threshold")
    min_memory_percent: float = Field(0.0, description="Minimum memory usage threshold")
    max_results: int = Field(20, description="Maximum number of processes to return")
    sort_by: str = Field("cpu", description="Sort by 'cpu', 'memory', or 'pid'")
20 |
21 |
class SystemResourceRequest(BaseModel):
    """Request model for system resource monitoring."""

    # Toggle collection of network I/O counters and active connection count.
    include_network: bool = Field(True, description="Include network statistics")
    # Toggle collection of disk usage and disk I/O counters.
    include_disk: bool = Field(True, description="Include disk I/O statistics")
    # Passed to psutil.cpu_percent; a longer interval smooths the CPU reading
    # but blocks the tool for that long.
    sample_interval: float = Field(1.0, description="Sampling interval in seconds")
28 |
29 |
class ProcessMonitoringRequest(BaseModel):
    """Request model for process monitoring over time.

    NOTE(review): no tool in this module currently consumes this model —
    presumably reserved for a time-series monitoring tool; confirm before
    removing.
    """

    # Substring matched case-insensitively against process names.
    process_name: str = Field(..., description="Process name to monitor")
    # Total wall-clock time to observe the process.
    duration_seconds: int = Field(60, description="Monitoring duration in seconds")
    # Time between successive samples within the monitoring window.
    sample_interval: float = Field(5.0, description="Sampling interval in seconds")
36 |
37 |
38 | def register_process_test_tools(mcp: FastMCP):
39 | """Register all process testing tools with the MCP server."""
40 |
41 | @mcp.tool()
42 | async def test_system_resources_access() -> Dict[str, Any]:
43 | """
44 | Test system resource monitoring capabilities.
45 |
46 | This tool checks if the system can access various system
47 | resource information and provides diagnostic data.
48 | """
49 | try:
50 | test_results = {}
51 |
52 | # Test basic system info access
53 | try:
54 | test_results["cpu"] = {
55 | "accessible": True,
56 | "cpu_count": psutil.cpu_count(),
57 | "cpu_count_logical": psutil.cpu_count(logical=True),
58 | "current_usage": psutil.cpu_percent(interval=0.1),
59 | }
60 | except Exception as e:
61 | test_results["cpu"] = {"accessible": False, "error": str(e)}
62 |
63 | # Test memory access
64 | try:
65 | memory = psutil.virtual_memory()
66 | test_results["memory"] = {
67 | "accessible": True,
68 | "total_gb": round(memory.total / (1024**3), 2),
69 | "available_gb": round(memory.available / (1024**3), 2),
70 | "percent_used": memory.percent,
71 | }
72 | except Exception as e:
73 | test_results["memory"] = {"accessible": False, "error": str(e)}
74 |
75 | # Test disk access
76 | try:
77 | disk = psutil.disk_usage("/")
78 | test_results["disk"] = {
79 | "accessible": True,
80 | "total_gb": round(disk.total / (1024**3), 2),
81 | "used_gb": round(disk.used / (1024**3), 2),
82 | "free_gb": round(disk.free / (1024**3), 2),
83 | "percent_used": round((disk.used / disk.total) * 100, 1),
84 | }
85 | except Exception as e:
86 | test_results["disk"] = {"accessible": False, "error": str(e)}
87 |
88 | # Test network access
89 | try:
90 | network = psutil.net_io_counters()
91 | test_results["network"] = {
92 | "accessible": True,
93 | "bytes_sent": network.bytes_sent,
94 | "bytes_recv": network.bytes_recv,
95 | "packets_sent": network.packets_sent,
96 | "packets_recv": network.packets_recv,
97 | }
98 | except Exception as e:
99 | test_results["network"] = {"accessible": False, "error": str(e)}
100 |
101 | # Test process enumeration
102 | try:
103 | processes = list(psutil.process_iter(["pid", "name"]))
104 | test_results["processes"] = {
105 | "accessible": True,
106 | "total_count": len(processes),
107 | "sample_processes": [p.info for p in processes[:5]],
108 | }
109 | except Exception as e:
110 | test_results["processes"] = {"accessible": False, "error": str(e)}
111 |
112 | return {
113 | "status": "completed",
114 | "psutil_version": psutil.__version__,
115 | "test_results": test_results,
116 | }
117 |
118 | except Exception as e:
119 | return {"error": f"Error testing system resources: {str(e)}"}
120 |
121 | @mcp.tool()
122 | async def analyze_system_performance(
123 | request: SystemResourceRequest,
124 | ) -> Dict[str, Any]:
125 | """
126 | Analyze current system performance and resource usage.
127 |
128 | This tool provides a comprehensive analysis of system performance
129 | including CPU, memory, disk, and network usage patterns.
130 | """
131 | try:
132 | performance_data = {}
133 |
134 | # CPU Analysis
135 | cpu_percent = psutil.cpu_percent(interval=request.sample_interval)
136 | cpu_freq = psutil.cpu_freq()
137 | performance_data["cpu"] = {
138 | "usage_percent": cpu_percent,
139 | "core_count": psutil.cpu_count(),
140 | "logical_core_count": psutil.cpu_count(logical=True),
141 | "frequency": {
142 | "current": cpu_freq.current if cpu_freq else None,
143 | "min": cpu_freq.min if cpu_freq else None,
144 | "max": cpu_freq.max if cpu_freq else None,
145 | },
146 | "load_average": (
147 | psutil.getloadavg() if hasattr(psutil, "getloadavg") else None
148 | ),
149 | }
150 |
151 | # Memory Analysis
152 | memory = psutil.virtual_memory()
153 | swap = psutil.swap_memory()
154 | performance_data["memory"] = {
155 | "virtual": {
156 | "total_gb": round(memory.total / (1024**3), 2),
157 | "available_gb": round(memory.available / (1024**3), 2),
158 | "used_gb": round(memory.used / (1024**3), 2),
159 | "percent_used": memory.percent,
160 | },
161 | "swap": {
162 | "total_gb": round(swap.total / (1024**3), 2),
163 | "used_gb": round(swap.used / (1024**3), 2),
164 | "percent_used": swap.percent,
165 | },
166 | }
167 |
168 | # Disk Analysis
169 | if request.include_disk:
170 | disk_usage = psutil.disk_usage("/")
171 | disk_io = psutil.disk_io_counters()
172 | performance_data["disk"] = {
173 | "usage": {
174 | "total_gb": round(disk_usage.total / (1024**3), 2),
175 | "used_gb": round(disk_usage.used / (1024**3), 2),
176 | "free_gb": round(disk_usage.free / (1024**3), 2),
177 | "percent_used": round(
178 | (disk_usage.used / disk_usage.total) * 100, 1
179 | ),
180 | },
181 | "io_counters": (
182 | {
183 | "read_bytes": disk_io.read_bytes if disk_io else None,
184 | "write_bytes": disk_io.write_bytes if disk_io else None,
185 | "read_count": disk_io.read_count if disk_io else None,
186 | "write_count": disk_io.write_count if disk_io else None,
187 | }
188 | if disk_io
189 | else None
190 | ),
191 | }
192 |
193 | # Network Analysis
194 | if request.include_network:
195 | net_io = psutil.net_io_counters()
196 | net_connections = len(psutil.net_connections())
197 | performance_data["network"] = {
198 | "io_counters": (
199 | {
200 | "bytes_sent": net_io.bytes_sent if net_io else None,
201 | "bytes_recv": net_io.bytes_recv if net_io else None,
202 | "packets_sent": net_io.packets_sent if net_io else None,
203 | "packets_recv": net_io.packets_recv if net_io else None,
204 | }
205 | if net_io
206 | else None
207 | ),
208 | "active_connections": net_connections,
209 | }
210 |
211 | # Performance Assessment
212 | performance_status = "good"
213 | issues = []
214 |
215 | if cpu_percent > 80:
216 | performance_status = "concerning"
217 | issues.append(f"High CPU usage: {cpu_percent}%")
218 | elif cpu_percent > 60:
219 | performance_status = "fair"
220 | issues.append(f"Moderate CPU usage: {cpu_percent}%")
221 |
222 | if memory.percent > 90:
223 | performance_status = "concerning"
224 | issues.append(f"High memory usage: {memory.percent}%")
225 | elif memory.percent > 75:
226 | if performance_status == "good":
227 | performance_status = "fair"
228 | issues.append(f"Moderate memory usage: {memory.percent}%")
229 |
230 | return {
231 | "performance_status": performance_status,
232 | "issues": issues,
233 | "performance_data": performance_data,
234 | "sampling_interval": request.sample_interval,
235 | }
236 |
237 | except Exception as e:
238 | return {"error": f"Error analyzing system performance: {str(e)}"}
239 |
240 | @mcp.tool()
241 | async def find_resource_intensive_processes(
242 | request: ProcessAnalysisRequest,
243 | ) -> Dict[str, Any]:
244 | """
245 | Find processes that are consuming significant system resources.
246 |
247 | This tool identifies processes with high CPU or memory usage
248 | and provides detailed information for troubleshooting.
249 | """
250 | try:
251 | processes = []
252 |
253 | # Collect process information
254 | for proc in psutil.process_iter(
255 | [
256 | "pid",
257 | "name",
258 | "cpu_percent",
259 | "memory_percent",
260 | "memory_info",
261 | "create_time",
262 | "status",
263 | "cmdline",
264 | ]
265 | ):
266 | try:
267 | proc_info = proc.info
268 |
269 | # Get CPU percentage with brief interval
270 | if proc_info["cpu_percent"] is None:
271 | proc_info["cpu_percent"] = proc.cpu_percent(interval=0.1)
272 |
273 | # Apply filters
274 | if (
275 | request.process_name
276 | and request.process_name.lower()
277 | not in proc_info["name"].lower()
278 | ):
279 | continue
280 |
281 | if proc_info["cpu_percent"] < request.min_cpu_percent:
282 | continue
283 |
284 | if proc_info["memory_percent"] < request.min_memory_percent:
285 | continue
286 |
287 | # Add additional details
288 | proc_info["memory_mb"] = (
289 | round(proc_info["memory_info"].rss / (1024 * 1024), 1)
290 | if proc_info["memory_info"]
291 | else 0
292 | )
293 | proc_info["command_line"] = (
294 | " ".join(proc_info["cmdline"][:3])
295 | if proc_info["cmdline"]
296 | else ""
297 | )
298 |
299 | processes.append(proc_info)
300 |
301 | except (psutil.NoSuchProcess, psutil.AccessDenied):
302 | continue
303 |
304 | # Sort processes
305 | if request.sort_by == "cpu":
306 | processes.sort(key=lambda x: x.get("cpu_percent", 0), reverse=True)
307 | elif request.sort_by == "memory":
308 | processes.sort(key=lambda x: x.get("memory_percent", 0), reverse=True)
309 | elif request.sort_by == "pid":
310 | processes.sort(key=lambda x: x.get("pid", 0))
311 |
312 | # Limit results
313 | limited_processes = processes[: request.max_results]
314 |
315 | # Calculate summary statistics
316 | if processes:
317 | total_cpu = sum(p.get("cpu_percent", 0) for p in processes)
318 | total_memory = sum(p.get("memory_percent", 0) for p in processes)
319 | avg_cpu = total_cpu / len(processes)
320 | avg_memory = total_memory / len(processes)
321 | else:
322 | total_cpu = avg_cpu = total_memory = avg_memory = 0
323 |
324 | return {
325 | "search_criteria": {
326 | "process_name": request.process_name,
327 | "min_cpu_percent": request.min_cpu_percent,
328 | "min_memory_percent": request.min_memory_percent,
329 | "sort_by": request.sort_by,
330 | },
331 | "processes": limited_processes,
332 | "summary": {
333 | "total_matching": len(processes),
334 | "returned_count": len(limited_processes),
335 | "total_cpu_usage": round(total_cpu, 1),
336 | "total_memory_usage": round(total_memory, 1),
337 | "average_cpu_usage": round(avg_cpu, 1),
338 | "average_memory_usage": round(avg_memory, 1),
339 | },
340 | }
341 |
342 | except Exception as e:
343 | return {"error": f"Error finding resource intensive processes: {str(e)}"}
344 |
345 | @mcp.tool()
346 | async def monitor_process_health(process_name: str) -> Dict[str, Any]:
347 | """
348 | Monitor the health and status of a specific process.
349 |
350 | This tool provides detailed information about a specific process
351 | including resource usage, status, and potential issues.
352 | """
353 | try:
354 | matching_processes = []
355 |
356 | # Find all processes matching the name
357 | for proc in psutil.process_iter(
358 | [
359 | "pid",
360 | "name",
361 | "cpu_percent",
362 | "memory_percent",
363 | "memory_info",
364 | "create_time",
365 | "status",
366 | "cmdline",
367 | "num_threads",
368 | "connections",
369 | ]
370 | ):
371 | try:
372 | if process_name.lower() in proc.info["name"].lower():
373 | proc_info = proc.info.copy()
374 |
375 | # Get current CPU usage
376 | proc_info["current_cpu"] = proc.cpu_percent(interval=0.1)
377 |
378 | # Add memory in MB
379 | proc_info["memory_mb"] = (
380 | round(proc_info["memory_info"].rss / (1024 * 1024), 1)
381 | if proc_info["memory_info"]
382 | else 0
383 | )
384 |
385 | # Get process age
386 | from datetime import datetime
387 |
388 | create_time = datetime.fromtimestamp(proc_info["create_time"])
389 | proc_info["age"] = str(datetime.now() - create_time).split(".")[
390 | 0
391 | ]
392 |
393 | # Count network connections
394 | try:
395 | connections = proc.connections()
396 | proc_info["network_connections"] = len(connections)
397 | except (psutil.AccessDenied, psutil.NoSuchProcess):
398 | proc_info["network_connections"] = "Access denied"
399 |
400 | matching_processes.append(proc_info)
401 |
402 | except (psutil.NoSuchProcess, psutil.AccessDenied):
403 | continue
404 |
405 | if not matching_processes:
406 | return {
407 | "process_name": process_name,
408 | "found": False,
409 | "message": f"No processes found matching '{process_name}'",
410 | }
411 |
412 | # Health assessment
413 | health_issues = []
414 | total_cpu = sum(p.get("current_cpu", 0) for p in matching_processes)
415 | total_memory = sum(p.get("memory_percent", 0) for p in matching_processes)
416 |
417 | if total_cpu > 50:
418 | health_issues.append(f"High CPU usage: {total_cpu:.1f}%")
419 | if total_memory > 20:
420 | health_issues.append(f"High memory usage: {total_memory:.1f}%")
421 |
422 | # Check for multiple instances
423 | if len(matching_processes) > 1:
424 | health_issues.append(
425 | f"Multiple instances running: {len(matching_processes)}"
426 | )
427 |
428 | health_status = "healthy" if not health_issues else "issues_detected"
429 |
430 | return {
431 | "process_name": process_name,
432 | "found": True,
433 | "health_status": health_status,
434 | "health_issues": health_issues,
435 | "process_count": len(matching_processes),
436 | "processes": matching_processes,
437 | "summary": {
438 | "total_cpu_usage": round(total_cpu, 1),
439 | "total_memory_usage": round(total_memory, 1),
440 | "total_memory_mb": sum(
441 | p.get("memory_mb", 0) for p in matching_processes
442 | ),
443 | },
444 | }
445 |
446 | except Exception as e:
447 | return {"error": f"Error monitoring process health: {str(e)}"}
448 |
449 | @mcp.tool()
450 | async def get_system_health_summary() -> Dict[str, Any]:
451 | """
452 | Get overall system health summary.
453 |
454 | This tool provides a comprehensive overview of system health
455 | including resource usage, top processes, and potential issues.
456 | """
457 | try:
458 | from datetime import datetime
459 |
460 | # System resource summary
461 | cpu_percent = psutil.cpu_percent(interval=1.0)
462 | memory = psutil.virtual_memory()
463 | disk = psutil.disk_usage("/")
464 |
465 | # Get top processes by CPU and memory
466 | processes = []
467 | for proc in psutil.process_iter(
468 | ["pid", "name", "cpu_percent", "memory_percent"]
469 | ):
470 | try:
471 | proc_info = proc.info
472 | if proc_info["cpu_percent"] is None:
473 | proc_info["cpu_percent"] = proc.cpu_percent(interval=0.1)
474 | processes.append(proc_info)
475 | except (psutil.NoSuchProcess, psutil.AccessDenied):
476 | continue
477 |
478 | # Top CPU consumers
479 | top_cpu = sorted(
480 | processes, key=lambda x: x.get("cpu_percent", 0), reverse=True
481 | )[:5]
482 |
483 | # Top memory consumers
484 | top_memory = sorted(
485 | processes, key=lambda x: x.get("memory_percent", 0), reverse=True
486 | )[:5]
487 |
488 | # Health assessment
489 | health_score = 100
490 | issues = []
491 |
492 | if cpu_percent > 80:
493 | health_score -= 30
494 | issues.append(f"High CPU usage: {cpu_percent}%")
495 | elif cpu_percent > 60:
496 | health_score -= 15
497 | issues.append(f"Moderate CPU usage: {cpu_percent}%")
498 |
499 | if memory.percent > 90:
500 | health_score -= 25
501 | issues.append(f"High memory usage: {memory.percent}%")
502 | elif memory.percent > 75:
503 | health_score -= 10
504 | issues.append(f"Moderate memory usage: {memory.percent}%")
505 |
506 | disk_percent = (disk.used / disk.total) * 100
507 | if disk_percent > 90:
508 | health_score -= 20
509 | issues.append(f"High disk usage: {disk_percent:.1f}%")
510 | elif disk_percent > 80:
511 | health_score -= 10
512 | issues.append(f"Moderate disk usage: {disk_percent:.1f}%")
513 |
514 | # Determine overall health status
515 | if health_score >= 80:
516 | health_status = "excellent"
517 | elif health_score >= 60:
518 | health_status = "good"
519 | elif health_score >= 40:
520 | health_status = "fair"
521 | else:
522 | health_status = "poor"
523 |
524 | return {
525 | "health_status": health_status,
526 | "health_score": max(0, health_score),
527 | "issues": issues,
528 | "system_resources": {
529 | "cpu_usage_percent": cpu_percent,
530 | "memory_usage_percent": memory.percent,
531 | "disk_usage_percent": round(disk_percent, 1),
532 | "process_count": len(processes),
533 | },
534 | "top_processes": {
535 | "cpu_consumers": top_cpu,
536 | "memory_consumers": top_memory,
537 | },
538 | "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
539 | }
540 |
541 | except Exception as e:
542 | return {"error": f"Error getting system health summary: {str(e)}"}
543 |
```
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/parsers/etl_cached_parser.py:
--------------------------------------------------------------------------------
```python
1 | """ETL parser with CSV caching to avoid repeated conversions."""
2 |
3 | import csv
4 | import hashlib
5 | import json
6 | import logging
7 | import os
8 | import platform
9 | import subprocess
10 | import tempfile
11 | from datetime import datetime
12 | from pathlib import Path
13 | from typing import Any, Dict, Iterator, List, Optional, Union
14 |
15 | from ..core.models import LogRecord, LogSource, LogType
16 | from .base import BaseParser
17 |
18 | logger = logging.getLogger(__name__)
19 |
20 |
21 | class EtlCachedParser(BaseParser):
22 | """ETL parser that caches CSV conversions for performance."""
23 |
24 | # Class-level cache directory
25 | _cache_dir: Optional[str] = None
26 | _cache_registry: Dict[str, Dict[str, Any]] = {} # Maps ETL file paths to cached CSV paths
27 | _conversion_locks: Dict[str, Any] = {} # Prevents concurrent conversions of same file
28 |
    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialize ETL cached parser.

        Locates tracerpt.exe (Windows only) and ensures the class-level
        CSV cache directory and registry are initialized.

        Args:
            config: Parser configuration.
        """
        super().__init__(config)
        # None when tracerpt.exe is unavailable (e.g. non-Windows hosts).
        self.tracerpt_path = self._find_tracerpt()
        self._init_cache_dir()
38 |
39 | @classmethod
40 | def _init_cache_dir(cls) -> None:
41 | """Initialize the cache directory if not already done."""
42 | if cls._cache_dir is None:
43 | # Create cache directory in temp
44 | cls._cache_dir = os.path.join(tempfile.gettempdir(), "mcp_etl_cache")
45 | os.makedirs(cls._cache_dir, exist_ok=True)
46 |
47 | # Load cache registry if it exists
48 | registry_file = os.path.join(cls._cache_dir, "cache_registry.json")
49 | if os.path.exists(registry_file):
50 | try:
51 | with open(registry_file, "r") as f:
52 | cls._cache_registry = json.load(f)
53 | # Clean up stale entries
54 | cls._cleanup_stale_cache()
55 | except Exception:
56 | cls._cache_registry = {}
57 |
58 | @classmethod
59 | def _save_cache_registry(cls) -> None:
60 | """Save the cache registry to disk."""
61 | if cls._cache_dir is None:
62 | return
63 | registry_file = os.path.join(cls._cache_dir, "cache_registry.json")
64 | try:
65 | with open(registry_file, "w") as f:
66 | json.dump(cls._cache_registry, f, indent=2)
67 | except Exception as e:
68 | logger.error(f"Failed to save cache registry: {e}")
69 |
70 | @classmethod
71 | def _cleanup_stale_cache(cls) -> None:
72 | """Remove cache entries for files that no longer exist."""
73 | stale_entries = []
74 | for normalized_path, cache_info in cls._cache_registry.items():
75 | # Check if the CSV file still exists
76 | csv_exists = os.path.exists(cache_info.get("csv_path", ""))
77 |
78 | # For ETL file, try to check if it exists (normalized path might not be exact)
79 | # Just check if CSV is missing, since ETL path might have changed
80 | if not csv_exists:
81 | stale_entries.append(normalized_path)
82 |
83 | for entry in stale_entries:
84 | del cls._cache_registry[entry]
85 |
86 | if stale_entries:
87 | cls._save_cache_registry()
88 |
89 | def _find_tracerpt(self) -> Optional[str]:
90 | """Find tracerpt.exe on the system."""
91 | if platform.system() != "Windows":
92 | return None
93 |
94 | # Common locations for tracerpt.exe
95 | possible_paths = [
96 | r"C:\Windows\System32\tracerpt.exe",
97 | r"C:\Windows\SysWOW64\tracerpt.exe",
98 | ]
99 |
100 | for path in possible_paths:
101 | if os.path.exists(path):
102 | return path
103 |
104 | # Try to find it in PATH
105 | try:
106 | result = subprocess.run(
107 | ["where", "tracerpt.exe"], capture_output=True, text=True, check=False
108 | )
109 | if result.returncode == 0 and result.stdout.strip():
110 | return result.stdout.strip().split("\n")[0]
111 | except Exception:
112 | pass
113 |
114 | return None
115 |
    def is_available(self) -> bool:
        """Check if ETL parsing is available (tracerpt.exe was located)."""
        return self.tracerpt_path is not None
119 |
120 | def _get_cache_key(self, file_path: str) -> str:
121 | """Generate a cache key for an ETL file based on path and size."""
122 | path = Path(file_path)
123 | # Normalize the path to ensure consistency
124 | normalized_path = str(path.resolve()).lower()
125 | stat = path.stat()
126 | # Include normalized file path and size in key (not mtime to preserve cache)
127 | key_data = f"{normalized_path}|{stat.st_size}"
128 | return hashlib.md5(key_data.encode()).hexdigest()
129 |
130 | def _get_cached_csv(self, file_path: str) -> Optional[str]:
131 | """Get cached CSV path if it exists and is valid."""
132 | # Normalize the path to match how we store in registry
133 | normalized_path = str(Path(file_path).resolve()).lower()
134 |
135 | if normalized_path not in self._cache_registry:
136 | return None
137 |
138 | cache_info = self._cache_registry[normalized_path]
139 | cache_key = self._get_cache_key(file_path)
140 |
141 | # Check if cache is still valid
142 | if cache_info.get("cache_key") != cache_key:
143 | # File has changed, invalidate cache
144 | logger.info(f"ETL file has changed, invalidating cache for {file_path}")
145 | self._remove_cache_entry(file_path)
146 | return None
147 |
148 | csv_path = cache_info.get("csv_path")
149 | if csv_path and os.path.exists(csv_path):
150 | logger.info(f"Using cached CSV for {file_path}: {csv_path}")
151 | return str(csv_path)
152 |
153 | # CSV file missing, remove entry
154 | self._remove_cache_entry(file_path)
155 | return None
156 |
157 | def _remove_cache_entry(self, file_path: str) -> None:
158 | """Remove a cache entry and its CSV file."""
159 | # Normalize the path to match how we store in registry
160 | normalized_path = str(Path(file_path).resolve()).lower()
161 |
162 | if normalized_path in self._cache_registry:
163 | cache_info = self._cache_registry[normalized_path]
164 | csv_path = cache_info.get("csv_path")
165 | if csv_path and os.path.exists(csv_path):
166 | try:
167 | os.remove(csv_path)
168 | logger.info(f"Removed cached CSV: {csv_path}")
169 | except Exception as e:
170 | logger.error(f"Failed to remove cached CSV: {e}")
171 | del self._cache_registry[normalized_path]
172 | self._save_cache_registry()
173 |
174 | def _convert_etl_to_csv_sync(self, etl_path: str) -> str:
175 | """Convert ETL to CSV using tracerpt, with locking to prevent concurrent conversions."""
176 | import threading
177 |
178 | # Use threading lock to prevent concurrent conversions of same file
179 | if etl_path not in self._conversion_locks:
180 | self._conversion_locks[etl_path] = threading.Lock()
181 |
182 | with self._conversion_locks[etl_path]:
183 | # Check again if CSV was created while waiting for lock
184 | cached_csv = self._get_cached_csv(etl_path)
185 | if cached_csv:
186 | return cached_csv
187 |
188 | # Generate output filename
189 | cache_key = self._get_cache_key(etl_path)
190 | csv_filename = f"etl_{cache_key}.csv"
191 | csv_path = os.path.join(self._cache_dir or tempfile.gettempdir(), csv_filename)
192 |
193 | # Check if the CSV file already exists in cache directory (missed by registry)
194 | if os.path.exists(csv_path):
195 | logger.info(f"Found existing CSV file (missed by registry): {csv_path}")
196 | # Update cache registry with normalized path
197 | normalized_path = str(Path(etl_path).resolve()).lower()
198 | file_size_mb = Path(etl_path).stat().st_size / (1024 * 1024)
199 | self._cache_registry[normalized_path] = {
200 | "csv_path": csv_path,
201 | "cache_key": cache_key,
202 | "converted_at": datetime.now().isoformat(),
203 | "file_size_mb": file_size_mb,
204 | "conversion_duration_s": 0, # Unknown
205 | }
206 | self._save_cache_registry()
207 | return csv_path
208 |
209 | logger.info(f"Converting ETL to CSV: {etl_path} -> {csv_path}")
210 |
211 | # Get file size for logging
212 | file_size_mb = Path(etl_path).stat().st_size / (1024 * 1024)
213 | logger.info(f"ETL file size: {file_size_mb:.1f} MB")
214 |
215 | # Run tracerpt
216 | if self.tracerpt_path is None:
217 | raise RuntimeError("tracerpt.exe not found")
218 | cmd = [
219 | self.tracerpt_path,
220 | etl_path,
221 | "-o",
222 | csv_path,
223 | "-of",
224 | "CSV",
225 | "-y", # Overwrite without prompting
226 | "-lr", # Less restrictive; attempt to process badly-formed events
227 | ]
228 |
229 | start_time = datetime.now()
230 | logger.info(f"Starting tracerpt conversion at {start_time}")
231 | logger.info(f"Converting ETL file: {etl_path}")
232 | logger.info(f"Output CSV: {csv_path}")
233 |
234 | try:
235 | # Start tracerpt process
236 | import threading
237 | import time
238 |
239 | process = subprocess.Popen(
240 | cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
241 | )
242 |
243 | # Monitor thread for progress updates
244 | def monitor_conversion() -> None:
245 | elapsed = 0
246 | while process.poll() is None: # While process is running
247 | time.sleep(30) # Check every 30 seconds
248 | elapsed += 30
249 | if os.path.exists(csv_path):
250 | try:
251 | csv_size_mb = os.path.getsize(csv_path) / (1024 * 1024)
252 | logger.info(
253 | f"ETL conversion in progress... {elapsed}s elapsed, CSV size: {csv_size_mb:.1f} MB"
254 | )
255 | except Exception:
256 | logger.info(
257 | f"ETL conversion in progress... {elapsed}s elapsed"
258 | )
259 | else:
260 | logger.info(
261 | f"ETL conversion in progress... {elapsed}s elapsed, waiting for CSV creation..."
262 | )
263 |
264 | # Start monitoring in background thread
265 | monitor_thread = threading.Thread(
266 | target=monitor_conversion, daemon=True
267 | )
268 | monitor_thread.start()
269 |
270 | try:
271 | # Wait for process to complete with timeout
272 | stdout, stderr = process.communicate(
273 | timeout=600
274 | ) # 10 minute timeout
275 |
276 | if process.returncode != 0:
277 | raise RuntimeError(
278 | f"tracerpt failed with code {process.returncode}: {stderr}"
279 | )
280 |
281 | except subprocess.TimeoutExpired:
282 | # Kill the process if it times out
283 | process.terminate()
284 | try:
285 | process.wait(timeout=5)
286 | except subprocess.TimeoutExpired:
287 | process.kill()
288 | raise RuntimeError("tracerpt conversion timed out after 10 minutes")
289 |
290 | end_time = datetime.now()
291 | duration = (end_time - start_time).total_seconds()
292 | logger.info(f"Tracerpt completed in {duration:.1f} seconds")
293 |
294 | if process.returncode != 0:
295 | raise RuntimeError(
296 | f"tracerpt failed with code {process.returncode}: {stderr}"
297 | )
298 |
299 | # Verify CSV was created
300 | if not os.path.exists(csv_path):
301 | raise RuntimeError("tracerpt completed but produced no output file")
302 |
303 | # Update cache registry with normalized path
304 | normalized_path = str(Path(etl_path).resolve()).lower()
305 | self._cache_registry[normalized_path] = {
306 | "csv_path": csv_path,
307 | "cache_key": cache_key,
308 | "converted_at": datetime.now().isoformat(),
309 | "file_size_mb": file_size_mb,
310 | "conversion_duration_s": duration,
311 | }
312 | self._save_cache_registry()
313 |
314 | logger.info(f"Successfully cached ETL conversion: {csv_path}")
315 | return csv_path
316 |
317 | except subprocess.TimeoutExpired:
318 | raise RuntimeError("tracerpt conversion timed out after 10 minutes")
319 | except Exception as e:
320 | # Clean up partial file if it exists
321 | if os.path.exists(csv_path):
322 | try:
323 | os.remove(csv_path)
324 | except Exception:
325 | pass
326 | raise
327 |
328 | def parse_file(
329 | self, source: LogSource, file_path: Union[str, Path]
330 | ) -> Iterator[LogRecord]:
331 | """Parse ETL log records from a file using cached CSV.
332 |
333 | Args:
334 | source: The log source information.
335 | file_path: Path to the ETL file.
336 |
337 | Yields:
338 | LogRecord objects parsed from the ETL file.
339 | """
340 | if not self.is_available():
341 | raise RuntimeError(
342 | "Windows ETL parsing is not available. tracerpt.exe not found."
343 | )
344 |
345 | path = str(Path(file_path))
346 | if not os.path.exists(path):
347 | raise FileNotFoundError(f"ETL file not found: {file_path}")
348 |
349 | # Convert to CSV (cached)
350 | csv_path = self._convert_etl_to_csv_sync(path)
351 |
352 | # Parse CSV file
353 | yield from self._parse_csv_file(source, csv_path)
354 |
355 | def _parse_csv_file(
356 | self, source: LogSource, csv_path: str, limit: int = 10000, offset: int = 0
357 | ) -> Iterator[LogRecord]:
358 | """Parse records from the cached CSV file.
359 |
360 | Args:
361 | source: The log source information.
362 | csv_path: Path to the CSV file.
363 | limit: Maximum number of records to yield.
364 | offset: Number of records to skip.
365 |
366 | Yields:
367 | LogRecord objects.
368 | """
369 | records_yielded = 0
370 | records_skipped = 0
371 |
372 | with open(csv_path, "r", encoding="utf-8", errors="ignore") as f:
373 | reader = csv.DictReader(f)
374 |
375 | for row_num, row in enumerate(reader):
376 | # Handle offset
377 | if records_skipped < offset:
378 | records_skipped += 1
379 | continue
380 |
381 | # Convert and yield record
382 | log_record = self._convert_csv_row(source, row)
383 | if log_record:
384 | yield log_record
385 | records_yielded += 1
386 |
387 | # Check limit
388 | if records_yielded >= limit:
389 | break
390 |
391 | def _convert_csv_row(
392 | self, source: LogSource, row: Dict[str, str]
393 | ) -> Optional[LogRecord]:
394 | """Convert a CSV row from tracerpt to a LogRecord.
395 |
396 | Args:
397 | source: The log source information.
398 | row: CSV row dictionary.
399 |
400 | Returns:
401 | LogRecord or None if conversion fails.
402 | """
403 | try:
404 | # Clean up field names (remove alignment underscores)
405 | clean_data = {}
406 |
407 | for key, value in row.items():
408 | if key and value:
409 | # Remove leading/trailing underscores and spaces
410 | clean_key = key.strip().strip("_").lower().replace(" ", "_")
411 | clean_value = value.strip()
412 | if clean_key and clean_value:
413 | clean_data[clean_key] = clean_value
414 |
415 | # Try to parse timestamp from clock_time
416 | timestamp = None
417 | if "clock_time" in clean_data:
418 | # Clock time is in Windows FILETIME format (100-nanosecond intervals since 1601)
419 | try:
420 | filetime = int(clean_data["clock_time"])
421 | # Convert to Unix timestamp
422 | unix_timestamp = (filetime - 116444736000000000) / 10000000.0
423 | timestamp = datetime.fromtimestamp(unix_timestamp)
424 | except Exception:
425 | pass
426 |
427 | return LogRecord(
428 | source_id=source.id,
429 | timestamp=timestamp,
430 | data=clean_data,
431 | )
432 |
433 | except Exception as e:
434 | if self.config.get("verbose", False):
435 | logger.error(f"Failed to convert CSV row: {e}")
436 | return None
437 |
438 | def parse(
439 | self,
440 | path: str,
441 | filters: Optional[Dict[str, Any]] = None,
442 | start_time: Optional[datetime] = None,
443 | end_time: Optional[datetime] = None,
444 | limit: int = 1000,
445 | offset: int = 0,
446 | ) -> List[LogRecord]:
447 | """Parse ETL file with filtering and pagination using cache.
448 |
449 | Args:
450 | path: Path to the ETL file.
451 | filters: Optional filters to apply.
452 | start_time: Optional start time filter.
453 | end_time: Optional end time filter.
454 | limit: Maximum number of records to return.
455 | offset: Number of records to skip.
456 |
457 | Returns:
458 | List of LogRecord objects.
459 | """
460 | # Create a temporary log source for parsing
461 | temp_source = LogSource(
462 | name="temp_etl", type=LogType.ETL, path=path, metadata={}
463 | )
464 |
465 | records: List[LogRecord] = []
466 |
467 | for record in self.parse_file(temp_source, path):
468 | # Apply time filters
469 | if start_time and record.timestamp and record.timestamp < start_time:
470 | continue
471 | if end_time and record.timestamp and record.timestamp > end_time:
472 | continue
473 |
474 | # Apply custom filters
475 | if filters:
476 | if not self._match_filters(record, filters):
477 | continue
478 |
479 | # We need to handle offset/limit at this level since parse_file
480 | # doesn't know about filters
481 | if len(records) < offset:
482 | continue
483 |
484 | records.append(record)
485 |
486 | if len(records) >= limit + offset:
487 | break
488 |
489 | # Apply offset by slicing
490 | if offset > 0 and len(records) > offset:
491 | return records[offset : offset + limit]
492 | else:
493 | return records[:limit]
494 |
495 | def _match_filters(self, record: LogRecord, filters: Dict[str, Any]) -> bool:
496 | """Check if a record matches the provided filters.
497 |
498 | Args:
499 | record: The log record to check.
500 | filters: Dictionary of filters to apply.
501 |
502 | Returns:
503 | True if record matches all filters.
504 | """
505 | for key, value in filters.items():
506 | record_value = record.data.get(key)
507 |
508 | if isinstance(value, list):
509 | if record_value not in value:
510 | return False
511 | else:
512 | if record_value != value:
513 | return False
514 |
515 | return True
516 |
517 | def parse_content(self, source: LogSource, content: str) -> Iterator[LogRecord]:
518 | """Parse ETL log records from content string.
519 |
520 | Note: ETL files are binary and cannot be parsed from string content.
521 |
522 | Args:
523 | source: The log source information.
524 | content: String content (not supported for ETL).
525 |
526 | Raises:
527 | NotImplementedError: ETL files must be parsed from file.
528 | """
529 | raise NotImplementedError(
530 | "ETL files are binary and must be parsed from file, not string content"
531 | )
532 |
533 | def validate_file(self, file_path: Union[str, Path]) -> bool:
534 | """Validate if the file can be parsed by this parser.
535 |
536 | Args:
537 | file_path: Path to validate.
538 |
539 | Returns:
540 | True if file appears to be an ETL file.
541 | """
542 | path = Path(file_path)
543 |
544 | # Check file extension
545 | if not str(path).lower().endswith(".etl"):
546 | return False
547 |
548 | # Check if file exists and is readable
549 | if not path.exists() or not path.is_file():
550 | return False
551 |
552 | # Check if we have tracerpt available
553 | if not self.is_available():
554 | return False
555 |
556 | return True
557 |
558 | @classmethod
559 | def cleanup_cache_for_source(cls, source_path: str) -> None:
560 | """Clean up cached CSV for a specific ETL source.
561 |
562 | Args:
563 | source_path: Path to the ETL file whose cache should be removed.
564 | """
565 | logger.info(f"Cleaning up cache for ETL source: {source_path}")
566 |
567 | # Ensure cache is initialized
568 | cls._init_cache_dir()
569 |
570 | # Remove cache entry (normalize path first)
571 | normalized_path = str(Path(source_path).resolve()).lower()
572 | if normalized_path in cls._cache_registry:
573 | cache_info = cls._cache_registry[normalized_path]
574 | csv_path = cache_info.get("csv_path")
575 |
576 | # Remove CSV file
577 | if csv_path and os.path.exists(csv_path):
578 | try:
579 | os.remove(csv_path)
580 | logger.info(f"Removed cached CSV file: {csv_path}")
581 | except Exception as e:
582 | logger.error(f"Failed to remove cached CSV: {e}")
583 |
584 | # Remove from registry
585 | del cls._cache_registry[normalized_path]
586 | cls._save_cache_registry()
587 | logger.info(f"Removed cache registry entry for: {source_path}")
588 |
589 | @classmethod
590 | def cleanup_all_cache(cls) -> None:
591 | """Clean up all cached CSV files."""
592 | logger.info("Cleaning up all ETL cache")
593 |
594 | # Ensure cache is initialized
595 | cls._init_cache_dir()
596 |
597 | # Remove all CSV files
598 | for etl_path, cache_info in list(cls._cache_registry.items()):
599 | cls.cleanup_cache_for_source(etl_path)
600 |
601 | # Clear registry
602 | cls._cache_registry = {}
603 | cls._save_cache_registry()
604 |
```