This is page 3 of 4. Use http://codebase.md/sedwardstx/demomcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .gitignore
├── .mcp.json
├── check_server.py
├── CLAUDE.md
├── config
│ └── default.yml
├── docs
│ ├── api_reference.md
│ ├── demo-recording
│ │ └── MCPDemo.gif
│ ├── example-context-docs
│ │ ├── mcp-ai-agent-architecture.md
│ │ ├── mcp-ai-agent-dev-task.md
│ │ └── mcp-ai-agent-prd.md
│ └── getting_started.md
├── LICENSE
├── main_tcp.py
├── main.py
├── mcp_tcp_client.py
├── pyproject.toml
├── QUICK_START.md
├── README.md
├── scripts
│ └── test_server.py
├── setup.py
├── src
│ └── mcp_log_analyzer
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ └── server.py
│ ├── config
│ │ ├── __init__.py
│ │ └── settings.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── config.py
│ │ ├── models.py
│ │ └── state_manager.py
│ ├── mcp_server
│ │ ├── __init__.py
│ │ ├── models
│ │ │ ├── __init__.py
│ │ │ └── schemas.py
│ │ ├── prompts
│ │ │ ├── __init__.py
│ │ │ ├── linux_testing_prompt.py
│ │ │ ├── log_management_prompt.py
│ │ │ ├── mcp_assets_overview_prompt.py
│ │ │ ├── network_testing_prompt.py
│ │ │ ├── process_monitoring_prompt.py
│ │ │ └── windows_testing_prompt.py
│ │ ├── resources
│ │ │ ├── __init__.py
│ │ │ ├── linux_resources.py
│ │ │ ├── logs_resources.py
│ │ │ ├── network_resources.py
│ │ │ ├── process_resources.py
│ │ │ └── windows_resources.py
│ │ ├── server.py
│ │ └── tools
│ │ ├── __init__.py
│ │ ├── health_check_tools.py
│ │ ├── linux_test_tools.py
│ │ ├── log_management_tools.py
│ │ ├── network_test_tools.py
│ │ ├── process_test_tools.py
│ │ └── windows_test_tools.py
│ ├── parsers
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── csv_parser.py
│ │ ├── etl_cached_parser.py
│ │ ├── etl_large_file_parser.py
│ │ ├── etl_parser.py
│ │ ├── etl_windows_parser.py
│ │ └── evt_parser.py
│ └── tcp_proxy.py
├── TCP_PROXY_README.md
├── tcp_proxy.py
├── tcp_server.py
├── test_server.py
├── test_tcp_proxy.py
├── test_windows_setup.py
└── tests
├── test_base_parser.py
├── test_mcp_server.py
├── test_tool_utils.py
└── test_utils.py
```
# Files
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/parsers/etl_large_file_parser.py:
--------------------------------------------------------------------------------
```python
1 | """Enhanced ETL parser for large files with streaming support."""
2 |
import asyncio
import contextlib
import csv
import logging
import os
import platform
import shutil
import subprocess
import tempfile
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Union
from uuid import uuid4

from ..core.models import LogRecord, LogSource, LogType
from .base import BaseParser
17 |
18 | logger = logging.getLogger(__name__)
19 |
20 |
class EtlLargeFileParser(BaseParser):
    """Enhanced ETL parser with support for large files using streaming.

    Windows .etl trace files are binary; this parser shells out to
    tracerpt.exe to convert them to CSV and then streams records out of
    the CSV while tracerpt may still be writing it, so multi-hundred-MB
    traces can be paged through without loading everything into memory.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialize ETL large file parser.

        Args:
            config: Parser configuration. Recognized keys:
                ``chunk_size`` (records per chunk, default 1000) and
                ``verbose`` (log per-row conversion failures).
        """
        super().__init__(config)
        self.chunk_size = self.config.get("chunk_size", 1000)  # Records per chunk
        self.tracerpt_path = self._find_tracerpt()
        # Created lazily on first parse; removed again in __del__.
        self.temp_dir: Optional[str] = None

    def _find_tracerpt(self) -> Optional[str]:
        """Locate tracerpt.exe.

        Returns:
            Absolute path to tracerpt.exe, or None when not running on
            Windows or the tool cannot be found.
        """
        if platform.system() != "Windows":
            return None

        # Common locations for tracerpt.exe.
        possible_paths = [
            r"C:\Windows\System32\tracerpt.exe",
            r"C:\Windows\SysWOW64\tracerpt.exe",
        ]
        for path in possible_paths:
            if os.path.exists(path):
                return path

        # Fall back to a PATH search. shutil.which replaces the previous
        # "where tracerpt.exe" subprocess call and its bare except.
        return shutil.which("tracerpt.exe")

    def is_available(self) -> bool:
        """Check if ETL parsing is available (tracerpt.exe was found)."""
        return self.tracerpt_path is not None

    def parse_file_streaming(
        self, source: LogSource, file_path: Union[str, Path],
        limit: int = 1000, offset: int = 0
    ) -> Iterator[LogRecord]:
        """Parse ETL file with streaming to handle large files.

        Args:
            source: The log source information.
            file_path: Path to the ETL file.
            limit: Maximum number of records to return.
            offset: Number of records to skip.

        Yields:
            LogRecord objects parsed from the ETL file.

        Raises:
            RuntimeError: If tracerpt.exe is unavailable, fails, or
                exceeds the 10-minute processing budget.
            FileNotFoundError: If the ETL file does not exist.
        """
        if not self.is_available():
            raise RuntimeError(
                "Windows ETL parsing is not available. tracerpt.exe not found."
            )

        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"ETL file not found: {file_path}")

        file_size_mb = path.stat().st_size / (1024 * 1024)
        logger.info(f"Processing ETL file: {file_size_mb:.1f} MB")

        # Persistent temp directory, shared across calls, removed in __del__.
        if self.temp_dir is None:
            self.temp_dir = tempfile.mkdtemp(prefix="etl_parser_")
        output_file = os.path.join(self.temp_dir, f"etl_{uuid4()}.csv")

        try:
            cmd = [
                self.tracerpt_path,
                str(path),
                "-o", output_file,
                "-of", "CSV",
                "-y",   # Overwrite without prompting
                "-lr",  # Less restrictive; attempt to process badly-formed events
            ]

            if file_size_mb > 500:  # Warn up front for very large inputs
                logger.warning(
                    f"Large ETL file ({file_size_mb:.1f} MB), processing may take time"
                )

            logger.info("Starting tracerpt conversion...")
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )

            start_time = time.time()
            max_wait_time = 600  # 10 minutes maximum

            logger.info(
                f"Waiting for tracerpt.exe to process {file_size_mb:.1f} MB file..."
            )

            # Give tracerpt a moment to start, then fail fast if it already died.
            # communicate() is only called on the failure path so it is never
            # invoked twice on the same process.
            time.sleep(2)
            if process.poll() is not None and process.returncode != 0:
                _, stderr = process.communicate()
                raise RuntimeError(
                    f"tracerpt failed immediately with code {process.returncode}: {stderr}"
                )

            self._wait_for_output(process, output_file, start_time, max_wait_time)

            if os.path.exists(output_file):
                yield from self._stream_csv(process, output_file, source, limit, offset)

            self._await_completion(process, start_time, max_wait_time)

        finally:
            # Clean up the per-call CSV even when the generator is closed early.
            if os.path.exists(output_file):
                with contextlib.suppress(OSError):
                    os.remove(output_file)

    def _wait_for_output(
        self,
        process: "subprocess.Popen[str]",
        output_file: str,
        start_time: float,
        max_wait_time: int,
    ) -> None:
        """Block until tracerpt creates its CSV output file.

        Raises:
            RuntimeError: On tracerpt failure, timeout, or a clean exit
                that produced no output file.
        """
        check_interval = 5  # Seconds between polls
        last_log_time = start_time

        while not os.path.exists(output_file):
            current_time = time.time()
            elapsed = current_time - start_time

            # Log progress every 30 seconds so long conversions stay visible.
            if current_time - last_log_time >= 30:
                logger.info(f"tracerpt.exe still running... ({elapsed:.0f}s elapsed)")
                last_log_time = current_time

            if elapsed > max_wait_time:
                process.terminate()
                raise RuntimeError(f"tracerpt timed out after {max_wait_time} seconds")

            if process.poll() is not None:
                _, stderr = process.communicate()
                if process.returncode != 0:
                    raise RuntimeError(
                        f"tracerpt failed with code {process.returncode}: {stderr}"
                    )
                # Process completed but no output file.
                if not os.path.exists(output_file):
                    raise RuntimeError("tracerpt completed but produced no output file")
                break

            time.sleep(check_interval)

    def _stream_csv(
        self,
        process: "subprocess.Popen[str]",
        output_file: str,
        source: LogSource,
        limit: int,
        offset: int,
    ) -> Iterator[LogRecord]:
        """Yield LogRecords from the CSV while tracerpt may still be writing it."""
        logger.info("CSV file created, starting to read records...")

        # Wait until at least the header row has been flushed.
        while os.path.getsize(output_file) == 0 and process.poll() is None:
            time.sleep(0.5)

        records_yielded = 0
        records_skipped = 0

        with open(output_file, 'r', encoding='utf-8', errors='ignore') as f:
            header_line = f.readline()
            if not header_line:
                # Header not flushed yet; give tracerpt a second and retry once.
                time.sleep(1)
                f.seek(0)
                header_line = f.readline()
            if not header_line:
                return

            # DictReader over the remainder of the file, keyed by the header row.
            csv_reader = csv.DictReader(f, fieldnames=None)
            csv_reader.fieldnames = next(csv.reader([header_line]))

            for row_num, row in enumerate(csv_reader):
                if row_num > 0 and row_num % 1000 == 0:
                    logger.info(f"Processed {row_num} records from CSV...")

                # Honour the caller-requested offset before yielding anything.
                if records_skipped < offset:
                    records_skipped += 1
                    continue

                log_record = self._convert_csv_row(source, row)
                if log_record:
                    yield log_record
                    records_yielded += 1

                if records_yielded >= limit:
                    logger.info(f"Reached limit of {limit} records")
                    # No point letting tracerpt keep converting what we won't read.
                    if process.poll() is None:
                        logger.info("Terminating tracerpt as we have enough records")
                        process.terminate()
                    break

                # Periodically note if tracerpt died with an error mid-stream.
                if row_num % 100 == 0 and process.poll() is not None:
                    if process.returncode != 0:
                        logger.warning(f"tracerpt ended with code {process.returncode}")

    def _await_completion(
        self,
        process: "subprocess.Popen[str]",
        start_time: float,
        max_wait_time: int,
    ) -> None:
        """Wait for tracerpt to finish, terminating it if the budget runs out."""
        if process.poll() is not None:
            return

        remaining_time = max_wait_time - (time.time() - start_time)
        if remaining_time > 0:
            logger.info(
                f"Waiting for tracerpt to complete (up to {remaining_time:.0f}s remaining)..."
            )
            try:
                process.wait(timeout=remaining_time)
                logger.info(
                    f"tracerpt completed successfully after {time.time() - start_time:.0f}s"
                )
            except subprocess.TimeoutExpired:
                logger.warning(f"tracerpt timed out after {max_wait_time}s, terminating...")
                process.terminate()
                process.wait(timeout=5)  # Give it 5 seconds to terminate
        else:
            logger.warning("Maximum wait time exceeded, terminating tracerpt...")
            process.terminate()
            process.wait(timeout=5)

    def _convert_csv_row(
        self, source: LogSource, row: Dict[str, str]
    ) -> Optional[LogRecord]:
        """Convert a CSV row from tracerpt to a LogRecord.

        Args:
            source: The log source information.
            row: CSV row dictionary.

        Returns:
            LogRecord, or None if conversion fails (best-effort parsing:
            malformed rows are skipped, not fatal).
        """
        # Common tracerpt CSV columns mapped to normalized record fields.
        field_mappings = {
            "Event Name": "event_name",
            "Type": "event_type",
            "Event ID": "event_id",
            "Version": "version",
            "Channel": "channel",
            "Level": "level",
            "Task": "task",
            "Opcode": "opcode",
            "Keyword": "keywords",
            "PID": "process_id",
            "TID": "thread_id",
            "Processor Number": "processor",
            "Provider Name": "provider_name",
            "Provider ID": "provider_id",
            "Message": "message",
            "Process Name": "process_name",
        }

        try:
            record_data = {
                record_field: row[csv_field]
                for csv_field, record_field in field_mappings.items()
                if csv_field in row and row[csv_field]
            }

            timestamp = self._parse_timestamp(row.get("Clock-Time"))

            # Include any additional fields with normalized snake_case keys.
            for key, value in row.items():
                if key not in field_mappings and value:
                    clean_key = key.lower().replace(' ', '_').replace('-', '_')
                    record_data[clean_key] = value

            return LogRecord(
                source_id=source.id,
                timestamp=timestamp,
                data=record_data,
                raw_content=None
            )

        except Exception as e:
            if self.config.get("verbose", False):
                logger.error(f"Failed to convert CSV row: {e}")
            return None

    @staticmethod
    def _parse_timestamp(value: Optional[str]) -> Optional[datetime]:
        """Parse a tracerpt Clock-Time value; return None if unparseable."""
        if not value:
            return None
        # Formats tracerpt is known to emit, tried in order.
        for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S", "%m/%d/%Y %H:%M:%S.%f"):
            try:
                return datetime.strptime(value, fmt)
            except ValueError:
                continue
        return None

    def parse_file(
        self, source: LogSource, file_path: Union[str, Path]
    ) -> Iterator[LogRecord]:
        """Parse ETL log records from a file.

        Args:
            source: The log source information.
            file_path: Path to the ETL file.

        Yields:
            LogRecord objects parsed from the ETL file.
        """
        # Use the streaming parser for all files; 10000 is a generous cap.
        yield from self.parse_file_streaming(source, file_path, limit=10000)

    def parse(
        self, path: str, filters: Optional[Dict[str, Any]] = None,
        start_time: Optional[datetime] = None, end_time: Optional[datetime] = None,
        limit: int = 1000, offset: int = 0
    ) -> List[LogRecord]:
        """Parse ETL file with filtering and pagination.

        Args:
            path: Path to the ETL file.
            filters: Optional filters to apply.
            start_time: Optional start time filter.
            end_time: Optional end time filter.
            limit: Maximum number of records to return.
            offset: Number of records to skip.

        Returns:
            List of LogRecord objects.
        """
        # Temporary source record; this entry point is path-based.
        temp_source = LogSource(
            name="temp_etl",
            type=LogType.ETL,
            path=path,
            metadata={}
        )

        records: List[LogRecord] = []

        # Over-fetch by `offset` so pagination can be applied after filtering.
        for record in self.parse_file_streaming(temp_source, path, limit=limit + offset):
            # Time-window filters (records without a timestamp pass through).
            if start_time and record.timestamp and record.timestamp < start_time:
                continue
            if end_time and record.timestamp and record.timestamp > end_time:
                continue

            if filters and not self._match_filters(record, filters):
                continue

            records.append(record)
            if len(records) >= limit + offset:
                break

        # Apply offset/limit pagination on the filtered result.
        return records[offset:offset + limit]

    def _match_filters(self, record: LogRecord, filters: Dict[str, Any]) -> bool:
        """Check if a record matches the provided filters.

        Args:
            record: The log record to check.
            filters: Mapping of record-data key to expected value; a list
                value means "match any of these".

        Returns:
            True if record matches all filters.
        """
        for key, expected in filters.items():
            actual = record.data.get(key)
            if isinstance(expected, list):
                if actual not in expected:
                    return False
            elif actual != expected:
                return False
        return True

    def parse_content(self, source: LogSource, content: str) -> Iterator[LogRecord]:
        """Parse ETL log records from content string.

        Note: ETL files are binary and cannot be parsed from string content.

        Args:
            source: The log source information.
            content: String content (not supported for ETL).

        Raises:
            NotImplementedError: ETL files must be parsed from file.
        """
        raise NotImplementedError(
            "ETL files are binary and must be parsed from file, not string content"
        )

    def validate_file(self, file_path: Union[str, Path]) -> bool:
        """Validate if the file can be parsed by this parser.

        Args:
            file_path: Path to validate.

        Returns:
            True if the path looks like a readable ETL file and
            tracerpt.exe is available.
        """
        path = Path(file_path)

        # Must carry the .etl extension.
        if not str(path).lower().endswith('.etl'):
            return False

        # Must exist and be a regular file.
        if not path.exists() or not path.is_file():
            return False

        # Conversion requires tracerpt.exe.
        return self.is_available()

    def __del__(self):
        """Best-effort removal of the temp directory on garbage collection."""
        try:
            # getattr: __init__ may not have completed when __del__ runs.
            temp_dir = getattr(self, "temp_dir", None)
            if temp_dir and os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
        except Exception:
            # Never raise from a destructor (may run at interpreter shutdown).
            pass
```
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/mcp_server/prompts/linux_testing_prompt.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Linux system testing and diagnostics prompts for the MCP Log Analyzer server.
3 | """
4 |
5 | from typing import Optional
6 | from mcp.server import FastMCP
7 |
8 |
9 | def register_linux_testing_prompts(mcp: FastMCP):
10 | """Register all Linux testing prompts."""
11 |
    @mcp.prompt(
        title="Linux Log Access Test",
        description="Guide for testing access to Linux log files and systemd journal"
    )
    async def linux_log_access_test() -> str:
        """
        Return a static markdown guide for the test_linux_log_access tool.

        Registered as an MCP prompt. Takes no arguments; explains what the
        tool checks (log files, journal, commands, permissions) and how to
        interpret and act on its results.
        """
        return """
# 🔍 Linux Log Access Test

## Tool: test_linux_log_access

### Purpose
Tests access to common log files in /var/log/ and systemd journal accessibility.

### What It Tests
- **Traditional log files**: /var/log/syslog, messages, auth.log, kern.log
- **systemd journal**: Journal accessibility and permissions
- **System commands**: ss, netstat, systemctl availability
- **File permissions**: Read access and file sizes

### Usage
```
Tool: test_linux_log_access
```

### Interpreting Results
- **Accessible files**: Can be queried and analyzed
- **Permission denied**: Need elevated privileges or group membership
- **File not found**: May indicate different distribution or log rotation
- **Journal access**: Check systemd-journal group membership

### Next Steps
- Add user to systemd-journal group for journal access
- Use sudo for system log access if needed
- Check distribution-specific log locations
"""
50 |
51 | @mcp.prompt(
52 | title="systemd Journal Query Guide",
53 | description="How to query and filter systemd journal logs"
54 | )
55 | async def systemd_journal_query(
56 | priority: Optional[str] = None
57 | ) -> str:
58 | """
59 | Guide for querying systemd journal.
60 |
61 | Args:
62 | priority: Optional priority level filter (err, warning, info, etc.)
63 | """
64 |
65 | base_guide = """
66 | # 📊 systemd Journal Query Guide
67 |
68 | ## Tool: query_systemd_journal
69 |
70 | ### Purpose
71 | Query systemd journal with specific filters for targeted log analysis.
72 |
73 | ### Parameters
74 | - **service_name**: Specific service to query (e.g., "nginx", "sshd")
75 | - **priority**: Log level (emerg, alert, crit, err, warning, notice, info, debug)
76 | - **time_duration**: Time range ("30m", "2h", "1d", etc.)
77 | - **max_lines**: Result limit (default: 100)
78 |
79 | ### Usage Examples
80 | ```
81 | # Service errors
82 | Tool: query_systemd_journal
83 | Parameters: service_name="nginx", priority="err", time_duration="1h"
84 |
85 | # Recent critical events
86 | Tool: query_systemd_journal
87 | Parameters: priority="crit", time_duration="24h"
88 |
89 | # SSH authentication logs
90 | Tool: query_systemd_journal
91 | Parameters: service_name="sshd", time_duration="6h", max_lines=200
92 | ```
93 | """
94 |
95 | priority_guide = {
96 | "err": """
97 | ### Error Priority Focus
98 | - **Use for**: Service failures, application errors
99 | - **Common patterns**: "Failed to start", "Main process exited"
100 | - **Time range**: Start with 1-6 hours, expand if needed
101 | """,
102 | "warning": """
103 | ### Warning Priority Focus
104 | - **Use for**: Performance issues, non-critical failures
105 | - **Common patterns**: Resource warnings, configuration issues
106 | - **Time range**: 6-24 hours typically sufficient
107 | """,
108 | "info": """
109 | ### Info Priority Focus
110 | - **Use for**: Normal operations, service status
111 | - **Common patterns**: "Started", "Stopped", "Reloading"
112 | - **Time range**: Keep short (1-2 hours) due to volume
113 | """
114 | }
115 |
116 | if priority and priority.lower() in priority_guide:
117 | base_guide += priority_guide[priority.lower()]
118 |
119 | base_guide += """
120 | ### Priority Levels Reference
121 | - **emerg (0)**: System unusable
122 | - **alert (1)**: Immediate action required
123 | - **crit (2)**: Critical conditions
124 | - **err (3)**: Error conditions
125 | - **warning (4)**: Warning conditions
126 | - **notice (5)**: Normal but significant
127 | - **info (6)**: Informational messages
128 | - **debug (7)**: Debug-level messages
129 | """
130 |
131 | return base_guide
132 |
    @mcp.prompt(
        title="Linux Service Analysis",
        description="Guide for analyzing Linux service status and health"
    )
    async def linux_service_analysis() -> str:
        """
        Return a static markdown guide for the analyze_linux_services tool.

        Registered as an MCP prompt. Takes no arguments; documents the
        tool's parameters, example invocations, service states, and a
        troubleshooting checklist for failed services.
        """
        return """
# 🛠️ Linux Service Analysis Guide

## Tool: analyze_linux_services

### Purpose
Lists and analyzes systemd services, identifying failed services and recent issues.

### Parameters
- **service_pattern**: Filter services by name pattern (optional)
- **include_failed**: Include failed services in analysis (default: true)

### Usage Examples
```
# Check all failed services
Tool: analyze_linux_services
Parameters: include_failed=true

# Analyze web services
Tool: analyze_linux_services
Parameters: service_pattern="nginx|apache"

# Check database services
Tool: analyze_linux_services
Parameters: service_pattern="mysql|postgres|mongodb"
```

### What It Provides
✅ Active and failed service lists
✅ Recent logs for failed services
✅ Service health assessment
✅ Service dependencies and issues

### Common Service States
- **active (running)**: Service operating normally
- **active (exited)**: One-shot service completed
- **inactive (dead)**: Service not running
- **failed**: Service failed to start or crashed

### Troubleshooting Failed Services
1. Check service logs for error details
2. Verify configuration files
3. Check service dependencies
4. Review system resources
5. Test service restart capability
"""
187 |
    @mcp.prompt(
        title="Linux System Overview",
        description="Get comprehensive Linux system health information"
    )
    async def linux_system_overview() -> str:
        """
        Return a static markdown guide for the get_linux_system_overview tool.

        Registered as an MCP prompt. Takes no arguments; describes the
        information the tool reports, how to read its health indicators,
        and which follow-up tools to run next.
        """
        return """
# 🖥️ Linux System Overview

## Tool: get_linux_system_overview

### Purpose
Provides comprehensive system information and health status for Linux systems.

### Usage
```
Tool: get_linux_system_overview
```

### Information Provided
- **System Info**: Hostname, uptime, kernel version
- **Distribution**: OS name and version
- **Resources**: CPU, memory, disk usage
- **Critical Errors**: Recent error logs
- **Service Status**: Failed service count

### Health Indicators
✅ **Healthy**: No failed services, low resource usage
⚠️ **Warning**: 1-2 failed services, moderate resource usage
❌ **Critical**: Multiple failures, high resource usage

### Follow-up Actions
Based on the overview:
1. Investigate failed services with analyze_linux_services
2. Check specific errors with query_systemd_journal
3. Monitor resource usage trends
4. Plan maintenance if needed
"""
228 |
    @mcp.prompt(
        title="Linux Boot Troubleshooting",
        description="Diagnose Linux boot and startup issues"
    )
    async def linux_boot_troubleshooting() -> str:
        """
        Return a static markdown walkthrough for diagnosing boot issues.

        Registered as an MCP prompt. Takes no arguments; lays out a
        step-by-step investigation using query_systemd_journal and
        analyze_linux_services, plus common boot failure patterns.
        """
        return """
# 🚀 Linux Boot Troubleshooting

## Diagnosing Boot Issues

### Step 1: Check Boot Messages
```
Tool: query_systemd_journal
Parameters: time_duration="2h", max_lines=500
```
Look for systemd and kernel messages during boot.

### Step 2: Identify Failed Services
```
Tool: analyze_linux_services
Parameters: include_failed=true
```
Services that fail during boot often indicate issues.

### Step 3: Check Critical Errors
```
Tool: query_systemd_journal
Parameters: priority="err", time_duration="1h"
```
Focus on error messages from boot time.

### Common Boot Problems

#### Service Dependency Failures
- **Symptom**: "Dependency failed for..."
- **Check**: Service order and requirements
- **Fix**: Resolve dependent service issues first

#### Hardware Initialization
- **Symptom**: Kernel errors, driver failures
- **Check**: dmesg output, kernel logs
- **Fix**: Update drivers, check hardware

#### Filesystem Issues
- **Symptom**: Mount failures, read-only root
- **Check**: fstab entries, disk errors
- **Fix**: fsck, correct mount options

### Boot Performance
- Use `systemd-analyze` for boot timing
- Check for slow services
- Optimize service startup order
- Disable unnecessary boot services
"""
286 |
    @mcp.prompt(
        title="Linux Security Monitoring",
        description="Monitor authentication and security events on Linux"
    )
    async def linux_security_monitoring() -> str:
        """
        Return a static markdown guide for monitoring security events.

        Registered as an MCP prompt. Takes no arguments; shows journal
        queries for SSH, failed authentication, and sudo activity, plus
        log patterns that indicate suspicious behavior.
        """
        return """
# 🔒 Linux Security Monitoring

## Authentication Monitoring

### Step 1: SSH Login Activity
```
Tool: query_systemd_journal
Parameters: service_name="sshd", time_duration="24h"
```

### Step 2: Failed Authentication
```
Tool: query_systemd_journal
Parameters: priority="warning", time_duration="6h"
```
Look for "Failed password" or "authentication failure".

### Step 3: Sudo Usage
```
Tool: query_systemd_journal
Parameters: service_name="sudo", time_duration="24h"
```

## Security Patterns to Watch

### Failed Login Attempts
- Multiple failures from same IP
- Attempts on non-existent users
- Rapid retry patterns
- Unusual login times

### Privilege Escalation
- sudo usage by new users
- Unexpected root processes
- Service account activities
- Permission changes

### System Modifications
- Package installations
- Configuration changes
- New user accounts
- Service modifications

## Security Event Examples
- **"Failed password for"**: SSH authentication failure
- **"Accepted publickey"**: Successful SSH key auth
- **"session opened for user root"**: Root access
- **"COMMAND="**: Sudo command execution
"""
345 |
    @mcp.prompt(
        title="Linux Performance Issues",
        description="Diagnose Linux system performance problems"
    )
    async def linux_performance_issues() -> str:
        """
        Return a static markdown guide for diagnosing performance problems.

        Registered as an MCP prompt. Takes no arguments; walks through
        journal queries for memory, disk, and service issues and lists
        common bottleneck categories with suggested remedies.
        """
        return """
# 📊 Linux Performance Issues

## Investigating Performance Problems

### Step 1: Check System Warnings
```
Tool: query_systemd_journal
Parameters: priority="warning", time_duration="6h"
```
Look for resource-related warnings.

### Step 2: Memory Issues
Search for OOM (Out of Memory) events:
```
Tool: query_systemd_journal
Parameters: time_duration="24h", max_lines=200
```
Look for "Out of memory" or "killed process".

### Step 3: Disk Space Problems
```
Tool: query_systemd_journal
Parameters: priority="err", time_duration="12h"
```
Search for "No space left on device".

### Step 4: Service Performance
```
Tool: analyze_linux_services
Parameters: include_failed=true
```
Check for services with performance issues.

## Common Performance Issues

### High Memory Usage
- **Symptoms**: OOM killer activations
- **Investigation**: Check process memory usage
- **Solutions**: Add swap, optimize applications

### Disk I/O Bottlenecks
- **Symptoms**: Slow response, high wait times
- **Investigation**: iostat, iotop results
- **Solutions**: Optimize I/O patterns, upgrade storage

### CPU Saturation
- **Symptoms**: High load average, slow processing
- **Investigation**: Check CPU-intensive processes
- **Solutions**: Optimize code, add CPU resources

### Network Issues
- **Symptoms**: Connection timeouts, packet loss
- **Investigation**: Network service logs
- **Solutions**: Check bandwidth, optimize network
"""
410 |
411 | @mcp.prompt(
412 | title="Linux Service Management",
413 | description="Managing and troubleshooting specific Linux services"
414 | )
415 | async def linux_service_management(
416 | service_type: Optional[str] = None
417 | ) -> str:
418 | """
419 | Guide for managing specific Linux services.
420 |
421 | Args:
422 | service_type: Type of service (web, database, system, etc.)
423 | """
424 |
425 | base_guide = """
426 | # 🛠️ Linux Service Management
427 |
428 | ## Managing systemd Services
429 |
430 | ### Check Service Status
431 | ```
432 | Tool: analyze_linux_services
433 | Parameters: service_pattern="service-name"
434 | ```
435 |
436 | ### View Service Logs
437 | ```
438 | Tool: query_systemd_journal
439 | Parameters: service_name="service-name", time_duration="1h"
440 | ```
441 |
442 | ### Common Service Operations
443 | - **Start**: systemctl start service-name
444 | - **Stop**: systemctl stop service-name
445 | - **Restart**: systemctl restart service-name
446 | - **Enable**: systemctl enable service-name
447 | - **Status**: systemctl status service-name
448 | """
449 |
450 | service_guides = {
451 | "web": """
452 | ## Web Server Services
453 |
454 | ### Nginx
455 | ```
456 | Tool: query_systemd_journal
457 | Parameters: service_name="nginx", priority="err"
458 | ```
459 | Common issues: Port conflicts, configuration errors
460 |
461 | ### Apache
462 | ```
463 | Tool: query_systemd_journal
464 | Parameters: service_name="apache2", priority="err"
465 | ```
466 | Common issues: Module conflicts, .htaccess errors
467 | """,
468 | "database": """
469 | ## Database Services
470 |
471 | ### MySQL/MariaDB
472 | ```
473 | Tool: query_systemd_journal
474 | Parameters: service_name="mysql", priority="err"
475 | ```
476 | Common issues: Connection limits, disk space
477 |
478 | ### PostgreSQL
479 | ```
480 | Tool: query_systemd_journal
481 | Parameters: service_name="postgresql", priority="err"
482 | ```
483 | Common issues: Shared memory, connection pooling
484 | """,
485 | "system": """
486 | ## System Services
487 |
488 | ### SSH
489 | ```
490 | Tool: query_systemd_journal
491 | Parameters: service_name="sshd", time_duration="6h"
492 | ```
493 | Monitor: Authentication attempts, configuration
494 |
495 | ### Cron
496 | ```
497 | Tool: query_systemd_journal
498 | Parameters: service_name="cron", time_duration="24h"
499 | ```
500 | Monitor: Job execution, failures
501 | """
502 | }
503 |
504 | if service_type and service_type.lower() in service_guides:
505 | base_guide += service_guides[service_type.lower()]
506 |
507 | base_guide += """
508 | ## Service Troubleshooting Steps
509 | 1. Check service status and recent logs
510 | 2. Verify configuration files
511 | 3. Check service dependencies
512 | 4. Review resource availability
513 | 5. Test service functionality
514 | 6. Monitor after restart
515 | """
516 |
517 | return base_guide
518 |
    @mcp.prompt(
        title="Linux Log Patterns Reference",
        description="Common Linux log patterns and their meanings"
    )
    async def linux_log_patterns() -> str:
        """Return a static reference guide of common Linux log patterns.

        Covers service-management, security, system, network, and
        performance log messages and what each pattern means. Static
        prompt text only; performs no log analysis itself.
        """
        return """
# 📖 Linux Log Patterns Reference

## Service Management Patterns

### Successful Operations
- **"Started [Service]"**: Service startup success
- **"Reloaded [Service]"**: Configuration reload
- **"Listening on"**: Service accepting connections
- **"Reached target"**: systemd target achieved

### Service Failures
- **"Failed to start"**: Startup failure
- **"Main process exited"**: Service crash
- **"Dependency failed"**: Required service unavailable
- **"Timed out"**: Service startup timeout
- **"code=exited, status=1"**: Exit with error

## Security Patterns

### Authentication
- **"Failed password for"**: Login failure
- **"Accepted publickey"**: SSH key success
- **"session opened"**: User session start
- **"session closed"**: User session end
- **"COMMAND="**: Sudo command execution

### Security Events
- **"authentication failure"**: PAM auth fail
- **"Connection closed by"**: Dropped connection
- **"Invalid user"**: Non-existent user login
- **"Connection reset"**: Network interruption

## System Events

### Boot/Shutdown
- **"Booting Linux"**: Kernel boot start
- **"Started Session"**: User session start
- **"Reached target Multi-User"**: Boot complete
- **"Stopped target"**: Shutdown initiated

### Resource Issues
- **"Out of memory"**: OOM killer activated
- **"No space left"**: Disk full
- **"Too many open files"**: File descriptor limit
- **"Cannot allocate memory"**: Memory exhaustion

## Network Patterns

### Connection Events
- **"link is up"**: Network interface active
- **"link is down"**: Network interface inactive
- **"DHCPREQUEST"**: IP address request
- **"DHCPACK"**: IP address assigned

### Network Errors
- **"Name or service not known"**: DNS failure
- **"Connection refused"**: Service not listening
- **"Network is unreachable"**: Routing issue
- **"Connection timed out"**: No response

## Performance Indicators

### Warning Signs
- **"took too long"**: Slow operation
- **"degraded"**: Performance issue
- **"high load"**: System overload
- **"throttling"**: Rate limiting active

### Critical Issues
- **"segfault"**: Memory violation
- **"core dumped"**: Process crash
- **"kernel panic"**: System crash
- **"hung task"**: Process stuck
"""
602 |
    @mcp.prompt(
        title="Linux Distribution Differences",
        description="Guide for log locations across different Linux distributions"
    )
    async def linux_distribution_guide() -> str:
        """Return a static guide to distribution-specific log locations.

        Covers Debian/Ubuntu, RHEL-family, Arch, and SUSE log paths plus
        systemd adoption notes. Static prompt text only; performs no
        distribution detection itself.
        """
        return """
# 🐧 Linux Distribution Differences

## Log File Locations

### Debian/Ubuntu
- **System logs**: /var/log/syslog
- **Auth logs**: /var/log/auth.log
- **Kernel**: /var/log/kern.log
- **Package manager**: /var/log/dpkg.log

### RHEL/CentOS/Fedora
- **System logs**: /var/log/messages
- **Auth logs**: /var/log/secure
- **Kernel**: /var/log/messages
- **Package manager**: /var/log/yum.log

### Arch Linux
- **Primary logging**: systemd journal only
- **Persistent logs**: /var/log/journal/
- **Package manager**: /var/log/pacman.log

### SUSE
- **System logs**: /var/log/messages
- **Auth logs**: /var/log/messages
- **Package manager**: /var/log/zypper.log

## systemd Adoption

### Full systemd
- Ubuntu 16.04+
- Debian 8+
- RHEL/CentOS 7+
- Fedora 15+
- Arch Linux
- openSUSE

### SysV Init or Other
- Older distributions
- Some embedded systems
- Specialized distributions

## Best Practices
1. Check for systemd first (systemctl available)
2. Fall back to traditional logs if needed
3. Use distribution detection for paths
4. Handle both logging systems when possible
"""
659 |
    @mcp.prompt(
        title="Linux Emergency Diagnostics",
        description="Quick diagnostics for Linux system emergencies"
    )
    async def linux_emergency_diagnostics() -> str:
        """Return a static emergency-diagnostics runbook for Linux.

        Lists phased triage steps, common failure scenarios, recovery
        actions, and a quick command reference. Static prompt text only;
        performs no diagnostics itself.
        """
        return """
# 🚨 Linux Emergency Diagnostics

## Critical System Failure

### Phase 1: Initial Assessment (< 2 minutes)
```
Tool: get_linux_system_overview
```
Get immediate system status.

### Phase 2: Service Status (2-5 minutes)
```
Tool: analyze_linux_services
Parameters: include_failed=true
```
Identify all failed services.

### Phase 3: Recent Errors (5-10 minutes)
```
Tool: query_systemd_journal
Parameters: priority="err", time_duration="1h"
```
Find recent critical errors.

## Emergency Scenarios

### System Won't Boot
1. Check journal from rescue mode
2. Look for kernel panic messages
3. Verify filesystem integrity
4. Check hardware initialization

### All Services Failing
1. Check system resources (disk, memory)
2. Verify systemd functionality
3. Check for dependency loops
4. Review recent system changes

### Performance Crisis
1. Check for OOM killer activity
2. Look for disk full errors
3. Monitor CPU/memory usage
4. Identify resource hogs

### Security Breach
1. Check authentication logs immediately
2. Look for privilege escalations
3. Monitor network connections
4. Review system modifications

## Recovery Actions

### Service Recovery
- Restart failed services systematically
- Check service dependencies first
- Monitor logs during restart
- Verify functionality after start

### Resource Recovery
- Free disk space (logs, temp files)
- Kill memory-intensive processes
- Clear system caches if needed
- Add swap space temporarily

### Access Recovery
- Reset service configurations
- Restore from known-good backups
- Check file permissions
- Verify network connectivity

## Critical Commands Reference
- **Journal since boot**: journalctl -b
- **Follow live logs**: journalctl -f
- **System status**: systemctl status
- **Failed services**: systemctl --failed
- **Resource usage**: top, htop, free, df
"""
```
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/mcp_server/tools/windows_test_tools.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Windows Event Log testing and diagnostic MCP tools.
3 | """
4 |
from typing import Any, Dict, Optional
6 |
7 | from mcp.server import FastMCP
8 | from pydantic import BaseModel, Field
9 |
10 |
class WindowsEventLogTestRequest(BaseModel):
    """Request model for testing Windows Event Log access.

    Used by the diagnostic tools that probe a single named log.
    """

    # Standard Windows logs are "System", "Application" and "Security".
    log_name: str = Field(
        "System", description="Event log name to test (System, Application, Security)"
    )
    # Upper bound on how many recent entries a tool will read.
    max_entries: int = Field(10, description="Maximum number of entries to fetch")
18 |
19 |
class WindowsEventLogQueryRequest(BaseModel):
    """Request model for querying Windows Event Logs.

    All filter fields are optional; a filter left as ``None`` matches
    every event.
    """

    log_name: str = Field("System", description="Event log name to query")
    # Declared Optional[...] so the None default is a valid value: a plain
    # `int`/`str` annotation with a None default is rejected by pydantic v2
    # and only worked by accident under v1.
    event_id: Optional[int] = Field(None, description="Specific Event ID to filter by")
    level: Optional[str] = Field(
        None, description="Event level (Error, Warning, Information)"
    )
    time_duration: str = Field(
        "1h", description="Time duration (e.g., '30m', '2h', '1d')"
    )
    max_entries: int = Field(50, description="Maximum number of entries to return")
30 |
31 |
32 | def register_windows_test_tools(mcp: FastMCP):
33 | """Register all Windows testing tools with the MCP server."""
34 |
35 | @mcp.tool()
36 | async def test_windows_event_log_access() -> Dict[str, Any]:
37 | """
38 | Test Windows Event Log access and permissions.
39 |
40 | This tool checks if the system can access Windows Event Logs
41 | and provides diagnostic information about available logs.
42 | """
43 | import platform
44 |
45 | if platform.system() != "Windows":
46 | return {
47 | "status": "unavailable",
48 | "message": "Windows Event Logs are only available on Windows systems",
49 | "platform": platform.system(),
50 | }
51 |
52 | try:
53 | import win32evtlog
54 |
55 | # Test access to common event logs
56 | test_results = {}
57 | common_logs = ["System", "Application", "Security"]
58 |
59 | for log_name in common_logs:
60 | try:
61 | hand = win32evtlog.OpenEventLog(None, log_name)
62 | win32evtlog.CloseEventLog(hand)
63 | test_results[log_name] = {"accessible": True, "error": None}
64 | except Exception as e:
65 | test_results[log_name] = {"accessible": False, "error": str(e)}
66 |
67 | return {
68 | "status": "available",
69 | "message": "Windows Event Log access test completed",
70 | "log_access": test_results,
71 | "pywin32_available": True,
72 | }
73 |
74 | except ImportError:
75 | return {
76 | "status": "missing_dependencies",
77 | "message": "pywin32 package is required for Windows Event Log access",
78 | "pywin32_available": False,
79 | }
80 | except Exception as e:
81 | return {
82 | "status": "error",
83 | "message": f"Error testing Windows Event Log access: {str(e)}",
84 | }
85 |
86 | @mcp.tool()
87 | async def get_windows_event_log_info(
88 | request: WindowsEventLogTestRequest,
89 | ) -> Dict[str, Any]:
90 | """
91 | Get detailed information about a specific Windows Event Log.
92 |
93 | This tool provides metadata and recent entries from the specified
94 | Windows Event Log for diagnostic purposes.
95 | """
96 | import platform
97 |
98 | if platform.system() != "Windows":
99 | return {"error": "This tool is only available on Windows systems"}
100 |
101 | try:
102 | import win32evtlog
103 | import win32evtlogutil
104 | from win32con import EVENTLOG_BACKWARDS_READ, EVENTLOG_SEQUENTIAL_READ
105 |
106 | hand = win32evtlog.OpenEventLog(None, request.log_name)
107 |
108 | # Get log information
109 | try:
110 | num_records = win32evtlog.GetNumberOfEventLogRecords(hand)
111 | oldest_record = win32evtlog.GetOldestEventLogRecord(hand)
112 | info = (oldest_record, num_records)
113 | except:
114 | info = None
115 |
116 | # Get recent entries
117 | flags = EVENTLOG_BACKWARDS_READ | EVENTLOG_SEQUENTIAL_READ
118 |
119 | entries = []
120 | count = 0
121 |
122 | while count < request.max_entries:
123 | events = win32evtlog.ReadEventLog(hand, flags, 0)
124 | if not events:
125 | break # No more events to read
126 |
127 | for event in events:
128 | if count >= request.max_entries:
129 | break
130 |
131 | try:
132 | message = win32evtlogutil.SafeFormatMessage(
133 | event, request.log_name
134 | )
135 | except:
136 | message = "(Unable to format message)"
137 |
138 | entries.append(
139 | {
140 | "event_id": event.EventID
141 | & 0xFFFFFFFF, # Convert to unsigned
142 | "time_generated": str(event.TimeGenerated),
143 | "source_name": event.SourceName,
144 | "event_type": event.EventType,
145 | "message_preview": message[:200] if message else "",
146 | }
147 | )
148 | count += 1
149 |
150 | win32evtlog.CloseEventLog(hand)
151 |
152 | return {
153 | "log_name": request.log_name,
154 | "log_info": {
155 | "oldest_record_number": info[0] if info else "Unknown",
156 | "total_records": info[1] if info else "Unknown",
157 | },
158 | "recent_entries": entries,
159 | "entries_retrieved": len(entries),
160 | "max_requested": request.max_entries,
161 | }
162 |
163 | except ImportError:
164 | return {"error": "pywin32 package is required for Windows Event Log access"}
165 | except Exception as e:
166 | return {"error": f"Error accessing Windows Event Log: {str(e)}"}
167 |
168 | @mcp.tool()
169 | async def query_windows_events_by_criteria(
170 | request: WindowsEventLogQueryRequest,
171 | ) -> Dict[str, Any]:
172 | """
173 | Query Windows Event Logs with specific criteria.
174 |
175 | This tool allows filtering Windows Event Logs by Event ID,
176 | level, and time range for targeted analysis.
177 | """
178 | import platform
179 |
180 | if platform.system() != "Windows":
181 | return {"error": "This tool is only available on Windows systems"}
182 |
183 | try:
184 | import win32evtlog
185 | import win32evtlogutil
186 | from win32con import EVENTLOG_BACKWARDS_READ, EVENTLOG_SEQUENTIAL_READ
187 | import xml.etree.ElementTree as ET
188 | from datetime import datetime
189 |
190 | from ..server import parse_time_param
191 |
192 | # Parse time duration
193 | if request.time_duration:
194 | start_time = parse_time_param(request.time_duration)
195 | else:
196 | start_time = None
197 |
198 | matching_events = []
199 | count = 0
200 | total_checked = 0
201 | level_map = {1: "Error", 2: "Warning", 4: "Information"}
202 |
203 | # Check if this is a custom Application and Services log
204 | if "/" in request.log_name or "\\" in request.log_name:
205 | # Use newer EvtQuery API for custom logs
206 | try:
207 | query_flags = (
208 | win32evtlog.EvtQueryChannelPath
209 | | win32evtlog.EvtQueryReverseDirection
210 | )
211 |
212 | # Build XPath query
213 | conditions = []
214 | if start_time:
215 | start_ms = int(start_time.timestamp() * 1000)
216 | conditions.append(f"TimeCreated[@SystemTime >= '{start_ms}']")
217 | if request.event_id:
218 | conditions.append(f"EventID={request.event_id}")
219 | if request.level:
220 | level_num = {"error": 2, "warning": 3, "information": 4}.get(
221 | request.level.lower(), 0
222 | )
223 | if level_num:
224 | conditions.append(f"Level={level_num}")
225 |
226 | xpath_query = "*"
227 | if conditions:
228 | xpath_query = f"*[System[{' and '.join(conditions)}]]"
229 |
230 | query_handle = win32evtlog.EvtQuery(
231 | request.log_name, query_flags, xpath_query
232 | )
233 |
234 | while count < request.max_entries:
235 | events = win32evtlog.EvtNext(query_handle, 10)
236 | if not events:
237 | break
238 |
239 | for event in events:
240 | total_checked += 1
241 |
242 | # Render event as XML
243 | xml_content = win32evtlog.EvtRender(
244 | event, win32evtlog.EvtRenderEventXml
245 | )
246 |
247 | # Parse XML to extract event data
248 | root = ET.fromstring(xml_content)
249 | system = root.find(".//System")
250 |
251 | event_id = (
252 | int(system.find("EventID").text)
253 | if system.find("EventID") is not None
254 | else 0
255 | )
256 | event_id = event_id & 0xFFFFFFFF
257 |
258 | provider = system.find("Provider")
259 | source_name = (
260 | provider.get("Name", "Unknown")
261 | if provider is not None
262 | else "Unknown"
263 | )
264 |
265 | time_created = system.find("TimeCreated")
266 | if time_created is not None:
267 | time_str = time_created.get(
268 | "SystemTime", str(datetime.now())
269 | )
270 | else:
271 | time_str = str(datetime.now())
272 |
273 | level = system.find("Level")
274 | event_type = int(level.text) if level is not None else 4
275 |
276 | # Extract message
277 | message = ""
278 | event_data = root.find(".//EventData")
279 | if event_data is not None:
280 | data_items = []
281 | for data in event_data:
282 | name = data.get("Name", "")
283 | value = data.text or ""
284 | if name:
285 | data_items.append(f"{name}: {value}")
286 | message = "; ".join(data_items)
287 |
288 | matching_events.append(
289 | {
290 | "event_id": event_id,
291 | "time_generated": time_str,
292 | "source_name": source_name,
293 | "event_type": event_type,
294 | "level": level_map.get(event_type, "Unknown"),
295 | "message": message[:500] if message else "",
296 | }
297 | )
298 |
299 | count += 1
300 | win32evtlog.EvtClose(event)
301 |
302 | if count >= request.max_entries:
303 | break
304 |
305 | win32evtlog.EvtClose(query_handle)
306 |
307 | except Exception as e:
308 | return {"error": f"Error querying custom event log: {str(e)}"}
309 | else:
310 | # Use legacy API for standard logs
311 | hand = win32evtlog.OpenEventLog(None, request.log_name)
312 | flags = EVENTLOG_BACKWARDS_READ | EVENTLOG_SEQUENTIAL_READ
313 |
314 | # Continue reading until we have enough matching events or no more events
315 | while count < request.max_entries:
316 | events = win32evtlog.ReadEventLog(hand, flags, 0)
317 | if not events:
318 | break # No more events to read
319 |
320 | for event in events:
321 | total_checked += 1
322 |
323 | # Check time filter
324 | if start_time and event.TimeGenerated < start_time:
325 | continue
326 |
327 | # Check Event ID filter
328 | # Handle both signed and unsigned Event ID comparisons
329 | if request.event_id:
330 | # Convert to unsigned 32-bit for comparison
331 | event_id_unsigned = event.EventID & 0xFFFFFFFF
332 | if (
333 | event_id_unsigned != request.event_id
334 | and event.EventID != request.event_id
335 | ):
336 | continue
337 |
338 | # Check level filter (simplified mapping)
339 | if request.level:
340 | event_level = level_map.get(event.EventType, "Unknown")
341 | if event_level.lower() != request.level.lower():
342 | continue
343 |
344 | # Event matches all criteria
345 | try:
346 | message = win32evtlogutil.SafeFormatMessage(
347 | event, request.log_name
348 | )
349 | except:
350 | message = "(Unable to format message)"
351 |
352 | matching_events.append(
353 | {
354 | "event_id": event.EventID
355 | & 0xFFFFFFFF, # Convert to unsigned
356 | "time_generated": str(event.TimeGenerated),
357 | "source_name": event.SourceName,
358 | "event_type": event.EventType,
359 | "level": level_map.get(event.EventType, "Unknown"),
360 | "message": message[:500] if message else "",
361 | }
362 | )
363 |
364 | count += 1
365 | if count >= request.max_entries:
366 | break
367 |
368 | win32evtlog.CloseEventLog(hand)
369 |
370 | return {
371 | "log_name": request.log_name,
372 | "query_criteria": {
373 | "event_id": request.event_id,
374 | "level": request.level,
375 | "time_duration": request.time_duration,
376 | "start_time": str(start_time) if start_time else None,
377 | },
378 | "matching_events": matching_events,
379 | "total_matches": len(matching_events),
380 | "total_events_checked": total_checked,
381 | "max_requested": request.max_entries,
382 | }
383 |
384 | except ImportError:
385 | return {"error": "pywin32 package is required for Windows Event Log access"}
386 | except Exception as e:
387 | return {"error": f"Error querying Windows Event Logs: {str(e)}"}
388 |
389 | @mcp.tool()
390 | async def get_windows_system_health() -> Dict[str, Any]:
391 | """
392 | Get Windows system health overview from Event Logs.
393 |
394 | This tool analyzes recent System and Application event logs
395 | to provide a quick health assessment of the Windows system.
396 | """
397 | import platform
398 |
399 | if platform.system() != "Windows":
400 | return {"error": "This tool is only available on Windows systems"}
401 |
402 | try:
403 | from datetime import datetime, timedelta
404 |
405 | import win32evtlog
406 | import win32evtlogutil
407 | from win32con import EVENTLOG_BACKWARDS_READ, EVENTLOG_SEQUENTIAL_READ
408 |
409 | # Check last 24 hours
410 | start_time = datetime.now() - timedelta(hours=24)
411 |
412 | health_summary = {"errors": 0, "warnings": 0, "critical_events": []}
413 |
414 | for log_name in ["System", "Application"]:
415 | try:
416 | hand = win32evtlog.OpenEventLog(None, log_name)
417 | flags = EVENTLOG_BACKWARDS_READ | EVENTLOG_SEQUENTIAL_READ
418 |
419 | log_errors = 0
420 | log_warnings = 0
421 | done_reading = False
422 |
423 | while not done_reading:
424 | events = win32evtlog.ReadEventLog(hand, flags, 0)
425 | if not events:
426 | break # No more events to read
427 |
428 | for event in events:
429 | if event.TimeGenerated < start_time:
430 | done_reading = True
431 | break
432 |
433 | if event.EventType == 1: # Error
434 | log_errors += 1
435 | if log_errors <= 5: # Capture first 5 errors
436 | try:
437 | message = win32evtlogutil.SafeFormatMessage(
438 | event, log_name
439 | )
440 | except:
441 | message = "Unable to format message"
442 |
443 | health_summary["critical_events"].append(
444 | {
445 | "log": log_name,
446 | "type": "Error",
447 | "event_id": event.EventID
448 | & 0xFFFFFFFF, # Convert to unsigned
449 | "source": event.SourceName,
450 | "time": str(event.TimeGenerated),
451 | "message": message[:200],
452 | }
453 | )
454 |
455 | elif event.EventType == 2: # Warning
456 | log_warnings += 1
457 |
458 | health_summary["errors"] += log_errors
459 | health_summary["warnings"] += log_warnings
460 |
461 | win32evtlog.CloseEventLog(hand)
462 |
463 | except Exception as e:
464 | health_summary[f"{log_name}_error"] = str(e)
465 |
466 | # Determine overall health status
467 | if health_summary["errors"] == 0 and health_summary["warnings"] < 5:
468 | status = "healthy"
469 | elif health_summary["errors"] < 3 and health_summary["warnings"] < 20:
470 | status = "fair"
471 | else:
472 | status = "concerning"
473 |
474 | return {
475 | "time_period": "Last 24 hours",
476 | "overall_status": status,
477 | "summary": {
478 | "total_errors": health_summary["errors"],
479 | "total_warnings": health_summary["warnings"],
480 | },
481 | "critical_events": health_summary["critical_events"],
482 | "timestamp": str(datetime.now()),
483 | }
484 |
485 | except ImportError:
486 | return {"error": "pywin32 package is required for Windows Event Log access"}
487 | except Exception as e:
488 | return {"error": f"Error analyzing Windows system health: {str(e)}"}
489 |
```
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/mcp_server/prompts/process_monitoring_prompt.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Process monitoring and system resource prompts for the MCP Log Analyzer server.
3 | """
4 |
5 | from typing import Optional
6 | from mcp.server import FastMCP
7 |
8 |
9 | def register_process_monitoring_prompts(mcp: FastMCP):
10 | """Register all process monitoring prompts."""
11 |
    @mcp.prompt(
        title="Test System Resources",
        description="Guide for testing system resource monitoring capabilities"
    )
    async def test_system_resources() -> str:
        """Return usage guidance for the test_system_resources_access tool.

        Static prompt text; performs no monitoring itself.
        """
        return """
# 🔍 Test System Resources

## Tool: test_system_resources_access

### Purpose
Tests CPU, memory, disk, network, and process monitoring capabilities.

### What It Tests
- **CPU**: Core count, usage, frequency
- **Memory**: Total, available, usage percentage
- **Disk**: Space usage, I/O counters
- **Network**: Traffic statistics, connections
- **Processes**: Enumeration and basic info

### Usage
```
Tool: test_system_resources_access
```

### Interpreting Results
- **psutil version**: Library version for compatibility
- **Accessible resources**: What can be monitored
- **Current values**: Baseline measurements
- **Platform notes**: OS-specific capabilities

### Next Steps
- Use specialized monitoring tools for details
- Set up regular monitoring if needed
- Note any access limitations
- Plan resource tracking strategy
"""
52 |
    @mcp.prompt(
        title="System Performance Analysis",
        description="Guide for analyzing current system performance"
    )
    async def system_performance_guide() -> str:
        """Return usage guidance for the analyze_system_performance tool.

        Static prompt text describing parameters, metrics, and health
        status meanings; performs no analysis itself.
        """
        return """
# 📊 System Performance Analysis

## Tool: analyze_system_performance

### Purpose
Provides comprehensive analysis of system performance metrics.

### Parameters
- **include_network**: Include network statistics (default: true)
- **include_disk**: Include disk I/O statistics (default: true)
- **sample_interval**: Sampling interval in seconds (default: 1.0)

### Usage Examples
```
# Full system analysis
Tool: analyze_system_performance

# CPU and memory only (faster)
Tool: analyze_system_performance
Parameters: include_network=false, include_disk=false

# Extended sampling
Tool: analyze_system_performance
Parameters: sample_interval=5.0
```

### Performance Metrics

#### CPU Metrics
- **Usage percent**: Current utilization
- **Core count**: Physical and logical cores
- **Frequency**: Current/min/max MHz
- **Load average**: 1/5/15 minute (Unix)

#### Memory Metrics
- **Virtual memory**: Physical RAM usage
- **Swap memory**: Virtual memory usage
- **Available**: Memory for new processes
- **Percent used**: Overall utilization

#### Disk Metrics
- **Usage**: Total/used/free space
- **I/O counters**: Read/write operations
- **Throughput**: Bytes read/written

#### Network Metrics
- **Traffic**: Bytes sent/received
- **Packets**: Count sent/received
- **Connections**: Active connection count

### Health Status
- **Good**: Normal resource usage
- **Fair**: Elevated but manageable
- **Concerning**: Action needed
"""
117 |
118 | @mcp.prompt(
119 | title="Find Resource-Intensive Processes",
120 | description="Guide for identifying high CPU and memory consumers"
121 | )
122 | async def resource_intensive_processes(
123 | sort_by: Optional[str] = None
124 | ) -> str:
125 | """
126 | Find processes consuming significant resources.
127 |
128 | Args:
129 | sort_by: Sort criteria (cpu, memory, pid)
130 | """
131 |
132 | base_guide = """
133 | # 🔍 Find Resource-Intensive Processes
134 |
135 | ## Tool: find_resource_intensive_processes
136 |
137 | ### Purpose
138 | Identifies processes with high CPU or memory usage for troubleshooting.
139 |
140 | ### Parameters
141 | - **process_name**: Filter by name (optional)
142 | - **min_cpu_percent**: CPU threshold (default: 0.0)
143 | - **min_memory_percent**: Memory threshold (default: 0.0)
144 | - **max_results**: Result limit (default: 20)
145 | - **sort_by**: Sort order (cpu/memory/pid)
146 |
147 | ### Usage Examples
148 | ```
149 | # Top CPU consumers
150 | Tool: find_resource_intensive_processes
151 | Parameters: min_cpu_percent=10, sort_by="cpu"
152 |
153 | # Memory hogs (>5% memory)
154 | Tool: find_resource_intensive_processes
155 | Parameters: min_memory_percent=5, sort_by="memory"
156 |
157 | # Specific process analysis
158 | Tool: find_resource_intensive_processes
159 | Parameters: process_name="python", sort_by="memory"
160 | ```
161 | """
162 |
163 | sort_guides = {
164 | "cpu": """
165 | ### Sorting by CPU
166 | Best for identifying:
167 | - Runaway processes
168 | - CPU-intensive tasks
169 | - Performance bottlenecks
170 | - Hung applications
171 |
172 | Look for:
173 | - Consistently high CPU usage
174 | - Unexpected CPU consumers
175 | - Multiple instances of same process
176 | """,
177 | "memory": """
178 | ### Sorting by Memory
179 | Best for identifying:
180 | - Memory leaks
181 | - Resource-heavy applications
182 | - Cache/buffer usage
183 | - Potential OOM risks
184 |
185 | Look for:
186 | - Growing memory usage
187 | - Disproportionate memory use
188 | - Zombie processes
189 | """,
190 | "pid": """
191 | ### Sorting by PID
192 | Best for:
193 | - Chronological process view
194 | - Parent-child relationships
195 | - System vs user processes
196 | - Process lifecycle analysis
197 |
198 | Lower PIDs are typically system processes.
199 | """
200 | }
201 |
202 | if sort_by and sort_by.lower() in sort_guides:
203 | base_guide += sort_guides[sort_by.lower()]
204 |
205 | base_guide += """
206 | ### Process Information Provided
207 | - **PID**: Process identifier
208 | - **Name**: Process name
209 | - **CPU %**: Current CPU usage
210 | - **Memory %**: Memory usage percentage
211 | - **Memory MB**: Actual memory in MB
212 | - **Status**: Running/sleeping/zombie
213 | - **Command**: Full command line
214 | - **Create time**: Process start time
215 | """
216 |
217 | return base_guide
218 |
    @mcp.prompt(
        title="Monitor Process Health",
        description="Guide for monitoring specific process health and status"
    )
    async def monitor_process_health() -> str:
        """Return usage guidance for the monitor_process_health tool.

        Static prompt text describing parameters, health indicators, and
        follow-up actions; performs no monitoring itself.
        """
        return """
# 🎯 Monitor Process Health

## Tool: monitor_process_health

### Purpose
Monitors specific process health, resource usage, and potential issues.

### Parameters
- **process_name**: Name of process to monitor (required)

### Usage Examples
```
# Monitor web server
Tool: monitor_process_health
Parameters: process_name="nginx"

# Monitor database
Tool: monitor_process_health
Parameters: process_name="postgres"

# Monitor custom application
Tool: monitor_process_health
Parameters: process_name="myapp"
```

### Health Indicators

#### Healthy Process
✅ Stable CPU usage
✅ Consistent memory usage
✅ Normal connection count
✅ Appropriate age for service
✅ Status: running

#### Warning Signs
⚠️ High CPU spikes
⚠️ Growing memory usage
⚠️ Many connections
⚠️ Recent restarts
⚠️ Status: sleeping (if unexpected)

#### Critical Issues
❌ Excessive CPU usage
❌ Memory leak indicators
❌ Connection exhaustion
❌ Very recent start (crash?)
❌ Status: zombie

### Multiple Instances
Tool handles multiple processes with same name:
- Reports each instance separately
- Shows total resource usage
- Identifies newest/oldest instances

### Follow-up Actions
1. Check process logs for errors
2. Monitor trends over time
3. Compare with baseline values
4. Investigate recent restarts
5. Check system resources
"""
289 |
    @mcp.prompt(
        title="System Health Summary",
        description="Get comprehensive system health overview"
    )
    async def system_health_summary() -> str:
        """Return usage guidance for the get_system_health_summary tool.

        Static prompt text explaining the health score, reported fields,
        and common issues; performs no assessment itself.
        """
        return """
# 📊 System Health Summary

## Tool: get_system_health_summary

### Purpose
Provides overall system health assessment with resource usage and top consumers.

### Usage
```
Tool: get_system_health_summary
```

### Health Score Interpretation
- **80-100**: Excellent - System running smoothly
- **60-79**: Good - Minor resource usage
- **40-59**: Fair - Moderate load, monitor closely
- **20-39**: Poor - High resource usage
- **0-19**: Critical - Immediate action needed

### Information Provided

#### Resource Summary
- CPU usage percentage
- Memory usage percentage
- Disk usage percentage
- Total process count

#### Top Consumers
- Top 5 CPU processes
- Top 5 memory processes
- Resource usage details
- Process command lines

#### Health Assessment
- Overall status (excellent/good/fair/poor/critical)
- Identified issues
- Recommended actions
- Timestamp

### Common Issues Detected
- High CPU usage (>80%)
- High memory usage (>90%)
- Excessive disk usage (>95%)
- Too many processes (>1000)
- Resource exhaustion risks

### Using the Summary
1. Quick daily health checks
2. Baseline establishment
3. Trend monitoring
4. Capacity planning
5. Problem identification
"""
352 |
353 | @mcp.prompt(
354 | title="CPU Troubleshooting",
355 | description="Diagnose and resolve high CPU usage issues"
356 | )
357 | async def cpu_troubleshooting() -> str:
358 | """
359 | Guide for troubleshooting CPU-related issues.
360 | """
361 | return """
362 | # 🔥 CPU Troubleshooting Guide
363 |
364 | ## Diagnosing High CPU Usage
365 |
366 | ### Step 1: Identify CPU Consumers
367 | ```
368 | Tool: find_resource_intensive_processes
369 | Parameters: min_cpu_percent=20, sort_by="cpu"
370 | ```
371 |
372 | ### Step 2: Analyze System Performance
373 | ```
374 | Tool: analyze_system_performance
375 | Parameters: include_disk=false, include_network=false
376 | ```
377 |
378 | ### Step 3: Monitor Specific Process
379 | ```
380 | Tool: monitor_process_health
381 | Parameters: process_name="high-cpu-process"
382 | ```
383 |
384 | ## Common CPU Issues
385 |
386 | ### Runaway Process
387 | **Symptoms**: Single process at 100% CPU
388 | **Causes**:
389 | - Infinite loops
390 | - Busy waiting
391 | - Algorithm issues
392 |
393 | **Solutions**:
394 | - Restart the process
395 | - Debug application code
396 | - Apply CPU limits
397 |
398 | ### System Overload
399 | **Symptoms**: Multiple processes high CPU
400 | **Causes**:
401 | - Too many concurrent tasks
402 | - Insufficient CPU cores
403 | - Background jobs
404 |
405 | **Solutions**:
406 | - Reduce concurrent load
407 | - Schedule tasks off-peak
408 | - Upgrade CPU resources
409 |
410 | ### CPU Thrashing
411 | **Symptoms**: Rapid CPU spikes
412 | **Causes**:
413 | - Context switching
414 | - Memory pressure
415 | - I/O wait
416 |
417 | **Solutions**:
418 | - Reduce process count
419 | - Increase memory
420 | - Optimize I/O operations
421 |
422 | ## Investigation Checklist
423 | - [ ] Check top CPU consumers
424 | - [ ] Review process command lines
425 | - [ ] Monitor CPU trends
426 | - [ ] Check load average
427 | - [ ] Verify cooling/throttling
428 | - [ ] Review recent changes
429 | """
430 |
431 | @mcp.prompt(
432 | title="Memory Troubleshooting",
433 | description="Diagnose and resolve memory usage issues"
434 | )
435 | async def memory_troubleshooting() -> str:
436 | """
437 | Guide for troubleshooting memory-related issues.
438 | """
439 | return """
440 | # 💾 Memory Troubleshooting Guide
441 |
442 | ## Diagnosing High Memory Usage
443 |
444 | ### Step 1: Find Memory Consumers
445 | ```
446 | Tool: find_resource_intensive_processes
447 | Parameters: min_memory_percent=5, sort_by="memory"
448 | ```
449 |
450 | ### Step 2: Check System Memory
451 | ```
452 | Tool: analyze_system_performance
453 | Parameters: include_disk=false, include_network=false
454 | ```
455 |
456 | ### Step 3: Monitor for Leaks
457 | ```
458 | Tool: monitor_process_health
459 | Parameters: process_name="suspected-process"
460 | ```
461 | Watch for growing memory over time.
462 |
463 | ## Common Memory Issues
464 |
465 | ### Memory Leaks
466 | **Symptoms**:
467 | - Gradual memory increase
468 | - Never releases memory
469 | - Eventually crashes
470 |
471 | **Detection**:
472 | - Monitor process over hours/days
473 | - Check memory vs process age
474 | - Look for linear growth
475 |
476 | **Solutions**:
477 | - Restart process periodically
478 | - Fix application code
479 | - Implement memory limits
480 |
481 | ### Memory Exhaustion
482 | **Symptoms**:
483 | - System using 95%+ memory
484 | - Heavy swap usage
485 | - System slowdown
486 |
487 | **Solutions**:
488 | - Kill unnecessary processes
489 | - Add more RAM
490 | - Configure swap space
491 | - Optimize applications
492 |
493 | ### Cache/Buffer Usage
494 | **Symptoms**:
495 | - High memory usage
496 | - But available memory exists
497 | - System performs well
498 |
499 | **Note**: This is normal Linux behavior.
500 | Cache/buffers are released when needed.
501 |
502 | ## Memory Analysis Steps
503 | 1. Identify top consumers
504 | 2. Check for growth patterns
505 | 3. Monitor swap usage
506 | 4. Review OOM killer logs
507 | 5. Calculate actual free memory
508 | 6. Plan capacity upgrades
509 | """
510 |
511 | @mcp.prompt(
512 | title="Process Monitoring Best Practices",
513 | description="Best practices for effective process monitoring"
514 | )
515 | async def process_monitoring_practices() -> str:
516 | """
517 | Best practices guide for process monitoring.
518 | """
519 | return """
520 | # 📋 Process Monitoring Best Practices
521 |
522 | ## Establishing Baselines
523 |
524 | ### Initial Baseline
525 | 1. Run system health summary
526 | ```
527 | Tool: get_system_health_summary
528 | ```
529 | 2. Document normal values:
530 | - Typical CPU usage
531 | - Average memory usage
532 | - Normal process count
533 | - Standard disk usage
534 |
535 | ### Regular Monitoring
536 | - **Daily**: Quick health check
537 | - **Weekly**: Trend analysis
538 | - **Monthly**: Capacity review
539 |
540 | ## Monitoring Strategy
541 |
542 | ### Proactive Monitoring
543 | Set thresholds for alerts:
544 | - CPU > 80% for 5 minutes
545 | - Memory > 90%
546 | - Disk > 85%
547 | - Critical process not running
548 |
549 | ### Resource Tracking
550 | ```
551 | # Track specific application
552 | Tool: monitor_process_health
553 | Parameters: process_name="critical-app"
554 |
555 | # Find resource spikes
556 | Tool: find_resource_intensive_processes
557 | Parameters: min_cpu_percent=50
558 | ```
559 |
560 | ### Trend Analysis
561 | 1. Collect metrics over time
562 | 2. Identify patterns:
563 | - Peak usage hours
564 | - Growth trends
565 | - Recurring issues
566 | 3. Plan capacity accordingly
567 |
568 | ## Common Monitoring Tasks
569 |
570 | ### Daily Health Check
571 | ```
572 | 1. Tool: get_system_health_summary
573 | 2. Review any issues flagged
574 | 3. Check critical processes
575 | 4. Note unusual patterns
576 | ```
577 |
578 | ### Performance Investigation
579 | ```
580 | 1. Tool: analyze_system_performance
581 | 2. Tool: find_resource_intensive_processes
582 | 3. Deep dive on problem processes
583 | 4. Check system logs
584 | ```
585 |
586 | ### Capacity Planning
587 | ```
588 | 1. Track resource trends
589 | 2. Project growth rates
590 | 3. Identify bottlenecks
591 | 4. Plan upgrades
592 | ```
593 |
594 | ## Key Metrics to Track
595 | - **CPU**: Usage %, load average
596 | - **Memory**: Used %, swap usage
597 | - **Disk**: Space %, I/O rates
598 | - **Network**: Bandwidth, connections
599 | - **Processes**: Count, top consumers
600 |
601 | ## Documentation
602 | Maintain records of:
603 | - Normal baselines
604 | - Known issues
605 | - Growth trends
606 | - Remediation steps
607 | - Capacity plans
608 | """
609 |
610 | @mcp.prompt(
611 | title="Emergency Performance Response",
612 | description="Quick response guide for performance emergencies"
613 | )
614 | async def emergency_performance() -> str:
615 | """
616 | Emergency response for critical performance issues.
617 | """
618 | return """
619 | # 🚨 Emergency Performance Response
620 |
621 | ## System Unresponsive
622 |
623 | ### Immediate Actions (< 2 minutes)
624 | ```
625 | Tool: get_system_health_summary
626 | ```
627 | Quick assessment of system state.
628 |
629 | ### Find Culprits (2-5 minutes)
630 | ```
631 | Tool: find_resource_intensive_processes
632 | Parameters: min_cpu_percent=50, min_memory_percent=20
633 | ```
634 |
635 | ### Kill Problem Processes
636 | 1. Identify non-critical processes with high resource usage
637 | 2. Terminate gracefully if possible
638 | 3. Force kill if necessary
639 | 4. Monitor system response
640 |
641 | ## High CPU Emergency
642 |
643 | ### Quick Fix
644 | ```
645 | # Find top CPU users
646 | Tool: find_resource_intensive_processes
647 | Parameters: min_cpu_percent=30, sort_by="cpu"
648 | ```
649 |
650 | Actions:
651 | 1. Kill non-essential processes
652 | 2. Nice/renice CPU hogs
653 | 3. Disable background tasks
654 | 4. Check for runaway processes
655 |
656 | ## Memory Emergency
657 |
658 | ### Quick Fix
659 | ```
660 | # Find memory hogs
661 | Tool: find_resource_intensive_processes
662 | Parameters: min_memory_percent=10, sort_by="memory"
663 | ```
664 |
665 | Actions:
666 | 1. Kill largest non-critical process
667 | 2. Clear caches if possible
668 | 3. Add emergency swap
669 | 4. Restart memory-leaking services
670 |
671 | ## Disk Full Emergency
672 |
673 | ### Quick Actions
674 | 1. Find large files/directories
675 | 2. Clear logs and temp files
676 | 3. Remove old backups
677 | 4. Empty trash/recycle bin
678 | 5. Compress large files
679 |
680 | ## Network Saturation
681 |
682 | ### Quick Fix
683 | ```
684 | Tool: analyze_system_performance
685 | Parameters: include_network=true
686 | ```
687 |
688 | Actions:
689 | 1. Identify bandwidth hogs
690 | 2. Rate limit if possible
691 | 3. Block non-essential traffic
692 | 4. Check for DDoS/attacks
693 |
694 | ## Recovery Checklist
695 | - [ ] System responsive again?
696 | - [ ] Critical services running?
697 | - [ ] Resources below thresholds?
698 | - [ ] Root cause identified?
699 | - [ ] Temporary fixes documented?
700 | - [ ] Permanent fix planned?
701 | - [ ] Monitoring increased?
702 | """
703 |
704 | @mcp.prompt(
705 | title="Process Lifecycle Management",
706 | description="Understanding and managing process lifecycles"
707 | )
708 | async def process_lifecycle() -> str:
709 | """
710 | Guide for understanding process states and lifecycle.
711 | """
712 | return """
713 | # 🔄 Process Lifecycle Management
714 |
715 | ## Process States
716 |
717 | ### Running
718 | - Actively executing on CPU
719 | - Normal state for active processes
720 | - Should match expected workload
721 |
722 | ### Sleeping
723 | - Waiting for event/resource
724 | - Normal for idle processes
725 | - Check if unexpectedly sleeping
726 |
727 | ### Zombie
728 | - Process terminated
729 | - Parent hasn't collected status
730 | - Indicates parent process issue
731 | - Can't be killed directly
732 |
733 | ### Stopped
734 | - Suspended (SIGSTOP)
735 | - Debugging or job control
736 | - Can be resumed
737 |
738 | ## Monitoring Process Age
739 |
740 | ### Check Process Start Time
741 | ```
742 | Tool: monitor_process_health
743 | Parameters: process_name="service-name"
744 | ```
745 |
746 | ### Age Indicators
747 | - **Very new** (< 1 min): Just started or restarted
748 | - **Recent** (< 1 hour): May indicate crash/restart
749 | - **Stable** (> 1 day): Normal for services
750 | - **Very old**: Check for memory leaks
751 |
752 | ## Process Relationships
753 |
754 | ### Parent-Child
755 | - Parent spawns children
756 | - Children inherit resources
757 | - Orphans adopted by init
758 | - Zombies need parent action
759 |
760 | ### Process Groups
761 | - Related processes
762 | - Share signals
763 | - Common for services
764 | - Monitor as group
765 |
766 | ## Lifecycle Management
767 |
768 | ### Graceful Restart
769 | 1. Monitor current state
770 | 2. Send termination signal
771 | 3. Wait for cleanup
772 | 4. Start new instance
773 | 5. Verify functionality
774 |
775 | ### Resource Limits
776 | - CPU time limits
777 | - Memory limits
778 | - File descriptor limits
779 | - Process count limits
780 |
781 | ### Automatic Management
782 | - Systemd restart policies
783 | - Process supervisors
784 | - Health check scripts
785 | - Resource governors
786 |
787 | ## Common Issues
788 |
789 | ### Frequent Restarts
790 | - Check logs for crashes
791 | - Review resource limits
792 | - Verify dependencies
793 | - Check configuration
794 |
795 | ### Long-Running Processes
796 | - Monitor for memory leaks
797 | - Check file descriptor leaks
798 | - Verify log rotation
799 | - Plan periodic restarts
800 | """
801 |
802 | @mcp.prompt(
803 | title="System Resource Thresholds",
804 | description="Guidelines for setting resource monitoring thresholds"
805 | )
806 | async def resource_thresholds() -> str:
807 | """
808 | Guide for setting appropriate resource thresholds.
809 | """
810 | return """
811 | # 📏 System Resource Thresholds
812 |
813 | ## CPU Thresholds
814 |
815 | ### Usage Levels
816 | - **0-40%**: Low usage, optimal
817 | - **40-60%**: Moderate, normal
818 | - **60-80%**: High, monitor closely
819 | - **80-95%**: Very high, investigate
820 | - **95-100%**: Critical, take action
821 |
822 | ### Load Average (Unix/Linux)
823 | - **< 1.0 per core**: Good
824 | - **1.0-2.0 per core**: Busy
825 | - **> 2 per core**: Overloaded
826 |
827 | Example: 4-core system
828 | - Good: < 4.0
829 | - Busy: 4-8
830 | - Overloaded: > 8
831 |
832 | ## Memory Thresholds
833 |
834 | ### RAM Usage
835 | - **0-60%**: Healthy
836 | - **60-75%**: Normal
837 | - **75-85%**: Monitor
838 | - **85-95%**: Warning
839 | - **95-100%**: Critical
840 |
841 | ### Swap Usage
842 | - **0-20%**: Normal
843 | - **20-50%**: Monitor
844 | - **50-80%**: Performance impact
845 | - **> 80%**: Critical
846 |
847 | ## Disk Thresholds
848 |
849 | ### Space Usage
850 | - **0-70%**: Safe
851 | - **70-80%**: Plan cleanup
852 | - **80-90%**: Warning
853 | - **90-95%**: Critical
854 | - **> 95%**: Emergency
855 |
856 | ### I/O Metrics
857 | - Response time > 20ms: Investigate
858 | - Queue depth > 10: Bottleneck
859 | - Utilization > 80%: Saturated
860 |
861 | ## Process Thresholds
862 |
863 | ### Process Count
864 | - **< 200**: Light load
865 | - **200-500**: Normal
866 | - **500-1000**: Heavy
867 | - **> 1000**: Very heavy
868 |
869 | ### Per-Process Limits
870 | - CPU > 50%: Investigate
871 | - Memory > 10%: Monitor
872 | - Connections > 1000: Check
873 | - Threads > 500: Review
874 |
875 | ## Network Thresholds
876 |
877 | ### Bandwidth
878 | - **< 50%**: Good
879 | - **50-70%**: Normal
880 | - **70-85%**: High
881 | - **> 85%**: Saturated
882 |
883 | ### Connections
884 | - Depends on service type
885 | - Web server: 1000s normal
886 | - Database: 100s typical
887 | - Monitor for growth
888 |
889 | ## Setting Custom Thresholds
890 |
891 | Consider:
892 | 1. Baseline measurements
893 | 2. Application requirements
894 | 3. Peak vs average load
895 | 4. Business criticality
896 | 5. Hardware capabilities
897 |
898 | Adjust based on:
899 | - Historical data
900 | - Growth projections
901 | - SLA requirements
902 | - User experience
903 | """
```
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/mcp_server/prompts/windows_testing_prompt.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Windows testing and diagnostics prompts for the MCP Log Analyzer server.
3 | """
4 |
5 | from typing import Optional
6 | from mcp.server import FastMCP
7 |
8 |
9 | def register_windows_testing_prompts(mcp: FastMCP):
10 | """Register all Windows testing prompts."""
11 |
12 | @mcp.prompt(
13 | title="Test Windows Event Log Access",
14 | description="Guide for testing Windows Event Log access and permissions"
15 | )
16 | async def test_event_log_access() -> str:
17 | """
18 | Test Windows Event Log access capabilities.
19 | """
20 | return """
21 | # 🔍 Test Windows Event Log Access
22 |
23 | ## Tool: test_windows_event_log_access
24 |
25 | ### Purpose
26 | Tests access to Windows Event Logs and verifies permissions.
27 |
28 | ### What It Tests
29 | - **System Log**: Hardware, drivers, system services
30 | - **Application Log**: Application crashes and errors
31 | - **Security Log**: Authentication and audit events
32 | - **pywin32**: Package availability check
33 |
34 | ### Usage
35 | ```
36 | Tool: test_windows_event_log_access
37 | ```
38 |
39 | ### Interpreting Results
40 | - **Accessible**: Can read and query the log
41 | - **Access Denied**: Need administrator privileges
42 | - **pywin32 available**: Required for Event Log access
43 |
44 | ### Common Issues
45 | - Security log requires admin rights
46 | - pywin32 not installed: `pip install pywin32`
47 | - Need to run as administrator
48 |
49 | ### Next Steps
50 | - Install pywin32 if missing
51 | - Run with admin rights for Security log
52 | - Register accessible logs as sources
53 | """
54 |
55 | @mcp.prompt(
56 | title="Windows Event Log Information",
57 | description="Get detailed information about specific Windows Event Logs"
58 | )
59 | async def event_log_info_guide() -> str:
60 | """
61 | Guide for retrieving Windows Event Log information.
62 | """
63 | return """
64 | # 📊 Windows Event Log Information
65 |
66 | ## Tool: get_windows_event_log_info
67 |
68 | ### Purpose
69 | Retrieves metadata and recent entries from Windows Event Logs.
70 |
71 | ### Parameters
72 | - **log_name**: "System", "Application", or "Security"
73 | - **max_entries**: Number of recent entries (default: 10)
74 |
75 | ### Usage Examples
76 | ```
77 | # Get System log info
78 | Tool: get_windows_event_log_info
79 | Parameters: log_name="System", max_entries=20
80 |
81 | # Check Application events
82 | Tool: get_windows_event_log_info
83 | Parameters: log_name="Application", max_entries=50
84 |
85 | # Security events (requires admin)
86 | Tool: get_windows_event_log_info
87 | Parameters: log_name="Security", max_entries=10
88 | ```
89 |
90 | ### Information Returned
91 | - **Log metadata**: Size, record count, timestamps
92 | - **Recent entries**: Event ID, source, type, message
93 | - **Event details**: Formatted for readability
94 |
95 | ### Event Types
96 | - 1 = Error (Red)
97 | - 2 = Warning (Yellow)
98 | - 4 = Information (Blue)
99 | - 8 = Success Audit
100 | - 16 = Failure Audit
101 | """
102 |
103 | @mcp.prompt(
104 | title="Query Windows Events",
105 | description="Query Windows Event Logs with specific criteria"
106 | )
107 | async def query_windows_events(
108 | filter_type: Optional[str] = None
109 | ) -> str:
110 | """
111 | Guide for querying Windows events by criteria.
112 |
113 | Args:
114 | filter_type: Type of filter (event_id, level, time)
115 | """
116 |
117 | base_guide = """
118 | # 🔍 Query Windows Events by Criteria
119 |
120 | ## Tool: query_windows_events_by_criteria
121 |
122 | ### Purpose
123 | Query Windows Event Logs with powerful filtering options.
124 |
125 | ### Parameters
126 | - **log_name**: Target log (default: "System")
127 | - **event_id**: Specific Event ID to find
128 | - **level**: "Error", "Warning", or "Information"
129 | - **time_duration**: Time range (e.g., "30m", "2h", "1d")
130 | - **max_entries**: Result limit (default: 50)
131 |
132 | ### Usage Examples
133 | ```
134 | # Find all errors in last hour
135 | Tool: query_windows_events_by_criteria
136 | Parameters: level="Error", time_duration="1h"
137 |
138 | # Find specific Event ID
139 | Tool: query_windows_events_by_criteria
140 | Parameters: event_id=7001, time_duration="24h"
141 |
142 | # Application warnings
143 | Tool: query_windows_events_by_criteria
144 | Parameters: log_name="Application", level="Warning"
145 | ```
146 | """
147 |
148 | filter_guides = {
149 | "event_id": """
150 | ### Filtering by Event ID
151 | Common Event IDs to search:
152 | - **1074**: System shutdown/restart
153 | - **6005/6006**: Event Log start/stop
154 | - **7000-7034**: Service control events
155 | - **1000**: Application crashes
156 | - **4624/4625**: Logon success/failure
157 |
158 | Example:
159 | ```
160 | Tool: query_windows_events_by_criteria
161 | Parameters: event_id=7001, time_duration="6h"
162 | ```
163 | """,
164 | "level": """
165 | ### Filtering by Level
166 | Event severity levels:
167 | - **Error**: Critical failures
168 | - **Warning**: Potential issues
169 | - **Information**: Normal operations
170 |
171 | Example:
172 | ```
173 | Tool: query_windows_events_by_criteria
174 | Parameters: level="Error", time_duration="2h"
175 | ```
176 | """,
177 | "time": """
178 | ### Time-based Filtering
179 | Duration formats:
180 | - **Minutes**: "30m", "45m"
181 | - **Hours**: "1h", "6h", "12h"
182 | - **Days**: "1d", "7d", "30d"
183 |
184 | Example:
185 | ```
186 | Tool: query_windows_events_by_criteria
187 | Parameters: time_duration="4h", level="Error"
188 | ```
189 | """
190 | }
191 |
192 | if filter_type and filter_type.lower() in filter_guides:
193 | base_guide += filter_guides[filter_type.lower()]
194 |
195 | base_guide += """
196 | ### Query Strategy
197 | 1. Start with recent time ranges
198 | 2. Use specific Event IDs when known
199 | 3. Combine filters for precision
200 | 4. Expand search if needed
201 | """
202 |
203 | return base_guide
204 |
205 | @mcp.prompt(
206 | title="Windows System Health",
207 | description="Get Windows system health overview from Event Logs"
208 | )
209 | async def windows_system_health() -> str:
210 | """
211 | Guide for Windows system health assessment.
212 | """
213 | return """
214 | # 🎯 Windows System Health Overview
215 |
216 | ## Tool: get_windows_system_health
217 |
218 | ### Purpose
219 | Analyzes System and Application logs to assess Windows health.
220 |
221 | ### Usage
222 | ```
223 | Tool: get_windows_system_health
224 | ```
225 |
226 | ### Analysis Period
227 | - Last 24 hours of events
228 | - System and Application logs
229 | - Error and warning counts
230 | - Critical event identification
231 |
232 | ### Health Status Levels
233 | - **Healthy**: 0 errors, minimal warnings
234 | - **Fair**: <3 errors, <20 warnings
235 | - **Concerning**: 3+ errors or 20+ warnings
236 |
237 | ### Critical Events Shown
238 | - Service failures
239 | - Application crashes
240 | - System errors
241 | - Hardware issues
242 | - Driver problems
243 |
244 | ### Follow-up Actions
245 | Based on health status:
246 | 1. **Healthy**: Continue monitoring
247 | 2. **Fair**: Investigate warnings
248 | 3. **Concerning**: Address errors immediately
249 |
250 | ### Common Issues Found
251 | - Windows Update failures
252 | - Service startup problems
253 | - Application crashes
254 | - Driver errors
255 | - Hardware warnings
256 | """
257 |
258 | @mcp.prompt(
259 | title="Windows Service Troubleshooting",
260 | description="Diagnose Windows service failures and issues"
261 | )
262 | async def service_troubleshooting() -> str:
263 | """
264 | Guide for troubleshooting Windows services.
265 | """
266 | return """
267 | # 🛠️ Windows Service Troubleshooting
268 |
269 | ## Common Service Event IDs
270 |
271 | ### Service Failures
272 | - **7000**: Service failed to start (logon failure)
273 | - **7001**: Service depends on failed service
274 | - **7023**: Service terminated with error
275 | - **7024**: Service-specific error
276 | - **7031**: Service crashed unexpectedly
277 |
278 | ### Service Timeouts
279 | - **7009**: Connection timeout
280 | - **7011**: Response timeout
281 | - **7022**: Service hung on starting
282 |
283 | ## Diagnostic Steps
284 |
285 | ### Step 1: Find Service Errors
286 | ```
287 | Tool: query_windows_events_by_criteria
288 | Parameters: event_id=7000, time_duration="6h"
289 | ```
290 |
291 | ### Step 2: Check Dependencies
292 | ```
293 | Tool: query_windows_events_by_criteria
294 | Parameters: event_id=7001, time_duration="6h"
295 | ```
296 |
297 | ### Step 3: Review Service Crashes
298 | ```
299 | Tool: query_windows_events_by_criteria
300 | Parameters: event_id=7031, time_duration="24h"
301 | ```
302 |
303 | ## Common Causes
304 |
305 | ### Logon Failures (7000)
306 | - Incorrect service account password
307 | - Account locked or disabled
308 | - Insufficient permissions
309 |
310 | ### Dependency Issues (7001)
311 | - Required service not started
312 | - Circular dependencies
313 | - Network service unavailable
314 |
315 | ### Service Crashes (7031)
316 | - Application bugs
317 | - Resource exhaustion
318 | - Configuration errors
319 |
320 | ## Resolution Steps
321 | 1. Check service account credentials
322 | 2. Verify service dependencies
323 | 3. Review service configuration
324 | 4. Check system resources
325 | 5. Examine application logs
326 | """
327 |
328 | @mcp.prompt(
329 | title="Windows Application Crashes",
330 | description="Analyze application crashes and errors"
331 | )
332 | async def application_crashes() -> str:
333 | """
334 | Guide for investigating application crashes.
335 | """
336 | return """
337 | # 💥 Windows Application Crash Analysis
338 |
339 | ## Key Event IDs
340 |
341 | ### Application Errors
342 | - **1000**: Application crash/fault
343 | - **1001**: Windows Error Reporting
344 | - **1002**: Application hang
345 | - **1026**: .NET runtime error
346 |
347 | ## Investigation Steps
348 |
349 | ### Step 1: Find Recent Crashes
350 | ```
351 | Tool: query_windows_events_by_criteria
352 | Parameters: log_name="Application", event_id=1000, time_duration="24h"
353 | ```
354 |
355 | ### Step 2: Check Application Hangs
356 | ```
357 | Tool: query_windows_events_by_criteria
358 | Parameters: log_name="Application", event_id=1002, time_duration="24h"
359 | ```
360 |
361 | ### Step 3: Review Error Details
362 | ```
363 | Tool: get_windows_event_log_info
364 | Parameters: log_name="Application", max_entries=50
365 | ```
366 |
367 | ## Crash Information
368 |
369 | ### Event 1000 Details
370 | - Faulting application name
371 | - Faulting module (DLL/EXE)
372 | - Exception code
373 | - Fault offset
374 | - Process ID and path
375 |
376 | ### Common Exception Codes
377 | - **0xc0000005**: Access violation
378 | - **0xc0000409**: Stack buffer overrun
379 | - **0xc00000fd**: Stack overflow
380 | - **0x80000003**: Breakpoint
381 |
382 | ## Troubleshooting Steps
383 | 1. Identify crashing application
384 | 2. Check for patterns (time, frequency)
385 | 3. Review exception codes
386 | 4. Look for module conflicts
387 | 5. Check for updates/patches
388 | 6. Test in safe mode
389 | """
390 |
391 | @mcp.prompt(
392 | title="Windows Security Monitoring",
393 | description="Monitor Windows security events and authentication"
394 | )
395 | async def security_monitoring() -> str:
396 | """
397 | Guide for Windows security event monitoring.
398 | """
399 | return """
400 | # 🔒 Windows Security Monitoring
401 |
402 | ## Important Security Event IDs
403 |
404 | ### Logon Events
405 | - **4624**: Successful logon
406 | - **4625**: Failed logon attempt
407 | - **4634**: Account logoff
408 | - **4647**: User initiated logoff
409 |
410 | ### Account Management
411 | - **4720**: User account created
412 | - **4722**: User account enabled
413 | - **4725**: User account disabled
414 | - **4726**: User account deleted
415 | - **4740**: Account locked out
416 |
417 | ### Privilege Use
418 | - **4672**: Special privileges assigned
419 | - **4673**: Privileged service called
420 | - **4674**: Operation attempted on object
421 |
422 | ## Security Queries
423 |
424 | ### Failed Login Attempts
425 | ```
426 | Tool: query_windows_events_by_criteria
427 | Parameters: log_name="Security", event_id=4625, time_duration="2h"
428 | ```
429 |
430 | ### Account Lockouts
431 | ```
432 | Tool: query_windows_events_by_criteria
433 | Parameters: log_name="Security", event_id=4740, time_duration="24h"
434 | ```
435 |
436 | ### Successful Logins
437 | ```
438 | Tool: query_windows_events_by_criteria
439 | Parameters: log_name="Security", event_id=4624, time_duration="1h"
440 | ```
441 |
442 | ## Security Analysis
443 |
444 | ### Brute Force Detection
445 | - Multiple 4625 events
446 | - Same username, different IPs
447 | - Rapid attempts
448 |
449 | ### Suspicious Activity
450 | - Logins at unusual times
451 | - New user accounts created
452 | - Privilege escalations
453 | - Service account usage
454 |
455 | ### Monitoring Best Practices
456 | 1. Regular failed login reviews
457 | 2. Track account changes
458 | 3. Monitor privileged access
459 | 4. Check for patterns
460 | 5. Set up alerts
461 |
462 | **Note**: Security log requires administrator privileges.
463 | """
464 |
465 | @mcp.prompt(
466 | title="Windows Boot and Startup",
467 | description="Diagnose Windows boot and startup issues"
468 | )
469 | async def boot_startup_issues() -> str:
470 | """
471 | Guide for Windows boot and startup diagnostics.
472 | """
473 | return """
474 | # 🚀 Windows Boot and Startup Diagnostics
475 |
476 | ## Boot-Related Event IDs
477 |
478 | ### System Start/Stop
479 | - **6005**: Event Log service started (boot)
480 | - **6006**: Event Log service stopped (shutdown)
481 | - **6008**: Unexpected shutdown detected
482 | - **6009**: Processor information at boot
483 |
484 | ### Shutdown/Restart
485 | - **1074**: System shutdown by user/process
486 | - **1076**: Reason for shutdown
487 |
488 | ### Driver/Service Issues
489 | - **7026**: Boot-start driver failed
490 | - **7000**: Service failed at startup
491 |
492 | ## Diagnostic Queries
493 |
494 | ### Check Last Boot
495 | ```
496 | Tool: query_windows_events_by_criteria
497 | Parameters: event_id=6005, time_duration="24h"
498 | ```
499 |
500 | ### Unexpected Shutdowns
501 | ```
502 | Tool: query_windows_events_by_criteria
503 | Parameters: event_id=6008, time_duration="7d"
504 | ```
505 |
506 | ### Boot Driver Failures
507 | ```
508 | Tool: query_windows_events_by_criteria
509 | Parameters: event_id=7026, time_duration="24h"
510 | ```
511 |
512 | ### Service Startup Issues
513 | ```
514 | Tool: query_windows_events_by_criteria
515 | Parameters: event_id=7000, time_duration="2h"
516 | ```
517 |
518 | ## Boot Problem Analysis
519 |
520 | ### Slow Boot
521 | 1. Check service startup times
522 | 2. Look for driver failures
523 | 3. Review dependency chains
524 | 4. Check for timeout events
525 |
526 | ### Boot Loops
527 | 1. Check Event ID 6008 frequency
528 | 2. Look for critical errors before shutdown
529 | 3. Review hardware events
530 | 4. Check for driver conflicts
531 |
532 | ### Service Failures
533 | 1. Identify failing services
534 | 2. Check dependencies
535 | 3. Review service accounts
536 | 4. Verify system resources
537 |
538 | ## Boot Optimization
539 | - Disable unnecessary startup services
540 | - Update drivers
541 | - Check disk health
542 | - Review startup programs
543 | """
544 |
545 | @mcp.prompt(
546 | title="Windows Event ID Reference",
547 | description="Quick reference for common Windows Event IDs"
548 | )
549 | async def event_id_reference(
550 | category: Optional[str] = None
551 | ) -> str:
552 | """
553 | Windows Event ID reference guide.
554 |
555 | Args:
556 | category: Event category (system, service, security, etc.)
557 | """
558 |
559 | all_categories = {
560 | "system": """
561 | ## System Events
562 | - **1074**: System shutdown/restart initiated
563 | - **6005**: Event Log service started
564 | - **6006**: Event Log service stopped
565 | - **6008**: Unexpected system shutdown
566 | - **6009**: Processor information
567 | - **6013**: System uptime
568 | """,
569 | "service": """
570 | ## Service Control Manager
571 | - **7000**: Service failed to start
572 | - **7001**: Service dependency failure
573 | - **7009**: Connection timeout
574 | - **7011**: Service timeout
575 | - **7023**: Service terminated with error
576 | - **7024**: Service-specific error
577 | - **7026**: Boot driver failed
578 | - **7031**: Service crash
579 | - **7034**: Service crashed (no recovery)
580 | """,
581 | "application": """
582 | ## Application Events
583 | - **1000**: Application error/crash
584 | - **1001**: Windows Error Reporting
585 | - **1002**: Application hang
586 | - **1004**: Application recovery
587 | - **1026**: .NET runtime error
588 | """,
589 | "security": """
590 | ## Security Events (Admin Required)
591 | - **4624**: Successful logon
592 | - **4625**: Failed logon
593 | - **4634**: Logoff
594 | - **4672**: Special privileges
595 | - **4720**: User created
596 | - **4726**: User deleted
597 | - **4740**: Account locked
598 | """,
599 | "hardware": """
600 | ## Hardware Events
601 | - **7**: Disk bad block
602 | - **11**: Disk controller error
603 | - **15**: Disk not ready
604 | - **51**: Paging error
605 | - **129**: Disk reset
606 | """
607 | }
608 |
609 | result = "# 📖 Windows Event ID Reference\n\n"
610 |
611 | if category and category.lower() in all_categories:
612 | result += all_categories[category.lower()]
613 | else:
614 | result += "## Common Windows Event IDs by Category\n\n"
615 | for cat_content in all_categories.values():
616 | result += cat_content + "\n"
617 |
618 | result += """
619 | ## Using Event IDs
620 | 1. Note the Event ID from logs
621 | 2. Query for specific IDs
622 | 3. Check patterns and frequency
623 | 4. Cross-reference with time
624 | 5. Correlate related events
625 |
626 | ## Event Levels
627 | - **Error**: Critical failures
628 | - **Warning**: Potential issues
629 | - **Information**: Normal operations
630 | - **Success Audit**: Security success
631 | - **Failure Audit**: Security failure
632 | """
633 |
634 | return result
635 |
636 | @mcp.prompt(
637 | title="Windows Performance Issues",
638 | description="Diagnose Windows performance problems using Event Logs"
639 | )
640 | async def performance_issues() -> str:
641 | """
642 | Guide for Windows performance diagnostics.
643 | """
644 | return """
645 | # 📊 Windows Performance Diagnostics
646 |
647 | ## Performance-Related Events
648 |
649 | ### Resource Issues
650 | - **2004**: Resource exhaustion
651 | - **1001**: Windows Error Reporting activity (see Application Events above)
652 | - **100**: Component timeout
653 |
654 | ### Application Performance
655 | - **1002**: Application hang
656 | - **1530**: User Profile Service warning — registry handles left open (slow logoff)
657 |
658 | ## Investigation Steps
659 |
660 | ### Step 1: Check System Health
661 | ```
662 | Tool: get_windows_system_health
663 | ```
664 |
665 | ### Step 2: Find Application Hangs
666 | ```
667 | Tool: query_windows_events_by_criteria
668 | Parameters: log_name="Application", event_id=1002, time_duration="6h"
669 | ```
670 |
671 | ### Step 3: Look for Timeouts
672 | ```
673 | Tool: query_windows_events_by_criteria
674 | Parameters: event_id=7011, time_duration="6h"
675 | ```
676 |
677 | ### Step 4: Resource Warnings
678 | ```
679 | Tool: query_windows_events_by_criteria
680 | Parameters: level="Warning", time_duration="2h"
681 | ```
682 |
683 | ## Common Performance Issues
684 |
685 | ### High CPU Usage
686 | - Check for crashed services
687 | - Look for restart loops
688 | - Review application errors
689 |
690 | ### Memory Issues
691 | - Application crashes (1000)
692 | - Out of memory errors
693 | - Page file warnings
694 |
695 | ### Disk Problems
696 | - Event ID 7, 11, 51
697 | - Slow response warnings
698 | - I/O timeouts
699 |
700 | ### Network Issues
701 | - Connection timeouts
702 | - Service availability
703 | - DNS failures
704 |
705 | ## Performance Optimization
706 | 1. Identify problematic services
707 | 2. Check for memory leaks
708 | 3. Review disk errors
709 | 4. Monitor service restarts
710 | 5. Update drivers
711 | 6. Check for malware
712 | """
713 |
714 | @mcp.prompt(
715 | title="Windows Event Log Best Practices",
716 | description="Best practices for Windows Event Log monitoring"
717 | )
718 | async def event_log_practices() -> str:
719 | """
720 | Best practices for Event Log management.
721 | """
722 | return """
723 | # 📋 Windows Event Log Best Practices
724 |
725 | ## Regular Monitoring
726 |
727 | ### Daily Checks
728 | ```
729 | Tool: get_windows_system_health
730 | ```
731 | - Review health status
732 | - Check critical errors
733 | - Note new warnings
734 |
735 | ### Weekly Analysis
736 | - Service failure patterns
737 | - Application crash trends
738 | - Security audit review
739 | - Performance issues
740 |
741 | ## Effective Queries
742 |
743 | ### Start Specific
744 | ```
745 | # Known issue
746 | Tool: query_windows_events_by_criteria
747 | Parameters: event_id=7001, time_duration="24h"
748 | ```
749 |
750 | ### Then Broaden
751 | ```
752 | # General errors
753 | Tool: query_windows_events_by_criteria
754 | Parameters: level="Error", time_duration="6h"
755 | ```
756 |
757 | ## Key Event IDs to Monitor
758 |
759 | ### Critical System
760 | - 6008: Unexpected shutdown
761 | - 1074: System restart
762 | - 7031: Service crash
763 | - 41: Kernel power
764 |
765 | ### Security (if accessible)
766 | - 4625: Failed logins
767 | - 4740: Account lockouts
768 | - 4720: User creation
769 |
770 | ### Application Health
771 | - 1000: App crashes
772 | - 1002: App hangs
773 | - 1026: .NET errors
774 |
775 | ## Log Management
776 |
777 | ### Retention
778 | - System: 30-90 days
779 | - Application: 30 days
780 | - Security: 90-365 days
781 |
782 | ### Size Limits
783 | - Prevent logs from filling
784 | - Archive old events
785 | - Regular cleanup
786 |
787 | ## Automation Ideas
788 | 1. Schedule daily health checks
789 | 2. Alert on critical Event IDs
790 | 3. Weekly summary reports
791 | 4. Trend analysis
792 | 5. Correlation rules
793 |
794 | ## Documentation
795 | Track:
796 | - Recurring issues
797 | - Resolution steps
798 | - Event patterns
799 | - System changes
800 | """
801 |
802 | @mcp.prompt(
803 | title="Windows Emergency Diagnostics",
804 | description="Quick diagnostics for Windows emergencies"
805 | )
806 | async def emergency_diagnostics() -> str:
807 | """
808 | Emergency Windows diagnostic procedures.
809 | """
810 | return """
811 | # 🚨 Windows Emergency Diagnostics
812 |
813 | ## System Won't Boot
814 |
815 | ### Quick Checks
816 | 1. Boot to Safe Mode
817 | 2. Check Event Viewer for:
818 | ```
819 | Tool: query_windows_events_by_criteria
820 | Parameters: event_id=7026, time_duration="1h"
821 | ```
822 |
823 | 3. Service failures:
824 | ```
825 | Tool: query_windows_events_by_criteria
826 | Parameters: event_id=7000, time_duration="1h"
827 | ```
828 |
829 | ## Blue Screen (BSOD)
830 |
831 | ### After Reboot
832 | ```
833 | Tool: query_windows_events_by_criteria
834 | Parameters: event_id=41, time_duration="1h"
835 | ```
836 |
837 | Check for:
838 | - Kernel-Power events
839 | - Driver failures
840 | - Hardware errors
841 |
842 | ## Service Failures
843 |
844 | ### Critical Service Down
845 | ```
846 | # Find specific service
847 | Tool: query_windows_events_by_criteria
848 | Parameters: level="Error", time_duration="30m"
849 | ```
850 |
851 | Quick fixes:
852 | 1. Restart service
853 | 2. Check dependencies
854 | 3. Verify credentials
855 | 4. Review resources
856 |
857 | ## Performance Crisis
858 |
859 | ### System Slow
860 | ```
861 | Tool: get_windows_system_health
862 | ```
863 |
864 | Then check:
865 | - Application hangs (1002)
866 | - Service timeouts (7011)
867 | - Resource warnings
868 |
869 | ## Security Incident
870 |
871 | ### Suspected Breach
872 | ```
873 | # Failed logins
874 | Tool: query_windows_events_by_criteria
875 | Parameters: log_name="Security", event_id=4625
876 |
877 | # New accounts
878 | Parameters: log_name="Security", event_id=4720
879 | ```
880 |
881 | ## Recovery Checklist
882 | - [ ] System accessible?
883 | - [ ] Critical services running?
884 | - [ ] Recent errors identified?
885 | - [ ] Security verified?
886 | - [ ] Performance acceptable?
887 | - [ ] Root cause found?
888 | - [ ] Preventive measures?
889 | """
```
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/mcp_server/prompts/log_management_prompt.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Log Management prompts for the MCP Log Analyzer server.
3 | """
4 |
5 | from typing import Optional
6 | from mcp.server import FastMCP
7 |
8 |
9 | def register_log_management_prompts(mcp: FastMCP):
10 | """Register all log management prompts."""
11 |
12 | @mcp.prompt(
13 | title="Register Log Source",
14 | description="Guide for registering new log sources for analysis"
15 | )
16 | async def register_log_source_guide() -> str:
17 | """
18 | Guide for registering various types of log sources.
19 | """
20 | return """
21 | # 💾 Register Log Source Guide
22 |
23 | ## Tool: register_log_source
24 |
25 | ### Purpose
26 | Register a new log source for analysis. Supports various log formats.
27 |
28 | ### Parameters
29 | - **name**: Unique identifier for your log source
30 | - **source_type**: Type of log (evt, json, xml, csv, text)
31 | - **path**: File path or directory containing logs
32 | - **config**: Additional parser configuration (optional)
33 |
34 | ### Supported Log Types
35 | - **evt**: Windows Event Logs (System, Application, Security)
36 | - **json**: JSON-formatted application logs
37 | - **xml**: XML-structured logs
38 | - **csv**: Comma-separated value logs
39 | - **text**: Plain text logs (syslog, custom formats)
40 |
41 | ### Usage Examples
42 | ```
43 | # Windows System Events
44 | Tool: register_log_source
45 | Parameters: name="windows_system", source_type="evt", path="System"
46 |
47 | # JSON Application Logs
48 | Tool: register_log_source
49 | Parameters: name="app_logs", source_type="json", path="/var/log/myapp/app.json"
50 |
51 | # Text-based Syslog
52 | Tool: register_log_source
53 | Parameters: name="syslog", source_type="text", path="/var/log/syslog"
54 | ```
55 |
56 | ### Best Practices
57 | ✅ Use descriptive names for easy identification
58 | ✅ Organize by system and log type
59 | ✅ Verify file path exists before registering
60 | ✅ Set appropriate parser configurations
61 | """
62 |
63 | @mcp.prompt(
64 | title="Query Logs",
65 | description="Guide for querying and filtering registered log sources"
66 | )
67 | async def query_logs_guide(
68 | filter_type: Optional[str] = None
69 | ) -> str:
70 | """
71 | Guide for querying logs with various filters.
72 |
73 | Args:
74 | filter_type: Type of filter (time, level, content, etc.)
75 | """
76 |
77 | base_guide = """
78 | # 🔍 Query Logs Guide
79 |
80 | ## Tool: query_logs
81 |
82 | ### Purpose
83 | Query and filter logs from registered sources with powerful filtering options.
84 |
85 | ### Basic Parameters
86 | - **source_name**: Which registered source to query
87 | - **start_time/end_time**: Define time ranges
88 | - **limit/offset**: Paginate through results
89 | - **filters**: Apply specific criteria
90 |
91 | ### Usage Examples
92 | ```
93 | # Query recent errors
94 | Tool: query_logs
95 | Parameters: source_name="windows_system", filters={"level": "Error"}, start_time="1 hour ago"
96 |
97 | # Search for specific content
98 | Tool: query_logs
99 | Parameters: source_name="app_logs", filters={"message_contains": "database error"}
100 |
101 | # Paginate through results
102 | Tool: query_logs
103 | Parameters: source_name="syslog", limit=50, offset=100
104 | ```
105 | """
106 |
107 | filter_guides = {
108 | "time": """
109 | ### Time-based Filtering
110 | - **Absolute time**: "2024-01-15 10:00:00"
111 | - **Relative time**: "1 hour ago", "24 hours ago"
112 | - **Time ranges**: start_time and end_time
113 | - **Duration shortcuts**: "last_hours": 6
114 |
115 | Examples:
116 | ```
117 | # Last 24 hours
118 | start_time="24 hours ago"
119 |
120 | # Specific date range
121 | start_time="2024-01-15 00:00:00", end_time="2024-01-15 23:59:59"
122 | ```
123 | """,
124 | "level": """
125 | ### Level-based Filtering
126 | - **Error levels**: Error, Warning, Info, Debug
127 | - **Windows levels**: Error, Warning, Information
128 | - **Syslog priorities**: 0-7 (emerg to debug)
129 |
130 | Examples:
131 | ```
132 | filters={"level": "Error"}
133 | filters={"severity": "critical"}
134 | filters={"priority": [0, 1, 2]} # emerg, alert, crit
135 | ```
136 | """,
137 | "content": """
138 | ### Content Filtering
139 | - **Text search**: message_contains
140 | - **Regex patterns**: regex_pattern
141 | - **Field matching**: Exact field values
142 | - **Multiple criteria**: AND/OR conditions
143 |
144 | Examples:
145 | ```
146 | filters={"message_contains": "authentication failed"}
147 | filters={"regex_pattern": "error.*database.*timeout"}
148 | filters={"event_id": 7001, "source": "Service Control Manager"}
149 | ```
150 | """
151 | }
152 |
153 | if filter_type and filter_type.lower() in filter_guides:
154 | base_guide += filter_guides[filter_type.lower()]
155 |
156 | base_guide += """
157 | ### Performance Tips
158 | ✅ Use time ranges to limit data scope
159 | ✅ Apply specific filters to reduce noise
160 | ✅ Start with recent time periods
161 | ✅ Use pagination for large datasets
162 | """
163 |
164 | return base_guide
165 |
166 | @mcp.prompt(
167 | title="Analyze Logs",
168 | description="Guide for running log analysis (summary, pattern, anomaly)"
169 | )
170 | async def analyze_logs_guide(
171 | analysis_type: Optional[str] = None
172 | ) -> str:
173 | """
174 | Guide for different types of log analysis.
175 |
176 | Args:
177 | analysis_type: Type of analysis (summary, pattern, anomaly)
178 | """
179 |
180 | base_guide = """
181 | # 📊 Analyze Logs Guide
182 |
183 | ## Tool: analyze_logs
184 |
185 | ### Purpose
186 | Perform advanced analysis on logs to identify patterns, anomalies, and trends.
187 |
188 | ### Analysis Types
189 | - **summary**: General statistics and overview
190 | - **pattern**: Detect recurring patterns and frequencies
191 | - **anomaly**: Identify unusual or suspicious log entries
192 |
193 | ### Basic Usage
194 | ```
195 | Tool: analyze_logs
196 | Parameters: source_name="app_logs", analysis_type="summary"
197 | ```
198 | """
199 |
200 | analysis_guides = {
201 | "summary": """
202 | ### Summary Analysis
203 | Provides high-level overview and statistics.
204 |
205 | **What it shows:**
206 | - Total log count and time range
207 | - Error/Warning/Info distribution
208 | - Top sources and components
209 | - Peak activity periods
210 | - Message frequency analysis
211 |
212 | **Best for:**
213 | - Initial investigation
214 | - Health assessment
215 | - Capacity planning
216 | - Report generation
217 |
218 | **Example:**
219 | ```
220 | Tool: analyze_logs
221 | Parameters: source_name="windows_system", analysis_type="summary", time_duration="24h"
222 | ```
223 | """,
224 | "pattern": """
225 | ### Pattern Analysis
226 | Detects recurring patterns and correlations.
227 |
228 | **What it finds:**
229 | - Frequent error messages
230 | - Event sequences and correlations
231 | - Time-based patterns (hourly, daily)
232 | - Recurring issues
233 | - Common failure modes
234 |
235 | **Best for:**
236 | - Root cause analysis
237 | - Predictive maintenance
238 | - Identifying systematic issues
239 | - Performance optimization
240 |
241 | **Example:**
242 | ```
243 | Tool: analyze_logs
244 | Parameters: source_name="app_logs", analysis_type="pattern", filters={"level": "Error"}
245 | ```
246 | """,
247 | "anomaly": """
248 | ### Anomaly Detection
249 | Identifies unusual events and outliers.
250 |
251 | **What it detects:**
252 | - Unusual error spikes or drops
253 | - New error types not seen before
254 | - Unexpected source activity
255 | - Timing anomalies
256 | - Statistical outliers
257 |
258 | **Best for:**
259 | - Security monitoring
260 | - Early problem detection
261 | - Change detection
262 | - Incident investigation
263 |
264 | **Example:**
265 | ```
266 | Tool: analyze_logs
267 | Parameters: source_name="security_logs", analysis_type="anomaly", time_duration="48h"
268 | ```
269 | """
270 | }
271 |
272 | if analysis_type and analysis_type.lower() in analysis_guides:
273 | base_guide += analysis_guides[analysis_type.lower()]
274 | else:
275 | # Show all types if none specified
276 | for guide in analysis_guides.values():
277 | base_guide += guide
278 |
279 | base_guide += """
280 | ### Analysis Strategy
281 | 1. Start with summary for overview
282 | 2. Use pattern analysis for recurring issues
283 | 3. Apply anomaly detection for security
284 | 4. Combine analyses for comprehensive insights
285 | """
286 |
287 | return base_guide
288 |
289 | @mcp.prompt(
290 | title="Manage Log Sources",
291 | description="Guide for listing, viewing, and deleting log sources"
292 | )
293 | async def manage_log_sources() -> str:
294 | """
295 | Guide for managing registered log sources.
296 | """
297 | return """
298 | # 📋 Manage Log Sources
299 |
300 | ## Available Management Tools
301 |
302 | ### List All Sources
303 | ```
304 | Tool: list_log_sources
305 | ```
306 | Shows all registered log sources with:
307 | - Source names and types
308 | - File paths
309 | - Registration timestamps
310 | - Parser configurations
311 |
312 | ### Get Source Details
313 | ```
314 | Tool: get_log_source
315 | Parameters: name="source_name"
316 | ```
317 | Provides detailed information about a specific source:
318 | - Full configuration
319 | - Parser settings
320 | - Access status
321 | - Recent activity
322 |
323 | ### Delete Log Source
324 | ```
325 | Tool: delete_log_source
326 | Parameters: name="source_name"
327 | ```
328 | Removes a log source registration:
329 | - Cleans up configuration
330 | - Does not delete actual log files
331 | - Frees up the source name
332 |
333 | ## Management Best Practices
334 |
335 | ### Organization
336 | - Use naming conventions (system_component_type)
337 | - Group related sources logically
338 | - Document source purposes
339 | - Regular cleanup of unused sources
340 |
341 | ### Maintenance
342 | - Verify sources are still accessible
343 | - Update paths after log rotation
344 | - Remove obsolete sources
345 | - Monitor source performance
346 |
347 | ### Examples
348 | ```
349 | # List all sources to review
350 | Tool: list_log_sources
351 |
352 | # Check specific source status
353 | Tool: get_log_source
354 | Parameters: name="prod_app_logs"
355 |
356 | # Remove old test source
357 | Tool: delete_log_source
358 | Parameters: name="test_logs_old"
359 | ```
360 | """
361 |
362 | @mcp.prompt(
363 | title="Windows Event Log Setup",
364 | description="Guide for setting up Windows Event Log sources"
365 | )
366 | async def windows_event_setup() -> str:
367 | """
368 | Guide for Windows Event Log configuration.
369 | """
370 | return """
371 | # 🪟 Windows Event Log Setup
372 |
373 | ## Registering Windows Event Logs
374 |
375 | ### System Event Log
376 | ```
377 | Tool: register_log_source
378 | Parameters: name="windows_system", source_type="evt", path="System"
379 | ```
380 | **Contains**: Hardware, drivers, system services, kernel events
381 |
382 | ### Application Event Log
383 | ```
384 | Tool: register_log_source
385 | Parameters: name="windows_application", source_type="evt", path="Application"
386 | ```
387 | **Contains**: Application crashes, errors, informational events
388 |
389 | ### Security Event Log
390 | ```
391 | Tool: register_log_source
392 | Parameters: name="windows_security", source_type="evt", path="Security"
393 | ```
394 | **Contains**: Authentication, authorization, audit events
395 | **Note**: Requires administrator privileges
396 |
397 | ## Common Windows Queries
398 |
399 | ### Recent System Errors
400 | ```
401 | Tool: query_logs
402 | Parameters: source_name="windows_system", filters={"level": "Error"}, start_time="24 hours ago"
403 | ```
404 |
405 | ### Service Failures
406 | ```
407 | Tool: query_logs
408 | Parameters: source_name="windows_system", filters={"event_id": [7000, 7001, 7023]}
409 | ```
410 |
411 | ### Application Crashes
412 | ```
413 | Tool: query_logs
414 | Parameters: source_name="windows_application", filters={"event_id": 1000}
415 | ```
416 |
417 | ### Failed Logins (Security)
418 | ```
419 | Tool: query_logs
420 | Parameters: source_name="windows_security", filters={"event_id": 4625}
421 | ```
422 |
423 | ## Prerequisites
424 | - Windows operating system
425 | - pywin32 package installed
426 | - Administrator rights for Security log
427 | - Appropriate Event Log permissions
428 | """
429 |
430 | @mcp.prompt(
431 | title="Structured Log Setup",
432 | description="Guide for JSON, XML, and CSV log sources"
433 | )
434 | async def structured_log_setup(
435 | format_type: Optional[str] = None
436 | ) -> str:
437 | """
438 | Guide for structured log formats.
439 |
440 | Args:
441 | format_type: Log format type (json, xml, csv)
442 | """
443 |
444 | base_guide = """
445 | # 📄 Structured Log Setup
446 |
447 | ## Supported Structured Formats
448 | - **JSON**: JavaScript Object Notation logs
449 | - **XML**: Extensible Markup Language logs
450 | - **CSV**: Comma-Separated Values logs
451 | """
452 |
453 | format_guides = {
454 | "json": """
455 | ### JSON Log Configuration
456 | ```
457 | Tool: register_log_source
458 | Parameters:
459 | name="app_json_logs"
460 | source_type="json"
461 | path="/var/log/app/application.json"
462 | config={
463 | "timestamp_field": "timestamp",
464 | "level_field": "severity",
465 | "message_field": "message"
466 | }
467 | ```
468 |
469 | **Example JSON Format:**
470 | ```json
471 | {
472 | "timestamp": "2024-01-15T10:30:00Z",
473 | "severity": "ERROR",
474 | "message": "Database connection failed",
475 | "component": "database",
476 | "error_code": "DB_001"
477 | }
478 | ```
479 |
480 | **Query Example:**
481 | ```
482 | Tool: query_logs
483 | Parameters:
484 | source_name="app_json_logs"
485 | filters={"severity": "ERROR", "component": "database"}
486 | ```
487 | """,
488 | "xml": """
489 | ### XML Log Configuration
490 | ```
491 | Tool: register_log_source
492 | Parameters:
493 | name="app_xml_logs"
494 | source_type="xml"
495 | path="/var/log/app/events.xml"
496 | config={
497 | "root_element": "events",
498 | "event_element": "event",
499 | "timestamp_path": "event/timestamp",
500 | "level_path": "event/level"
501 | }
502 | ```
503 |
504 | **Example XML Format:**
505 | ```xml
506 | <events>
507 | <event>
508 | <timestamp>2024-01-15T10:30:00Z</timestamp>
509 | <level>ERROR</level>
510 | <message>Service initialization failed</message>
511 | <source>ServiceManager</source>
512 | </event>
513 | </events>
514 | ```
515 |
516 | **Query Example:**
517 | ```
518 | Tool: query_logs
519 | Parameters:
520 | source_name="app_xml_logs"
521 | filters={"level": "ERROR", "source": "ServiceManager"}
522 | ```
523 | """,
524 | "csv": """
525 | ### CSV Log Configuration
526 | ```
527 | Tool: register_log_source
528 | Parameters:
529 | name="app_csv_logs"
530 | source_type="csv"
531 | path="/var/log/app/metrics.csv"
532 | config={
533 | "delimiter": ",",
534 | "has_header": true,
535 | "timestamp_column": 0,
536 | "level_column": 2,
537 | "message_column": 3
538 | }
539 | ```
540 |
541 | **Example CSV Format:**
542 | ```csv
543 | timestamp,host,level,message,duration_ms
544 | 2024-01-15T10:30:00Z,server01,ERROR,Request timeout,5023
545 | 2024-01-15T10:30:01Z,server01,INFO,Request processed,245
546 | ```
547 |
548 | **Query Example:**
549 | ```
550 | Tool: query_logs
551 | Parameters:
552 | source_name="app_csv_logs"
553 | filters={"level": "ERROR", "host": "server01"}
554 | ```
555 | """
556 | }
557 |
558 | if format_type and format_type.lower() in format_guides:
559 | base_guide += format_guides[format_type.lower()]
560 | else:
561 | for guide in format_guides.values():
562 | base_guide += guide
563 |
564 | base_guide += """
565 | ## Configuration Best Practices
566 | ✅ Specify field mappings clearly
567 | ✅ Use consistent timestamp formats
568 | ✅ Validate log format before registering
569 | ✅ Test queries after registration
570 | ✅ Document custom field meanings
571 | """
572 |
573 | return base_guide
574 |
575 | @mcp.prompt(
576 | title="Text Log Setup",
577 | description="Guide for plain text and custom format logs"
578 | )
579 | async def text_log_setup() -> str:
580 | """
581 | Guide for text-based log sources.
582 | """
583 | return """
584 | # 📝 Text Log Setup
585 |
586 | ## Registering Text Logs
587 |
588 | ### Basic Text Log
589 | ```
590 | Tool: register_log_source
591 | Parameters:
592 | name="syslog"
593 | source_type="text"
594 | path="/var/log/syslog"
595 | ```
596 |
597 | ### Custom Format Configuration
598 | ```
599 | Tool: register_log_source
600 | Parameters:
601 | name="custom_app_log"
602 | source_type="text"
603 | path="/var/log/app/custom.log"
604 | config={
605 | "pattern": "(?P<timestamp>\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) \\[(?P<level>\\w+)\\] (?P<message>.*)",
606 | "timestamp_format": "%Y-%m-%d %H:%M:%S"
607 | }
608 | ```
609 |
610 | ## Common Text Log Formats
611 |
612 | ### Syslog Format
613 | ```
614 | Jan 15 10:30:45 hostname service[1234]: Error message here
615 | ```
616 | **Config**: Built-in syslog parser
617 |
618 | ### Apache/Nginx Access Logs
619 | ```
620 | 192.168.1.1 - - [15/Jan/2024:10:30:45 +0000] "GET /api/data HTTP/1.1" 500 1234
621 | ```
622 | **Config**: Use pattern matching for fields
623 |
624 | ### Application Logs
625 | ```
626 | 2024-01-15 10:30:45 [ERROR] [database] Connection pool exhausted
627 | ```
628 | **Config**: Define custom regex pattern
629 |
630 | ## Query Examples
631 |
632 | ### Search by Content
633 | ```
634 | Tool: query_logs
635 | Parameters:
636 | source_name="syslog"
637 | filters={"message_contains": "authentication failed"}
638 | ```
639 |
640 | ### Filter by Pattern
641 | ```
642 | Tool: query_logs
643 | Parameters:
644 | source_name="custom_app_log"
645 | filters={"regex_pattern": "ERROR.*database.*timeout"}
646 | ```
647 |
648 | ## Parsing Tips
649 | ✅ Test regex patterns before registering
650 | ✅ Use named capture groups for fields
651 | ✅ Handle multi-line log entries
652 | ✅ Consider log rotation handling
653 | ✅ Validate timestamp parsing
654 | """
655 |
656 | @mcp.prompt(
657 | title="Log Analysis Workflow",
658 | description="Step-by-step workflow for comprehensive log analysis"
659 | )
660 | async def log_analysis_workflow() -> str:
661 | """
662 | Complete workflow for log analysis tasks.
663 | """
664 | return """
665 | # 🔄 Log Analysis Workflow
666 |
667 | ## Step-by-Step Analysis Process
668 |
669 | ### 1. Setup Phase
670 | ```
671 | # Register your log sources
672 | Tool: register_log_source
673 | Parameters: [appropriate for your log type]
674 |
675 | # Verify registration
676 | Tool: list_log_sources
677 | ```
678 |
679 | ### 2. Initial Assessment
680 | ```
681 | # Get overview with summary analysis
682 | Tool: analyze_logs
683 | Parameters: source_name="your_source", analysis_type="summary"
684 |
685 | # Check recent errors
686 | Tool: query_logs
687 | Parameters: source_name="your_source", filters={"level": "Error"}, start_time="6 hours ago"
688 | ```
689 |
690 | ### 3. Deep Dive Investigation
691 | ```
692 | # Find patterns in errors
693 | Tool: analyze_logs
694 | Parameters: source_name="your_source", analysis_type="pattern", filters={"level": "Error"}
695 |
696 | # Search for specific issues
697 | Tool: query_logs
698 | Parameters: source_name="your_source", filters={"message_contains": "specific error"}
699 | ```
700 |
701 | ### 4. Anomaly Detection
702 | ```
703 | # Check for unusual activity
704 | Tool: analyze_logs
705 | Parameters: source_name="your_source", analysis_type="anomaly", time_duration="48h"
706 | ```
707 |
708 | ### 5. Reporting
709 | - Document findings from summary analysis
710 | - List identified patterns and frequencies
711 | - Note any anomalies detected
712 | - Provide recommendations
713 |
714 | ## Common Analysis Scenarios
715 |
716 | ### Performance Investigation
717 | 1. Register application logs
718 | 2. Query for performance warnings
719 | 3. Analyze patterns in slow operations
720 | 4. Identify peak problem times
721 |
722 | ### Security Audit
723 | 1. Register security/auth logs
724 | 2. Search for failed authentications
725 | 3. Detect anomalous access patterns
726 | 4. Review privilege escalations
727 |
728 | ### Error Troubleshooting
729 | 1. Register relevant log sources
730 | 2. Filter by error level
731 | 3. Analyze error patterns
732 | 4. Correlate with system events
733 |
734 | ### Capacity Planning
735 | 1. Analyze usage patterns over time
736 | 2. Identify growth trends
737 | 3. Find resource bottlenecks
738 | 4. Project future needs
739 |
740 | ## Best Practices
741 | ✅ Always start with summary analysis
742 | ✅ Use time-based filters to focus investigation
743 | ✅ Combine multiple analysis types
744 | ✅ Document your findings
745 | ✅ Clean up test sources when done
746 | """
747 |
748 | @mcp.prompt(
749 | title="Log Troubleshooting",
750 | description="Troubleshooting common log analysis issues"
751 | )
752 | async def log_troubleshooting() -> str:
753 | """
754 | Troubleshooting guide for common issues.
755 | """
756 | return """
757 | # 🔧 Log Troubleshooting Guide
758 |
759 | ## Registration Issues
760 |
761 | ### "Log source already exists"
762 | **Solution:**
763 | 1. List existing sources: `list_log_sources`
764 | 2. Delete if needed: `delete_log_source`
765 | 3. Choose different name
766 |
767 | ### "File not found"
768 | **Solution:**
769 | 1. Verify file path is correct
770 | 2. Check file permissions
771 | 3. Ensure path is absolute, not relative
772 | 4. Test file access with system tools
773 |
774 | ### "Unsupported source type"
775 | **Solution:**
776 | - Valid types: evt, json, xml, csv, text
777 | - Check spelling and case
778 | - Use "text" for custom formats
779 |
780 | ## Query Issues
781 |
782 | ### "No logs returned"
783 | **Possible causes:**
784 | 1. Time range too restrictive
785 | 2. Filters excluding all data
786 | 3. Log source empty in time range
787 | 4. Parsing errors
788 |
789 | **Solutions:**
790 | - Remove filters and try again
791 | - Expand time range
792 | - Check source has recent data
793 | - Verify log format matches parser
794 |
795 | ### "Query timeout"
796 | **Solutions:**
797 | 1. Reduce time range
798 | 2. Add more specific filters
799 | 3. Use pagination (limit/offset)
800 | 4. Query smaller time windows
801 |
802 | ### "Invalid filter format"
803 | **Solutions:**
804 | - Check filter field names
805 | - Verify filter syntax
806 | - Use correct data types
807 | - Test filters incrementally
808 |
809 | ## Analysis Issues
810 |
811 | ### "Analysis returns empty"
812 | **Check:**
813 | 1. Log source contains data
814 | 2. Time range includes logs
815 | 3. Filters not too restrictive
816 | 4. Analysis type is valid
817 |
818 | ### "Pattern analysis finds nothing"
819 | **Solutions:**
820 | - Increase time range for more data
821 | - Ensure logs have patterns to find
822 | - Check log format consistency
823 | - Try different filter criteria
824 |
825 | ### "Anomaly detection not working"
826 | **Requirements:**
827 | - Sufficient historical data
828 | - Consistent log format
829 | - Baseline period available
830 | - Varied log content
831 |
832 | ## Performance Issues
833 |
834 | ### Slow Queries
835 | - Use specific time ranges
836 | - Apply filters early
837 | - Limit result count
838 | - Index frequently searched fields
839 |
840 | ### Large Log Files
841 | - Implement log rotation
842 | - Archive old logs
843 | - Use time-based queries
844 | - Consider partitioning
845 |
846 | ### Memory Issues
847 | - Process in smaller chunks
848 | - Use streaming where possible
849 | - Limit concurrent queries
850 | - Monitor resource usage
851 |
852 | ## Platform-Specific Issues
853 |
854 | ### Windows
855 | - **pywin32 missing**: Install with pip
856 | - **Access denied**: Need admin rights
857 | - **Security log**: Requires elevation
858 |
859 | ### Linux
860 | - **Permission denied**: Check file permissions
861 | - **Log rotation**: Handle rotated files
862 | - **Different paths**: Check distribution
863 |
864 | ## Quick Fixes Checklist
865 | - [ ] Verify log source is registered
866 | - [ ] Check file permissions and access
867 | - [ ] Validate time ranges in queries
868 | - [ ] Test with minimal filters first
869 | - [ ] Ensure proper log format
870 | - [ ] Check system resources
871 | - [ ] Review error messages carefully
872 | """
```
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/mcp_server/tools/process_test_tools.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Process monitoring and system resource testing MCP tools.
3 | """
4 |
from typing import Any, Dict, List, Optional

import psutil
from mcp.server import FastMCP
from pydantic import BaseModel, Field
10 |
11 |
class ProcessAnalysisRequest(BaseModel):
    """Request model for process analysis.

    Filter and ordering options used when selecting processes to report.
    """

    # FIX: the original annotated this `str` while defaulting it to None;
    # Optional[str] matches the actual contract (None = match any process).
    process_name: Optional[str] = Field(
        None, description="Specific process name to analyze"
    )
    min_cpu_percent: float = Field(0.0, description="Minimum CPU usage threshold")
    min_memory_percent: float = Field(0.0, description="Minimum memory usage threshold")
    max_results: int = Field(20, description="Maximum number of processes to return")
    sort_by: str = Field("cpu", description="Sort by 'cpu', 'memory', or 'pid'")
20 |
21 |
class SystemResourceRequest(BaseModel):
    """Request model for system resource monitoring."""

    # Toggle collection of network I/O counters and active connection count.
    include_network: bool = Field(True, description="Include network statistics")
    # Toggle collection of disk usage and disk I/O counters.
    include_disk: bool = Field(True, description="Include disk I/O statistics")
    # Passed to psutil.cpu_percent; a longer interval smooths the CPU reading
    # but blocks the tool for that long.
    sample_interval: float = Field(1.0, description="Sampling interval in seconds")
28 |
29 |
class ProcessMonitoringRequest(BaseModel):
    """Request model for process monitoring over time.

    NOTE(review): no tool in this module currently consumes this model —
    presumably reserved for a time-series monitoring tool; confirm before
    removing.
    """

    # Substring matched case-insensitively against process names.
    process_name: str = Field(..., description="Process name to monitor")
    # Total wall-clock time to observe the process.
    duration_seconds: int = Field(60, description="Monitoring duration in seconds")
    # Time between successive samples within the monitoring window.
    sample_interval: float = Field(5.0, description="Sampling interval in seconds")
36 |
37 |
38 | def register_process_test_tools(mcp: FastMCP):
39 | """Register all process testing tools with the MCP server."""
40 |
41 | @mcp.tool()
42 | async def test_system_resources_access() -> Dict[str, Any]:
43 | """
44 | Test system resource monitoring capabilities.
45 |
46 | This tool checks if the system can access various system
47 | resource information and provides diagnostic data.
48 | """
49 | try:
50 | test_results = {}
51 |
52 | # Test basic system info access
53 | try:
54 | test_results["cpu"] = {
55 | "accessible": True,
56 | "cpu_count": psutil.cpu_count(),
57 | "cpu_count_logical": psutil.cpu_count(logical=True),
58 | "current_usage": psutil.cpu_percent(interval=0.1),
59 | }
60 | except Exception as e:
61 | test_results["cpu"] = {"accessible": False, "error": str(e)}
62 |
63 | # Test memory access
64 | try:
65 | memory = psutil.virtual_memory()
66 | test_results["memory"] = {
67 | "accessible": True,
68 | "total_gb": round(memory.total / (1024**3), 2),
69 | "available_gb": round(memory.available / (1024**3), 2),
70 | "percent_used": memory.percent,
71 | }
72 | except Exception as e:
73 | test_results["memory"] = {"accessible": False, "error": str(e)}
74 |
75 | # Test disk access
76 | try:
77 | disk = psutil.disk_usage("/")
78 | test_results["disk"] = {
79 | "accessible": True,
80 | "total_gb": round(disk.total / (1024**3), 2),
81 | "used_gb": round(disk.used / (1024**3), 2),
82 | "free_gb": round(disk.free / (1024**3), 2),
83 | "percent_used": round((disk.used / disk.total) * 100, 1),
84 | }
85 | except Exception as e:
86 | test_results["disk"] = {"accessible": False, "error": str(e)}
87 |
88 | # Test network access
89 | try:
90 | network = psutil.net_io_counters()
91 | test_results["network"] = {
92 | "accessible": True,
93 | "bytes_sent": network.bytes_sent,
94 | "bytes_recv": network.bytes_recv,
95 | "packets_sent": network.packets_sent,
96 | "packets_recv": network.packets_recv,
97 | }
98 | except Exception as e:
99 | test_results["network"] = {"accessible": False, "error": str(e)}
100 |
101 | # Test process enumeration
102 | try:
103 | processes = list(psutil.process_iter(["pid", "name"]))
104 | test_results["processes"] = {
105 | "accessible": True,
106 | "total_count": len(processes),
107 | "sample_processes": [p.info for p in processes[:5]],
108 | }
109 | except Exception as e:
110 | test_results["processes"] = {"accessible": False, "error": str(e)}
111 |
112 | return {
113 | "status": "completed",
114 | "psutil_version": psutil.__version__,
115 | "test_results": test_results,
116 | }
117 |
118 | except Exception as e:
119 | return {"error": f"Error testing system resources: {str(e)}"}
120 |
121 | @mcp.tool()
122 | async def analyze_system_performance(
123 | request: SystemResourceRequest,
124 | ) -> Dict[str, Any]:
125 | """
126 | Analyze current system performance and resource usage.
127 |
128 | This tool provides a comprehensive analysis of system performance
129 | including CPU, memory, disk, and network usage patterns.
130 | """
131 | try:
132 | performance_data = {}
133 |
134 | # CPU Analysis
135 | cpu_percent = psutil.cpu_percent(interval=request.sample_interval)
136 | cpu_freq = psutil.cpu_freq()
137 | performance_data["cpu"] = {
138 | "usage_percent": cpu_percent,
139 | "core_count": psutil.cpu_count(),
140 | "logical_core_count": psutil.cpu_count(logical=True),
141 | "frequency": {
142 | "current": cpu_freq.current if cpu_freq else None,
143 | "min": cpu_freq.min if cpu_freq else None,
144 | "max": cpu_freq.max if cpu_freq else None,
145 | },
146 | "load_average": (
147 | psutil.getloadavg() if hasattr(psutil, "getloadavg") else None
148 | ),
149 | }
150 |
151 | # Memory Analysis
152 | memory = psutil.virtual_memory()
153 | swap = psutil.swap_memory()
154 | performance_data["memory"] = {
155 | "virtual": {
156 | "total_gb": round(memory.total / (1024**3), 2),
157 | "available_gb": round(memory.available / (1024**3), 2),
158 | "used_gb": round(memory.used / (1024**3), 2),
159 | "percent_used": memory.percent,
160 | },
161 | "swap": {
162 | "total_gb": round(swap.total / (1024**3), 2),
163 | "used_gb": round(swap.used / (1024**3), 2),
164 | "percent_used": swap.percent,
165 | },
166 | }
167 |
168 | # Disk Analysis
169 | if request.include_disk:
170 | disk_usage = psutil.disk_usage("/")
171 | disk_io = psutil.disk_io_counters()
172 | performance_data["disk"] = {
173 | "usage": {
174 | "total_gb": round(disk_usage.total / (1024**3), 2),
175 | "used_gb": round(disk_usage.used / (1024**3), 2),
176 | "free_gb": round(disk_usage.free / (1024**3), 2),
177 | "percent_used": round(
178 | (disk_usage.used / disk_usage.total) * 100, 1
179 | ),
180 | },
181 | "io_counters": (
182 | {
183 | "read_bytes": disk_io.read_bytes if disk_io else None,
184 | "write_bytes": disk_io.write_bytes if disk_io else None,
185 | "read_count": disk_io.read_count if disk_io else None,
186 | "write_count": disk_io.write_count if disk_io else None,
187 | }
188 | if disk_io
189 | else None
190 | ),
191 | }
192 |
193 | # Network Analysis
194 | if request.include_network:
195 | net_io = psutil.net_io_counters()
196 | net_connections = len(psutil.net_connections())
197 | performance_data["network"] = {
198 | "io_counters": (
199 | {
200 | "bytes_sent": net_io.bytes_sent if net_io else None,
201 | "bytes_recv": net_io.bytes_recv if net_io else None,
202 | "packets_sent": net_io.packets_sent if net_io else None,
203 | "packets_recv": net_io.packets_recv if net_io else None,
204 | }
205 | if net_io
206 | else None
207 | ),
208 | "active_connections": net_connections,
209 | }
210 |
211 | # Performance Assessment
212 | performance_status = "good"
213 | issues = []
214 |
215 | if cpu_percent > 80:
216 | performance_status = "concerning"
217 | issues.append(f"High CPU usage: {cpu_percent}%")
218 | elif cpu_percent > 60:
219 | performance_status = "fair"
220 | issues.append(f"Moderate CPU usage: {cpu_percent}%")
221 |
222 | if memory.percent > 90:
223 | performance_status = "concerning"
224 | issues.append(f"High memory usage: {memory.percent}%")
225 | elif memory.percent > 75:
226 | if performance_status == "good":
227 | performance_status = "fair"
228 | issues.append(f"Moderate memory usage: {memory.percent}%")
229 |
230 | return {
231 | "performance_status": performance_status,
232 | "issues": issues,
233 | "performance_data": performance_data,
234 | "sampling_interval": request.sample_interval,
235 | }
236 |
237 | except Exception as e:
238 | return {"error": f"Error analyzing system performance: {str(e)}"}
239 |
240 | @mcp.tool()
241 | async def find_resource_intensive_processes(
242 | request: ProcessAnalysisRequest,
243 | ) -> Dict[str, Any]:
244 | """
245 | Find processes that are consuming significant system resources.
246 |
247 | This tool identifies processes with high CPU or memory usage
248 | and provides detailed information for troubleshooting.
249 | """
250 | try:
251 | processes = []
252 |
253 | # Collect process information
254 | for proc in psutil.process_iter(
255 | [
256 | "pid",
257 | "name",
258 | "cpu_percent",
259 | "memory_percent",
260 | "memory_info",
261 | "create_time",
262 | "status",
263 | "cmdline",
264 | ]
265 | ):
266 | try:
267 | proc_info = proc.info
268 |
269 | # Get CPU percentage with brief interval
270 | if proc_info["cpu_percent"] is None:
271 | proc_info["cpu_percent"] = proc.cpu_percent(interval=0.1)
272 |
273 | # Apply filters
274 | if (
275 | request.process_name
276 | and request.process_name.lower()
277 | not in proc_info["name"].lower()
278 | ):
279 | continue
280 |
281 | if proc_info["cpu_percent"] < request.min_cpu_percent:
282 | continue
283 |
284 | if proc_info["memory_percent"] < request.min_memory_percent:
285 | continue
286 |
287 | # Add additional details
288 | proc_info["memory_mb"] = (
289 | round(proc_info["memory_info"].rss / (1024 * 1024), 1)
290 | if proc_info["memory_info"]
291 | else 0
292 | )
293 | proc_info["command_line"] = (
294 | " ".join(proc_info["cmdline"][:3])
295 | if proc_info["cmdline"]
296 | else ""
297 | )
298 |
299 | processes.append(proc_info)
300 |
301 | except (psutil.NoSuchProcess, psutil.AccessDenied):
302 | continue
303 |
304 | # Sort processes
305 | if request.sort_by == "cpu":
306 | processes.sort(key=lambda x: x.get("cpu_percent", 0), reverse=True)
307 | elif request.sort_by == "memory":
308 | processes.sort(key=lambda x: x.get("memory_percent", 0), reverse=True)
309 | elif request.sort_by == "pid":
310 | processes.sort(key=lambda x: x.get("pid", 0))
311 |
312 | # Limit results
313 | limited_processes = processes[: request.max_results]
314 |
315 | # Calculate summary statistics
316 | if processes:
317 | total_cpu = sum(p.get("cpu_percent", 0) for p in processes)
318 | total_memory = sum(p.get("memory_percent", 0) for p in processes)
319 | avg_cpu = total_cpu / len(processes)
320 | avg_memory = total_memory / len(processes)
321 | else:
322 | total_cpu = avg_cpu = total_memory = avg_memory = 0
323 |
324 | return {
325 | "search_criteria": {
326 | "process_name": request.process_name,
327 | "min_cpu_percent": request.min_cpu_percent,
328 | "min_memory_percent": request.min_memory_percent,
329 | "sort_by": request.sort_by,
330 | },
331 | "processes": limited_processes,
332 | "summary": {
333 | "total_matching": len(processes),
334 | "returned_count": len(limited_processes),
335 | "total_cpu_usage": round(total_cpu, 1),
336 | "total_memory_usage": round(total_memory, 1),
337 | "average_cpu_usage": round(avg_cpu, 1),
338 | "average_memory_usage": round(avg_memory, 1),
339 | },
340 | }
341 |
342 | except Exception as e:
343 | return {"error": f"Error finding resource intensive processes: {str(e)}"}
344 |
345 | @mcp.tool()
346 | async def monitor_process_health(process_name: str) -> Dict[str, Any]:
347 | """
348 | Monitor the health and status of a specific process.
349 |
350 | This tool provides detailed information about a specific process
351 | including resource usage, status, and potential issues.
352 | """
353 | try:
354 | matching_processes = []
355 |
356 | # Find all processes matching the name
357 | for proc in psutil.process_iter(
358 | [
359 | "pid",
360 | "name",
361 | "cpu_percent",
362 | "memory_percent",
363 | "memory_info",
364 | "create_time",
365 | "status",
366 | "cmdline",
367 | "num_threads",
368 | "connections",
369 | ]
370 | ):
371 | try:
372 | if process_name.lower() in proc.info["name"].lower():
373 | proc_info = proc.info.copy()
374 |
375 | # Get current CPU usage
376 | proc_info["current_cpu"] = proc.cpu_percent(interval=0.1)
377 |
378 | # Add memory in MB
379 | proc_info["memory_mb"] = (
380 | round(proc_info["memory_info"].rss / (1024 * 1024), 1)
381 | if proc_info["memory_info"]
382 | else 0
383 | )
384 |
385 | # Get process age
386 | from datetime import datetime
387 |
388 | create_time = datetime.fromtimestamp(proc_info["create_time"])
389 | proc_info["age"] = str(datetime.now() - create_time).split(".")[
390 | 0
391 | ]
392 |
393 | # Count network connections
394 | try:
395 | connections = proc.connections()
396 | proc_info["network_connections"] = len(connections)
397 | except (psutil.AccessDenied, psutil.NoSuchProcess):
398 | proc_info["network_connections"] = "Access denied"
399 |
400 | matching_processes.append(proc_info)
401 |
402 | except (psutil.NoSuchProcess, psutil.AccessDenied):
403 | continue
404 |
405 | if not matching_processes:
406 | return {
407 | "process_name": process_name,
408 | "found": False,
409 | "message": f"No processes found matching '{process_name}'",
410 | }
411 |
412 | # Health assessment
413 | health_issues = []
414 | total_cpu = sum(p.get("current_cpu", 0) for p in matching_processes)
415 | total_memory = sum(p.get("memory_percent", 0) for p in matching_processes)
416 |
417 | if total_cpu > 50:
418 | health_issues.append(f"High CPU usage: {total_cpu:.1f}%")
419 | if total_memory > 20:
420 | health_issues.append(f"High memory usage: {total_memory:.1f}%")
421 |
422 | # Check for multiple instances
423 | if len(matching_processes) > 1:
424 | health_issues.append(
425 | f"Multiple instances running: {len(matching_processes)}"
426 | )
427 |
428 | health_status = "healthy" if not health_issues else "issues_detected"
429 |
430 | return {
431 | "process_name": process_name,
432 | "found": True,
433 | "health_status": health_status,
434 | "health_issues": health_issues,
435 | "process_count": len(matching_processes),
436 | "processes": matching_processes,
437 | "summary": {
438 | "total_cpu_usage": round(total_cpu, 1),
439 | "total_memory_usage": round(total_memory, 1),
440 | "total_memory_mb": sum(
441 | p.get("memory_mb", 0) for p in matching_processes
442 | ),
443 | },
444 | }
445 |
446 | except Exception as e:
447 | return {"error": f"Error monitoring process health: {str(e)}"}
448 |
449 | @mcp.tool()
450 | async def get_system_health_summary() -> Dict[str, Any]:
451 | """
452 | Get overall system health summary.
453 |
454 | This tool provides a comprehensive overview of system health
455 | including resource usage, top processes, and potential issues.
456 | """
457 | try:
458 | from datetime import datetime
459 |
460 | # System resource summary
461 | cpu_percent = psutil.cpu_percent(interval=1.0)
462 | memory = psutil.virtual_memory()
463 | disk = psutil.disk_usage("/")
464 |
465 | # Get top processes by CPU and memory
466 | processes = []
467 | for proc in psutil.process_iter(
468 | ["pid", "name", "cpu_percent", "memory_percent"]
469 | ):
470 | try:
471 | proc_info = proc.info
472 | if proc_info["cpu_percent"] is None:
473 | proc_info["cpu_percent"] = proc.cpu_percent(interval=0.1)
474 | processes.append(proc_info)
475 | except (psutil.NoSuchProcess, psutil.AccessDenied):
476 | continue
477 |
478 | # Top CPU consumers
479 | top_cpu = sorted(
480 | processes, key=lambda x: x.get("cpu_percent", 0), reverse=True
481 | )[:5]
482 |
483 | # Top memory consumers
484 | top_memory = sorted(
485 | processes, key=lambda x: x.get("memory_percent", 0), reverse=True
486 | )[:5]
487 |
488 | # Health assessment
489 | health_score = 100
490 | issues = []
491 |
492 | if cpu_percent > 80:
493 | health_score -= 30
494 | issues.append(f"High CPU usage: {cpu_percent}%")
495 | elif cpu_percent > 60:
496 | health_score -= 15
497 | issues.append(f"Moderate CPU usage: {cpu_percent}%")
498 |
499 | if memory.percent > 90:
500 | health_score -= 25
501 | issues.append(f"High memory usage: {memory.percent}%")
502 | elif memory.percent > 75:
503 | health_score -= 10
504 | issues.append(f"Moderate memory usage: {memory.percent}%")
505 |
506 | disk_percent = (disk.used / disk.total) * 100
507 | if disk_percent > 90:
508 | health_score -= 20
509 | issues.append(f"High disk usage: {disk_percent:.1f}%")
510 | elif disk_percent > 80:
511 | health_score -= 10
512 | issues.append(f"Moderate disk usage: {disk_percent:.1f}%")
513 |
514 | # Determine overall health status
515 | if health_score >= 80:
516 | health_status = "excellent"
517 | elif health_score >= 60:
518 | health_status = "good"
519 | elif health_score >= 40:
520 | health_status = "fair"
521 | else:
522 | health_status = "poor"
523 |
524 | return {
525 | "health_status": health_status,
526 | "health_score": max(0, health_score),
527 | "issues": issues,
528 | "system_resources": {
529 | "cpu_usage_percent": cpu_percent,
530 | "memory_usage_percent": memory.percent,
531 | "disk_usage_percent": round(disk_percent, 1),
532 | "process_count": len(processes),
533 | },
534 | "top_processes": {
535 | "cpu_consumers": top_cpu,
536 | "memory_consumers": top_memory,
537 | },
538 | "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
539 | }
540 |
541 | except Exception as e:
542 | return {"error": f"Error getting system health summary: {str(e)}"}
543 |
```
--------------------------------------------------------------------------------
/src/mcp_log_analyzer/parsers/etl_cached_parser.py:
--------------------------------------------------------------------------------
```python
1 | """ETL parser with CSV caching to avoid repeated conversions."""
2 |
3 | import csv
4 | import hashlib
5 | import json
6 | import logging
7 | import os
8 | import platform
9 | import subprocess
10 | import tempfile
11 | from datetime import datetime
12 | from pathlib import Path
13 | from typing import Any, Dict, Iterator, List, Optional, Union
14 |
15 | from ..core.models import LogRecord, LogSource, LogType
16 | from .base import BaseParser
17 |
18 | logger = logging.getLogger(__name__)
19 |
20 |
21 | class EtlCachedParser(BaseParser):
22 | """ETL parser that caches CSV conversions for performance."""
23 |
24 | # Class-level cache directory
25 | _cache_dir: Optional[str] = None
26 | _cache_registry: Dict[str, Dict[str, Any]] = {} # Maps ETL file paths to cached CSV paths
27 | _conversion_locks: Dict[str, Any] = {} # Prevents concurrent conversions of same file
28 |
    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialize ETL cached parser.

        Locates tracerpt.exe (Windows only) and ensures the class-level
        CSV cache directory and registry are initialized.

        Args:
            config: Parser configuration.
        """
        super().__init__(config)
        # None when tracerpt.exe is unavailable (e.g. non-Windows hosts).
        self.tracerpt_path = self._find_tracerpt()
        self._init_cache_dir()
38 |
39 | @classmethod
40 | def _init_cache_dir(cls) -> None:
41 | """Initialize the cache directory if not already done."""
42 | if cls._cache_dir is None:
43 | # Create cache directory in temp
44 | cls._cache_dir = os.path.join(tempfile.gettempdir(), "mcp_etl_cache")
45 | os.makedirs(cls._cache_dir, exist_ok=True)
46 |
47 | # Load cache registry if it exists
48 | registry_file = os.path.join(cls._cache_dir, "cache_registry.json")
49 | if os.path.exists(registry_file):
50 | try:
51 | with open(registry_file, "r") as f:
52 | cls._cache_registry = json.load(f)
53 | # Clean up stale entries
54 | cls._cleanup_stale_cache()
55 | except Exception:
56 | cls._cache_registry = {}
57 |
58 | @classmethod
59 | def _save_cache_registry(cls) -> None:
60 | """Save the cache registry to disk."""
61 | if cls._cache_dir is None:
62 | return
63 | registry_file = os.path.join(cls._cache_dir, "cache_registry.json")
64 | try:
65 | with open(registry_file, "w") as f:
66 | json.dump(cls._cache_registry, f, indent=2)
67 | except Exception as e:
68 | logger.error(f"Failed to save cache registry: {e}")
69 |
70 | @classmethod
71 | def _cleanup_stale_cache(cls) -> None:
72 | """Remove cache entries for files that no longer exist."""
73 | stale_entries = []
74 | for normalized_path, cache_info in cls._cache_registry.items():
75 | # Check if the CSV file still exists
76 | csv_exists = os.path.exists(cache_info.get("csv_path", ""))
77 |
78 | # For ETL file, try to check if it exists (normalized path might not be exact)
79 | # Just check if CSV is missing, since ETL path might have changed
80 | if not csv_exists:
81 | stale_entries.append(normalized_path)
82 |
83 | for entry in stale_entries:
84 | del cls._cache_registry[entry]
85 |
86 | if stale_entries:
87 | cls._save_cache_registry()
88 |
89 | def _find_tracerpt(self) -> Optional[str]:
90 | """Find tracerpt.exe on the system."""
91 | if platform.system() != "Windows":
92 | return None
93 |
94 | # Common locations for tracerpt.exe
95 | possible_paths = [
96 | r"C:\Windows\System32\tracerpt.exe",
97 | r"C:\Windows\SysWOW64\tracerpt.exe",
98 | ]
99 |
100 | for path in possible_paths:
101 | if os.path.exists(path):
102 | return path
103 |
104 | # Try to find it in PATH
105 | try:
106 | result = subprocess.run(
107 | ["where", "tracerpt.exe"], capture_output=True, text=True, check=False
108 | )
109 | if result.returncode == 0 and result.stdout.strip():
110 | return result.stdout.strip().split("\n")[0]
111 | except Exception:
112 | pass
113 |
114 | return None
115 |
    def is_available(self) -> bool:
        """Check if ETL parsing is available (tracerpt.exe was located)."""
        return self.tracerpt_path is not None
119 |
120 | def _get_cache_key(self, file_path: str) -> str:
121 | """Generate a cache key for an ETL file based on path and size."""
122 | path = Path(file_path)
123 | # Normalize the path to ensure consistency
124 | normalized_path = str(path.resolve()).lower()
125 | stat = path.stat()
126 | # Include normalized file path and size in key (not mtime to preserve cache)
127 | key_data = f"{normalized_path}|{stat.st_size}"
128 | return hashlib.md5(key_data.encode()).hexdigest()
129 |
130 | def _get_cached_csv(self, file_path: str) -> Optional[str]:
131 | """Get cached CSV path if it exists and is valid."""
132 | # Normalize the path to match how we store in registry
133 | normalized_path = str(Path(file_path).resolve()).lower()
134 |
135 | if normalized_path not in self._cache_registry:
136 | return None
137 |
138 | cache_info = self._cache_registry[normalized_path]
139 | cache_key = self._get_cache_key(file_path)
140 |
141 | # Check if cache is still valid
142 | if cache_info.get("cache_key") != cache_key:
143 | # File has changed, invalidate cache
144 | logger.info(f"ETL file has changed, invalidating cache for {file_path}")
145 | self._remove_cache_entry(file_path)
146 | return None
147 |
148 | csv_path = cache_info.get("csv_path")
149 | if csv_path and os.path.exists(csv_path):
150 | logger.info(f"Using cached CSV for {file_path}: {csv_path}")
151 | return str(csv_path)
152 |
153 | # CSV file missing, remove entry
154 | self._remove_cache_entry(file_path)
155 | return None
156 |
157 | def _remove_cache_entry(self, file_path: str) -> None:
158 | """Remove a cache entry and its CSV file."""
159 | # Normalize the path to match how we store in registry
160 | normalized_path = str(Path(file_path).resolve()).lower()
161 |
162 | if normalized_path in self._cache_registry:
163 | cache_info = self._cache_registry[normalized_path]
164 | csv_path = cache_info.get("csv_path")
165 | if csv_path and os.path.exists(csv_path):
166 | try:
167 | os.remove(csv_path)
168 | logger.info(f"Removed cached CSV: {csv_path}")
169 | except Exception as e:
170 | logger.error(f"Failed to remove cached CSV: {e}")
171 | del self._cache_registry[normalized_path]
172 | self._save_cache_registry()
173 |
174 | def _convert_etl_to_csv_sync(self, etl_path: str) -> str:
175 | """Convert ETL to CSV using tracerpt, with locking to prevent concurrent conversions."""
176 | import threading
177 |
178 | # Use threading lock to prevent concurrent conversions of same file
179 | if etl_path not in self._conversion_locks:
180 | self._conversion_locks[etl_path] = threading.Lock()
181 |
182 | with self._conversion_locks[etl_path]:
183 | # Check again if CSV was created while waiting for lock
184 | cached_csv = self._get_cached_csv(etl_path)
185 | if cached_csv:
186 | return cached_csv
187 |
188 | # Generate output filename
189 | cache_key = self._get_cache_key(etl_path)
190 | csv_filename = f"etl_{cache_key}.csv"
191 | csv_path = os.path.join(self._cache_dir or tempfile.gettempdir(), csv_filename)
192 |
193 | # Check if the CSV file already exists in cache directory (missed by registry)
194 | if os.path.exists(csv_path):
195 | logger.info(f"Found existing CSV file (missed by registry): {csv_path}")
196 | # Update cache registry with normalized path
197 | normalized_path = str(Path(etl_path).resolve()).lower()
198 | file_size_mb = Path(etl_path).stat().st_size / (1024 * 1024)
199 | self._cache_registry[normalized_path] = {
200 | "csv_path": csv_path,
201 | "cache_key": cache_key,
202 | "converted_at": datetime.now().isoformat(),
203 | "file_size_mb": file_size_mb,
204 | "conversion_duration_s": 0, # Unknown
205 | }
206 | self._save_cache_registry()
207 | return csv_path
208 |
209 | logger.info(f"Converting ETL to CSV: {etl_path} -> {csv_path}")
210 |
211 | # Get file size for logging
212 | file_size_mb = Path(etl_path).stat().st_size / (1024 * 1024)
213 | logger.info(f"ETL file size: {file_size_mb:.1f} MB")
214 |
215 | # Run tracerpt
216 | if self.tracerpt_path is None:
217 | raise RuntimeError("tracerpt.exe not found")
218 | cmd = [
219 | self.tracerpt_path,
220 | etl_path,
221 | "-o",
222 | csv_path,
223 | "-of",
224 | "CSV",
225 | "-y", # Overwrite without prompting
226 | "-lr", # Less restrictive; attempt to process badly-formed events
227 | ]
228 |
229 | start_time = datetime.now()
230 | logger.info(f"Starting tracerpt conversion at {start_time}")
231 | logger.info(f"Converting ETL file: {etl_path}")
232 | logger.info(f"Output CSV: {csv_path}")
233 |
234 | try:
235 | # Start tracerpt process
236 | import threading
237 | import time
238 |
239 | process = subprocess.Popen(
240 | cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
241 | )
242 |
243 | # Monitor thread for progress updates
244 | def monitor_conversion() -> None:
245 | elapsed = 0
246 | while process.poll() is None: # While process is running
247 | time.sleep(30) # Check every 30 seconds
248 | elapsed += 30
249 | if os.path.exists(csv_path):
250 | try:
251 | csv_size_mb = os.path.getsize(csv_path) / (1024 * 1024)
252 | logger.info(
253 | f"ETL conversion in progress... {elapsed}s elapsed, CSV size: {csv_size_mb:.1f} MB"
254 | )
255 | except Exception:
256 | logger.info(
257 | f"ETL conversion in progress... {elapsed}s elapsed"
258 | )
259 | else:
260 | logger.info(
261 | f"ETL conversion in progress... {elapsed}s elapsed, waiting for CSV creation..."
262 | )
263 |
264 | # Start monitoring in background thread
265 | monitor_thread = threading.Thread(
266 | target=monitor_conversion, daemon=True
267 | )
268 | monitor_thread.start()
269 |
270 | try:
271 | # Wait for process to complete with timeout
272 | stdout, stderr = process.communicate(
273 | timeout=600
274 | ) # 10 minute timeout
275 |
276 | if process.returncode != 0:
277 | raise RuntimeError(
278 | f"tracerpt failed with code {process.returncode}: {stderr}"
279 | )
280 |
281 | except subprocess.TimeoutExpired:
282 | # Kill the process if it times out
283 | process.terminate()
284 | try:
285 | process.wait(timeout=5)
286 | except subprocess.TimeoutExpired:
287 | process.kill()
288 | raise RuntimeError("tracerpt conversion timed out after 10 minutes")
289 |
290 | end_time = datetime.now()
291 | duration = (end_time - start_time).total_seconds()
292 | logger.info(f"Tracerpt completed in {duration:.1f} seconds")
293 |
294 | if process.returncode != 0:
295 | raise RuntimeError(
296 | f"tracerpt failed with code {process.returncode}: {stderr}"
297 | )
298 |
299 | # Verify CSV was created
300 | if not os.path.exists(csv_path):
301 | raise RuntimeError("tracerpt completed but produced no output file")
302 |
303 | # Update cache registry with normalized path
304 | normalized_path = str(Path(etl_path).resolve()).lower()
305 | self._cache_registry[normalized_path] = {
306 | "csv_path": csv_path,
307 | "cache_key": cache_key,
308 | "converted_at": datetime.now().isoformat(),
309 | "file_size_mb": file_size_mb,
310 | "conversion_duration_s": duration,
311 | }
312 | self._save_cache_registry()
313 |
314 | logger.info(f"Successfully cached ETL conversion: {csv_path}")
315 | return csv_path
316 |
317 | except subprocess.TimeoutExpired:
318 | raise RuntimeError("tracerpt conversion timed out after 10 minutes")
319 | except Exception as e:
320 | # Clean up partial file if it exists
321 | if os.path.exists(csv_path):
322 | try:
323 | os.remove(csv_path)
324 | except Exception:
325 | pass
326 | raise
327 |
328 | def parse_file(
329 | self, source: LogSource, file_path: Union[str, Path]
330 | ) -> Iterator[LogRecord]:
331 | """Parse ETL log records from a file using cached CSV.
332 |
333 | Args:
334 | source: The log source information.
335 | file_path: Path to the ETL file.
336 |
337 | Yields:
338 | LogRecord objects parsed from the ETL file.
339 | """
340 | if not self.is_available():
341 | raise RuntimeError(
342 | "Windows ETL parsing is not available. tracerpt.exe not found."
343 | )
344 |
345 | path = str(Path(file_path))
346 | if not os.path.exists(path):
347 | raise FileNotFoundError(f"ETL file not found: {file_path}")
348 |
349 | # Convert to CSV (cached)
350 | csv_path = self._convert_etl_to_csv_sync(path)
351 |
352 | # Parse CSV file
353 | yield from self._parse_csv_file(source, csv_path)
354 |
355 | def _parse_csv_file(
356 | self, source: LogSource, csv_path: str, limit: int = 10000, offset: int = 0
357 | ) -> Iterator[LogRecord]:
358 | """Parse records from the cached CSV file.
359 |
360 | Args:
361 | source: The log source information.
362 | csv_path: Path to the CSV file.
363 | limit: Maximum number of records to yield.
364 | offset: Number of records to skip.
365 |
366 | Yields:
367 | LogRecord objects.
368 | """
369 | records_yielded = 0
370 | records_skipped = 0
371 |
372 | with open(csv_path, "r", encoding="utf-8", errors="ignore") as f:
373 | reader = csv.DictReader(f)
374 |
375 | for row_num, row in enumerate(reader):
376 | # Handle offset
377 | if records_skipped < offset:
378 | records_skipped += 1
379 | continue
380 |
381 | # Convert and yield record
382 | log_record = self._convert_csv_row(source, row)
383 | if log_record:
384 | yield log_record
385 | records_yielded += 1
386 |
387 | # Check limit
388 | if records_yielded >= limit:
389 | break
390 |
391 | def _convert_csv_row(
392 | self, source: LogSource, row: Dict[str, str]
393 | ) -> Optional[LogRecord]:
394 | """Convert a CSV row from tracerpt to a LogRecord.
395 |
396 | Args:
397 | source: The log source information.
398 | row: CSV row dictionary.
399 |
400 | Returns:
401 | LogRecord or None if conversion fails.
402 | """
403 | try:
404 | # Clean up field names (remove alignment underscores)
405 | clean_data = {}
406 |
407 | for key, value in row.items():
408 | if key and value:
409 | # Remove leading/trailing underscores and spaces
410 | clean_key = key.strip().strip("_").lower().replace(" ", "_")
411 | clean_value = value.strip()
412 | if clean_key and clean_value:
413 | clean_data[clean_key] = clean_value
414 |
415 | # Try to parse timestamp from clock_time
416 | timestamp = None
417 | if "clock_time" in clean_data:
418 | # Clock time is in Windows FILETIME format (100-nanosecond intervals since 1601)
419 | try:
420 | filetime = int(clean_data["clock_time"])
421 | # Convert to Unix timestamp
422 | unix_timestamp = (filetime - 116444736000000000) / 10000000.0
423 | timestamp = datetime.fromtimestamp(unix_timestamp)
424 | except Exception:
425 | pass
426 |
427 | return LogRecord(
428 | source_id=source.id,
429 | timestamp=timestamp,
430 | data=clean_data,
431 | )
432 |
433 | except Exception as e:
434 | if self.config.get("verbose", False):
435 | logger.error(f"Failed to convert CSV row: {e}")
436 | return None
437 |
438 | def parse(
439 | self,
440 | path: str,
441 | filters: Optional[Dict[str, Any]] = None,
442 | start_time: Optional[datetime] = None,
443 | end_time: Optional[datetime] = None,
444 | limit: int = 1000,
445 | offset: int = 0,
446 | ) -> List[LogRecord]:
447 | """Parse ETL file with filtering and pagination using cache.
448 |
449 | Args:
450 | path: Path to the ETL file.
451 | filters: Optional filters to apply.
452 | start_time: Optional start time filter.
453 | end_time: Optional end time filter.
454 | limit: Maximum number of records to return.
455 | offset: Number of records to skip.
456 |
457 | Returns:
458 | List of LogRecord objects.
459 | """
460 | # Create a temporary log source for parsing
461 | temp_source = LogSource(
462 | name="temp_etl", type=LogType.ETL, path=path, metadata={}
463 | )
464 |
465 | records: List[LogRecord] = []
466 |
467 | for record in self.parse_file(temp_source, path):
468 | # Apply time filters
469 | if start_time and record.timestamp and record.timestamp < start_time:
470 | continue
471 | if end_time and record.timestamp and record.timestamp > end_time:
472 | continue
473 |
474 | # Apply custom filters
475 | if filters:
476 | if not self._match_filters(record, filters):
477 | continue
478 |
479 | # We need to handle offset/limit at this level since parse_file
480 | # doesn't know about filters
481 | if len(records) < offset:
482 | continue
483 |
484 | records.append(record)
485 |
486 | if len(records) >= limit + offset:
487 | break
488 |
489 | # Apply offset by slicing
490 | if offset > 0 and len(records) > offset:
491 | return records[offset : offset + limit]
492 | else:
493 | return records[:limit]
494 |
495 | def _match_filters(self, record: LogRecord, filters: Dict[str, Any]) -> bool:
496 | """Check if a record matches the provided filters.
497 |
498 | Args:
499 | record: The log record to check.
500 | filters: Dictionary of filters to apply.
501 |
502 | Returns:
503 | True if record matches all filters.
504 | """
505 | for key, value in filters.items():
506 | record_value = record.data.get(key)
507 |
508 | if isinstance(value, list):
509 | if record_value not in value:
510 | return False
511 | else:
512 | if record_value != value:
513 | return False
514 |
515 | return True
516 |
517 | def parse_content(self, source: LogSource, content: str) -> Iterator[LogRecord]:
518 | """Parse ETL log records from content string.
519 |
520 | Note: ETL files are binary and cannot be parsed from string content.
521 |
522 | Args:
523 | source: The log source information.
524 | content: String content (not supported for ETL).
525 |
526 | Raises:
527 | NotImplementedError: ETL files must be parsed from file.
528 | """
529 | raise NotImplementedError(
530 | "ETL files are binary and must be parsed from file, not string content"
531 | )
532 |
533 | def validate_file(self, file_path: Union[str, Path]) -> bool:
534 | """Validate if the file can be parsed by this parser.
535 |
536 | Args:
537 | file_path: Path to validate.
538 |
539 | Returns:
540 | True if file appears to be an ETL file.
541 | """
542 | path = Path(file_path)
543 |
544 | # Check file extension
545 | if not str(path).lower().endswith(".etl"):
546 | return False
547 |
548 | # Check if file exists and is readable
549 | if not path.exists() or not path.is_file():
550 | return False
551 |
552 | # Check if we have tracerpt available
553 | if not self.is_available():
554 | return False
555 |
556 | return True
557 |
558 | @classmethod
559 | def cleanup_cache_for_source(cls, source_path: str) -> None:
560 | """Clean up cached CSV for a specific ETL source.
561 |
562 | Args:
563 | source_path: Path to the ETL file whose cache should be removed.
564 | """
565 | logger.info(f"Cleaning up cache for ETL source: {source_path}")
566 |
567 | # Ensure cache is initialized
568 | cls._init_cache_dir()
569 |
570 | # Remove cache entry (normalize path first)
571 | normalized_path = str(Path(source_path).resolve()).lower()
572 | if normalized_path in cls._cache_registry:
573 | cache_info = cls._cache_registry[normalized_path]
574 | csv_path = cache_info.get("csv_path")
575 |
576 | # Remove CSV file
577 | if csv_path and os.path.exists(csv_path):
578 | try:
579 | os.remove(csv_path)
580 | logger.info(f"Removed cached CSV file: {csv_path}")
581 | except Exception as e:
582 | logger.error(f"Failed to remove cached CSV: {e}")
583 |
584 | # Remove from registry
585 | del cls._cache_registry[normalized_path]
586 | cls._save_cache_registry()
587 | logger.info(f"Removed cache registry entry for: {source_path}")
588 |
589 | @classmethod
590 | def cleanup_all_cache(cls) -> None:
591 | """Clean up all cached CSV files."""
592 | logger.info("Cleaning up all ETL cache")
593 |
594 | # Ensure cache is initialized
595 | cls._init_cache_dir()
596 |
597 | # Remove all CSV files
598 | for etl_path, cache_info in list(cls._cache_registry.items()):
599 | cls.cleanup_cache_for_source(etl_path)
600 |
601 | # Clear registry
602 | cls._cache_registry = {}
603 | cls._save_cache_registry()
604 |
```