#
tokens: 9648/50000 1/114 files (page 6/6)
lines: on (toggle) GitHub
raw markdown copy reset
This is page 6 of 6. Use http://codebase.md/threatflux/yaraflux?lines=true&page={x} (replace {x} with a page number from 1 to 6) to view the full context.

# Directory Structure

```
├── .dockerignore
├── .env
├── .env.example
├── .github
│   ├── dependabot.yml
│   └── workflows
│       ├── ci.yml
│       ├── codeql.yml
│       ├── publish-release.yml
│       ├── safety_scan.yml
│       ├── update-actions.yml
│       └── version-bump.yml
├── .gitignore
├── .pylintrc
├── .safety-project.ini
├── bandit.yaml
├── codecov.yml
├── docker-compose.yml
├── docker-entrypoint.sh
├── Dockerfile
├── docs
│   ├── api_mcp_architecture.md
│   ├── api.md
│   ├── architecture_diagram.md
│   ├── cli.md
│   ├── examples.md
│   ├── file_management.md
│   ├── installation.md
│   ├── mcp.md
│   ├── README.md
│   └── yara_rules.md
├── entrypoint.sh
├── examples
│   ├── claude_desktop_config.json
│   └── install_via_smithery.sh
├── glama.json
├── images
│   ├── architecture.svg
│   ├── architecture.txt
│   ├── image copy.png
│   └── image.png
├── LICENSE
├── Makefile
├── mypy.ini
├── pyproject.toml
├── pytest.ini
├── README.md
├── requirements-dev.txt
├── requirements.txt
├── SECURITY.md
├── setup.py
├── src
│   └── yaraflux_mcp_server
│       ├── __init__.py
│       ├── __main__.py
│       ├── app.py
│       ├── auth.py
│       ├── claude_mcp_tools.py
│       ├── claude_mcp.py
│       ├── config.py
│       ├── mcp_server.py
│       ├── mcp_tools
│       │   ├── __init__.py
│       │   ├── base.py
│       │   ├── file_tools.py
│       │   ├── rule_tools.py
│       │   ├── scan_tools.py
│       │   └── storage_tools.py
│       ├── models.py
│       ├── routers
│       │   ├── __init__.py
│       │   ├── auth.py
│       │   ├── files.py
│       │   ├── rules.py
│       │   └── scan.py
│       ├── run_mcp.py
│       ├── storage
│       │   ├── __init__.py
│       │   ├── base.py
│       │   ├── factory.py
│       │   ├── local.py
│       │   └── minio.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── error_handling.py
│       │   ├── logging_config.py
│       │   ├── param_parsing.py
│       │   └── wrapper_generator.py
│       └── yara_service.py
├── test.txt
├── tests
│   ├── conftest.py
│   ├── functional
│   │   └── __init__.py
│   ├── integration
│   │   └── __init__.py
│   └── unit
│       ├── __init__.py
│       ├── test_app.py
│       ├── test_auth_fixtures
│       │   ├── test_token_auth.py
│       │   └── test_user_management.py
│       ├── test_auth.py
│       ├── test_claude_mcp_tools.py
│       ├── test_cli
│       │   ├── __init__.py
│       │   ├── test_main.py
│       │   └── test_run_mcp.py
│       ├── test_config.py
│       ├── test_mcp_server.py
│       ├── test_mcp_tools
│       │   ├── test_file_tools_extended.py
│       │   ├── test_file_tools.py
│       │   ├── test_init.py
│       │   ├── test_rule_tools_extended.py
│       │   ├── test_rule_tools.py
│       │   ├── test_scan_tools_extended.py
│       │   ├── test_scan_tools.py
│       │   ├── test_storage_tools_enhanced.py
│       │   └── test_storage_tools.py
│       ├── test_mcp_tools.py
│       ├── test_routers
│       │   ├── test_auth_router.py
│       │   ├── test_files.py
│       │   ├── test_rules.py
│       │   └── test_scan.py
│       ├── test_storage
│       │   ├── test_factory.py
│       │   ├── test_local_storage.py
│       │   └── test_minio_storage.py
│       ├── test_storage_base.py
│       ├── test_utils
│       │   ├── __init__.py
│       │   ├── test_error_handling.py
│       │   ├── test_logging_config.py
│       │   ├── test_param_parsing.py
│       │   └── test_wrapper_generator.py
│       ├── test_yara_rule_compilation.py
│       └── test_yara_service.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/src/yaraflux_mcp_server/yara_service.py:
--------------------------------------------------------------------------------

```python
  1 | """YARA integration service for YaraFlux MCP Server.
  2 | 
  3 | This module provides functionality for working with YARA rules, including:
  4 | - Rule compilation and validation
  5 | - Rule management (add, update, delete)
  6 | - File scanning with rules
  7 | - Integration with ThreatFlux YARA-Rules repository
  8 | """
  9 | 
 10 | import hashlib
 11 | import logging
 12 | import os
 13 | import time
 14 | from concurrent.futures import ThreadPoolExecutor
 15 | from datetime import UTC, datetime
 16 | from typing import Any, BinaryIO, Callable, Dict, List, Optional, Union
 17 | from urllib.parse import urlparse
 18 | 
 19 | import httpx
 20 | import yara
 21 | 
 22 | from yaraflux_mcp_server.config import settings
 23 | from yaraflux_mcp_server.models import YaraMatch, YaraRuleMetadata, YaraScanResult
 24 | from yaraflux_mcp_server.storage import StorageClient, StorageError, get_storage_client
 25 | 
 26 | # Configure logging
 27 | logger = logging.getLogger(__name__)
 28 | 
 29 | 
 30 | class YaraError(Exception):
 31 |     """Raised when a YARA rule or scan operation fails."""
 32 | 
 33 | 
 34 | class YaraService:
 35 |     """Service for YARA rule compilation, management, and scanning."""
 36 | 
 37 |     def __init__(self, storage_client: Optional[StorageClient] = None):
 38 |         """Initialize the YARA service.
 39 | 
 40 |         Args:
 41 |             storage_client: Optional storage client to use
 42 |         """
 43 |         self.storage = storage_client or get_storage_client()
 44 |         self._rules_cache: Dict[str, yara.Rules] = {}
 45 |         self._rule_include_callbacks: Dict[str, Callable[[str, str], bytes]] = {}
 46 | 
 47 |         # Initialize executor for scanning
 48 |         self._executor = ThreadPoolExecutor(max_workers=4)
 49 | 
 50 |         logger.info("YARA service initialized")
 51 | 
 52 |     def load_rules(self, include_default_rules: bool = True) -> None:
 53 |         """Load all YARA rules from storage.
 54 | 
 55 |         Args:
 56 |             include_default_rules: Whether to include default ThreatFlux rules
 57 |         """
 58 |         # Clear existing cache
 59 |         self._rules_cache.clear()
 60 | 
 61 |         # List all available rules
 62 |         rules_metadata = self.storage.list_rules()
 63 | 
 64 |         # Group rules by source
 65 |         rules_by_source: Dict[str, List[Dict[str, Any]]] = {}
 66 |         for rule in rules_metadata:
 67 |             source = rule.get("source", "custom")
 68 |             if source not in rules_by_source:
 69 |                 rules_by_source[source] = []
 70 |             rules_by_source[source].append(rule)
 71 | 
 72 |         # First, load all rules individually (this populates include callbacks)
 73 |         for rule in rules_metadata:
 74 |             try:
 75 |                 source = rule.get("source", "custom")
 76 |                 rule_name = rule.get("name")
 77 | 
 78 |                 # Skip loading community rules individually if they'll be loaded as a whole
 79 |                 if include_default_rules and source == "community":
 80 |                     continue
 81 | 
 82 |                 self._compile_rule(rule_name, source)
 83 |                 logger.debug(f"Loaded rule: {rule_name} from {source}")
 84 |             except Exception as e:
 85 |                 logger.warning(f"Failed to load rule {rule.get('name')}: {str(e)}")
 86 | 
 87 |         # Then, try to load community rules as a single ruleset if requested
 88 |         if include_default_rules and "community" in rules_by_source:
 89 |             try:
 90 |                 self._compile_community_rules()
 91 |                 logger.info("Loaded community rules as combined ruleset")
 92 |             except Exception as e:
 93 |                 logger.warning(f"Failed to load community rules as combined ruleset: {str(e)}")
 94 | 
 95 |         logger.info(f"Loaded {len(self._rules_cache)} rule sets")
 96 | 
 97 |     def _compile_rule(self, rule_name: str, source: str = "custom") -> yara.Rules:
 98 |         """Compile a single YARA rule from storage.
 99 | 
100 |         Args:
101 |             rule_name: Name of the rule
102 |             source: Source of the rule
103 | 
104 |         Returns:
105 |             Compiled YARA rules object
106 | 
107 |         Raises:
108 |             YaraError: If rule compilation fails
109 |         """
110 |         # Check for an existing compiled rule
111 |         cache_key = f"{source}:{rule_name}"
112 |         if cache_key in self._rules_cache:
113 |             return self._rules_cache[cache_key]
114 | 
115 |         try:
116 |             # Get the rule content from storage
117 |             rule_content = self.storage.get_rule(rule_name, source)
118 | 
119 |             # Register an include callback for this rule
120 |             self._register_include_callback(source, rule_name)
121 | 
122 |             # Compile the rule
123 |             compiled_rule = yara.compile(
124 |                 source=rule_content,
125 |                 includes=True,
126 |                 include_callback=self._get_include_callback(source),
127 |                 error_on_warning=True,
128 |             )
129 | 
130 |             # Cache the compiled rule
131 |             self._rules_cache[cache_key] = compiled_rule
132 | 
133 |             return compiled_rule
134 |         except yara.Error as e:
135 |             logger.error(f"YARA compilation error for rule {rule_name}: {str(e)}")
136 |             raise YaraError(f"Failed to compile rule {rule_name}: {str(e)}") from e
137 |         except StorageError as e:
138 |             logger.error(f"Storage error getting rule {rule_name}: {str(e)}")
139 |             raise YaraError(f"Failed to load rule {rule_name}: {str(e)}") from e
140 | 
141 |     def _compile_community_rules(self) -> yara.Rules:
142 |         """Compile all community YARA rules as a single ruleset.
143 | 
144 |         Returns:
145 |             Compiled YARA rules object
146 | 
147 |         Raises:
148 |             YaraError: If rule compilation fails
149 |         """
150 |         cache_key = "community:all"
151 |         if cache_key in self._rules_cache:
152 |             return self._rules_cache[cache_key]
153 | 
154 |         try:
155 |             # Get all community rules
156 |             rules_metadata = self.storage.list_rules("community")
157 | 
158 |             # Create a combined source with imports for all rules
159 |             combined_source = ""
160 |             for rule in rules_metadata:
161 |                 rule_name = rule.get("name")
162 |                 if not rule_name.endswith(".yar"):
163 |                     continue
164 |                 combined_source += f'include "{rule_name}"\n'
165 | 
166 |             # Skip if no rules found
167 |             if not combined_source:
168 |                 raise YaraError("No community rules found")
169 | 
170 |             # Register include callbacks for all community rules
171 |             for rule in rules_metadata:
172 |                 self._register_include_callback("community", rule.get("name"))
173 | 
174 |             # Compile the combined ruleset
175 |             compiled_rule = yara.compile(
176 |                 source=combined_source,
177 |                 includes=True,
178 |                 include_callback=self._get_include_callback("community"),
179 |                 error_on_warning=True,
180 |             )
181 | 
182 |             # Cache the compiled rule
183 |             self._rules_cache[cache_key] = compiled_rule
184 | 
185 |             return compiled_rule
186 |         except yara.Error as e:
187 |             logger.error(f"YARA compilation error for community rules: {str(e)}")
188 |             raise YaraError(f"Failed to compile community rules: {str(e)}") from e
189 |         except StorageError as e:
190 |             logger.error(f"Storage error getting community rules: {str(e)}")
191 |             raise YaraError(f"Failed to load community rules: {str(e)}") from e
192 | 
193 |     def _register_include_callback(self, source: str, rule_name: str) -> None:
194 |         """Register an include callback for a rule.
195 | 
196 |         Args:
197 |             source: Source of the rule
198 |             rule_name: Name of the rule
199 |         """
200 |         callback_key = f"{source}:{rule_name}"
201 | 
202 |         # Define the include callback for this rule
203 |         def include_callback(requested_filename: str, namespace: str) -> bytes:
204 |             """Include callback for YARA rules.
205 | 
206 |             Args:
207 |                 requested_filename: Filename requested by the include directive
208 |                 namespace: Namespace for the included content
209 | 
210 |             Returns:
211 |                 Content of the included file
212 | 
213 |             Raises:
214 |                 yara.Error: If include file cannot be found
215 |             """
216 |             logger.debug(f"Include requested: {requested_filename} in namespace {namespace}")
217 | 
218 |             try:
219 |                 # Try to load from the same source
220 |                 include_content = self.storage.get_rule(requested_filename, source)
221 |                 return include_content.encode("utf-8")
222 |             except StorageError:
223 |                 # If not found in the same source, try custom rules
224 |                 try:
225 |                     if source != "custom":
226 |                         include_content = self.storage.get_rule(requested_filename, "custom")
227 |                         return include_content.encode("utf-8")
228 |                 except StorageError:
229 |                     # If not found in custom rules either, try community rules
230 |                     try:
231 |                         if source != "community":
232 |                             include_content = self.storage.get_rule(requested_filename, "community")
233 |                             return include_content.encode("utf-8")
234 |                     except StorageError as e:
235 |                         # If not found anywhere, raise an error
236 |                         logger.warning(f"Include file not found: {requested_filename}")
237 |                         raise yara.Error(f"Include file not found: {requested_filename}") from e
238 | 
239 |             # If all attempts fail, raise an error
240 |             raise yara.Error(f"Include file not found: {requested_filename}")
241 | 
242 |         # Register the callback
243 |         self._rule_include_callbacks[callback_key] = include_callback
244 | 
245 |     def _get_include_callback(self, source: str) -> Callable[[str, str], bytes]:
246 |         """Get the include callback for a source.
247 | 
248 |         Args:
249 |             source: Source of the rules
250 | 
251 |         Returns:
252 |             Include callback function
253 |         """
254 | 
255 |         def combined_callback(requested_filename: str, namespace: str) -> bytes:
256 |             """Combined include callback that tries all registered callbacks.
257 | 
258 |             Args:
259 |                 requested_filename: Filename requested by the include directive
260 |                 namespace: Namespace for the included content
261 | 
262 |             Returns:
263 |                 Content of the included file
264 | 
265 |             Raises:
266 |                 yara.Error: If include file cannot be found
267 |             """
268 |             # Try all callbacks associated with this source
269 |             for key, callback in self._rule_include_callbacks.items():
270 |                 if key.startswith(f"{source}:"):
271 |                     try:
272 |                         return callback(requested_filename, namespace)
273 |                     except yara.Error:
274 |                         # Try the next callback
275 |                         continue
276 | 
277 |             # If no callback succeeds, raise an error
278 |             logger.warning(f"Include file not found by any callback: {requested_filename}")
279 |             raise yara.Error(f"Include file not found: {requested_filename}")
280 | 
281 |         return combined_callback
282 | 
283 |     def add_rule(self, rule_name: str, content: str, source: str = "custom") -> YaraRuleMetadata:
284 |         """Add a new YARA rule.
285 | 
286 |         Args:
287 |             rule_name: Name of the rule
288 |             content: YARA rule content
289 |             source: Source of the rule
290 | 
291 |         Returns:
292 |             Metadata for the added rule
293 | 
294 |         Raises:
295 |             YaraError: If rule validation or compilation fails
296 |         """
297 |         # Ensure rule_name has .yar extension
298 |         if not rule_name.endswith(".yar"):
299 |             rule_name = f"{rule_name}.yar"
300 | 
301 |         # Validate the rule by compiling it
302 |         try:
303 |             # Try to compile without includes first for basic validation
304 |             yara.compile(source=content, error_on_warning=True)
305 | 
306 |             # Then compile with includes to validate imports
307 |             yara.compile(
308 |                 source=content,
309 |                 includes=True,
310 |                 include_callback=self._get_include_callback(source),
311 |                 error_on_warning=True,
312 |             )
313 |         except yara.Error as e:
314 |             logger.error(f"YARA validation error for rule {rule_name}: {str(e)}")
315 |             raise YaraError(f"Invalid YARA rule: {str(e)}") from e
316 | 
317 |         # Save the rule
318 |         try:
319 |             self.storage.save_rule(rule_name, content, source)
320 |             logger.info(f"Added rule {rule_name} from {source}")
321 | 
322 |             # Compile and cache the rule
323 |             compiled_rule = self._compile_rule(rule_name, source)
324 |             if compiled_rule:
325 |                 cache_key = f"{source}:{rule_name}"
326 |                 self._rules_cache[cache_key] = compiled_rule
327 |             # Return metadata
328 |             return YaraRuleMetadata(name=rule_name, source=source, created=datetime.now(UTC), is_compiled=True)
329 |         except StorageError as e:
330 |             logger.error(f"Storage error saving rule {rule_name}: {str(e)}")
331 |             raise YaraError(f"Failed to save rule: {str(e)}") from e
332 | 
333 |     def update_rule(self, rule_name: str, content: str, source: str = "custom") -> YaraRuleMetadata:
334 |         """Update an existing YARA rule.
335 | 
336 |         Args:
337 |             rule_name: Name of the rule
338 |             content: Updated YARA rule content
339 |             source: Source of the rule
340 | 
341 |         Returns:
342 |             Metadata for the updated rule
343 | 
344 |         Raises:
345 |             YaraError: If rule validation, compilation, or update fails
346 |         """
347 |         # Ensure rule exists
348 |         try:
349 |             self.storage.get_rule(rule_name, source)
350 |         except StorageError as e:
351 |             logger.error(f"Rule not found: {rule_name} from {source}")
352 |             raise YaraError(f"Rule not found: {rule_name}") from e
353 | 
354 |         # Add the rule (this will validate and save it)
355 |         metadata = self.add_rule(rule_name, content, source)
356 | 
357 |         # Set modified timestamp
358 |         metadata.modified = datetime.now(UTC)
359 | 
360 |         # Clear cache for this rule
361 |         cache_key = f"{source}:{rule_name}"
362 |         if cache_key in self._rules_cache:
363 |             del self._rules_cache[cache_key]
364 | 
365 |         # Also clear combined community rules cache if this was a community rule
366 |         if source == "community" and "community:all" in self._rules_cache:
367 |             del self._rules_cache["community:all"]
368 | 
369 |         return metadata
370 | 
371 |     def delete_rule(self, rule_name: str, source: str = "custom") -> bool:
372 |         """Delete a YARA rule.
373 | 
374 |         Args:
375 |             rule_name: Name of the rule
376 |             source: Source of the rule
377 | 
378 |         Returns:
379 |             True if rule was deleted, False if not found
380 | 
381 |         Raises:
382 |             YaraError: If rule deletion fails
383 |         """
384 |         try:
385 |             result = self.storage.delete_rule(rule_name, source)
386 | 
387 |             if result:
388 |                 # Clear cache for this rule
389 |                 cache_key = f"{source}:{rule_name}"
390 |                 if cache_key in self._rules_cache:
391 |                     del self._rules_cache[cache_key]
392 | 
393 |                 # Also clear combined community rules cache if this was a community rule
394 |                 if source == "community" and "community:all" in self._rules_cache:
395 |                     del self._rules_cache["community:all"]
396 | 
397 |                 logger.info(f"Deleted rule {rule_name} from {source}")
398 | 
399 |             return result
400 |         except StorageError as e:
401 |             logger.error(f"Storage error deleting rule {rule_name}: {str(e)}")
402 |             raise YaraError(f"Failed to delete rule: {str(e)}") from e
403 | 
404 |     def get_rule(self, rule_name: str, source: str = "custom") -> str:
405 |         """Get a YARA rule's content.
406 | 
407 |         Args:
408 |             rule_name: Name of the rule
409 |             source: Source of the rule
410 | 
411 |         Returns:
412 |             Rule content
413 | 
414 |         Raises:
415 |             YaraError: If rule not found
416 |         """
417 |         try:
418 |             return self.storage.get_rule(rule_name, source)
419 |         except StorageError as e:
420 |             logger.error(f"Storage error getting rule {rule_name}: {str(e)}")
421 |             raise YaraError(f"Failed to get rule: {str(e)}") from e
422 | 
423 |     def list_rules(self, source: Optional[str] = None) -> List[YaraRuleMetadata]:
424 |         """List all YARA rules.
425 | 
426 |         Args:
427 |             source: Optional filter by source
428 | 
429 |         Returns:
430 |             List of rule metadata
431 |         """
432 |         try:
433 |             rules_data = self.storage.list_rules(source)
434 | 
435 |             # Convert to YaraRuleMetadata objects
436 |             rules_metadata = []
437 |             for rule in rules_data:
438 |                 try:
439 |                     # Check if rule is compiled
440 |                     is_compiled = False
441 |                     rule_source = rule.get("source", "custom")
442 |                     rule_name = rule.get("name")
443 |                     cache_key = f"{rule_source}:{rule_name}"
444 | 
445 |                     # Rule is compiled if it's in the cache
446 |                     is_compiled = cache_key in self._rules_cache
447 | 
448 |                     # Rule is also compiled if it's a community rule and community:all is compiled
449 |                     if rule_source == "community" and "community:all" in self._rules_cache:
450 |                         is_compiled = True
451 | 
452 |                     # Create metadata object
453 |                     created = rule.get("created")
454 |                     if isinstance(created, str):
455 |                         created = datetime.fromisoformat(created)
456 |                     elif not isinstance(created, datetime):
457 |                         created = datetime.now(UTC)
458 | 
459 |                     modified = rule.get("modified")
460 |                     if isinstance(modified, str):
461 |                         modified = datetime.fromisoformat(modified)
462 | 
463 |                     metadata = YaraRuleMetadata(
464 |                         name=rule.get("name"),
465 |                         source=rule.get("source", "custom"),
466 |                         created=created,
467 |                         modified=modified,
468 |                         is_compiled=is_compiled,
469 |                     )
470 | 
471 |                     rules_metadata.append(metadata)
472 |                 except Exception as e:
473 |                     logger.warning(f"Error processing rule metadata: {str(e)}")
474 | 
475 |             return rules_metadata
476 |         except StorageError as e:
477 |             logger.error(f"Storage error listing rules: {str(e)}")
478 |             raise YaraError(f"Failed to list rules: {str(e)}") from e
479 | 
480 |     def match_file(
481 |         self,
482 |         file_path: str,
483 |         *,
484 |         rule_names: Optional[List[str]] = None,
485 |         sources: Optional[List[str]] = None,
486 |         timeout: Optional[int] = None,
487 |     ) -> YaraScanResult:
488 |         """Match YARA rules against a file.
489 | 
490 |         Args:
491 |             file_path: Path to the file to scan
492 |             rule_names: Optional list of rule names to match (if None, match all)
493 |             sources: Optional list of sources to match rules from (if None, match all)
494 |             timeout: Optional timeout in seconds (if None, use default)
495 | 
496 |         Returns:
497 |             Scan result
498 | 
499 |         Raises:
500 |             YaraError: If scanning fails
501 |         """
502 |         # Resolve timeout
503 |         if timeout is None:
504 |             timeout = settings.YARA_SCAN_TIMEOUT
505 | 
506 |         # Get file information
507 |         try:
508 |             file_size = os.path.getsize(file_path)
509 |             if file_size > settings.YARA_MAX_FILE_SIZE:
510 |                 logger.warning(f"File too large: {file_path} ({file_size} bytes)")
511 |                 raise YaraError(f"File too large: {file_size} bytes (max {settings.YARA_MAX_FILE_SIZE} bytes)")
512 | 
513 |             # Calculate file hash
514 |             with open(file_path, "rb") as f:
515 |                 file_hash = hashlib.sha256(f.read()).hexdigest()
516 | 
517 |             # Get filename from path
518 |             file_name = os.path.basename(file_path)
519 | 
520 |             # Prepare the scan
521 |             scan_start = time.time()
522 |             timeout_reached = False
523 |             error = None
524 | 
525 |             # Collect rules to match
526 |             rules_to_match = self._collect_rules(rule_names, sources)
527 | 
528 |             # Match rules against the file
529 |             matches: List[yara.Match] = []
530 |             for rule in rules_to_match:
531 |                 try:
532 |                     # Match with timeout
533 |                     rule_matches = rule.match(file_path, timeout=timeout)
534 |                     matches.extend(rule_matches)
535 |                 except yara.TimeoutError:
536 |                     logger.warning(f"YARA scan timeout for file {file_path}")
537 |                     timeout_reached = True
538 |                     break
539 |                 except yara.Error as e:
540 |                     logger.error(f"YARA scan error for file {file_path}: {str(e)}")
541 |                     error = str(e)
542 |                     break
543 | 
544 |             # Calculate scan time
545 |             scan_time = time.time() - scan_start
546 | 
547 |             # Process matches
548 |             yara_matches = self._process_matches(matches)
549 | 
550 |             # Create scan result
551 |             result = YaraScanResult(
552 |                 file_name=file_name,
553 |                 file_size=file_size,
554 |                 file_hash=file_hash,
555 |                 matches=yara_matches,
556 |                 scan_time=scan_time,
557 |                 timeout_reached=timeout_reached,
558 |                 error=error,
559 |             )
560 | 
561 |             # Save the result
562 |             result_id = result.scan_id
563 |             self.storage.save_result(str(result_id), result.model_dump())
564 | 
565 |             return result
566 |         except (IOError, OSError) as e:
567 |             logger.error(f"File error scanning {file_path}: {str(e)}")
568 |             raise YaraError(f"Failed to scan file: {str(e)}") from e
569 | 
570 |     def match_data(
571 |         self,
572 |         data: Union[bytes, BinaryIO],
573 |         file_name: str,
574 |         *,
575 |         rule_names: Optional[List[str]] = None,
576 |         sources: Optional[List[str]] = None,
577 |         timeout: Optional[int] = None,
578 |     ) -> YaraScanResult:
579 |         """Match YARA rules against in-memory data.
580 | 
581 |         Args:
582 |             data: Bytes or file-like object to scan
583 |             file_name: Name of the file for reference
584 |             rule_names: Optional list of rule names to match (if None, match all)
585 |             sources: Optional list of sources to match rules from (if None, match all)
586 |             timeout: Optional timeout in seconds (if None, use default)
587 | 
588 |         Returns:
589 |             Scan result
590 | 
591 |         Raises:
592 |             YaraError: If scanning fails
593 |         """
594 |         # Resolve timeout
595 |         if timeout is None:
596 |             timeout = settings.YARA_SCAN_TIMEOUT
597 | 
598 |         # Ensure data is bytes
599 |         if hasattr(data, "read"):
600 |             # It's a file-like object, read it into memory
601 |             data_bytes = data.read()
602 |             if hasattr(data, "seek"):
603 |                 data.seek(0)  # Reset for potential future reads
604 |         else:
605 |             data_bytes = data
606 | 
607 |         # Check file size
608 |         file_size = len(data_bytes)
609 |         if file_size > settings.YARA_MAX_FILE_SIZE:
610 |             logger.warning(f"Data too large: {file_name} ({file_size} bytes)")
611 |             raise YaraError(f"Data too large: {file_size} bytes (max {settings.YARA_MAX_FILE_SIZE} bytes)")
612 | 
613 |         # Calculate data hash
614 |         file_hash = hashlib.sha256(data_bytes).hexdigest()
615 | 
616 |         try:
617 |             # Prepare the scan
618 |             scan_start = time.time()
619 |             timeout_reached = False
620 |             error = None
621 | 
622 |             # Collect rules to match
623 |             rules_to_match = self._collect_rules(rule_names, sources)
624 | 
625 |             # Match rules against the data
626 |             matches: List[yara.Match] = []
627 |             for rule in rules_to_match:
628 |                 try:
629 |                     # Match with timeout
630 |                     rule_matches = rule.match(data=data_bytes, timeout=timeout)
631 |                     matches.extend(rule_matches)
632 |                 except yara.TimeoutError:
633 |                     logger.warning(f"YARA scan timeout for data {file_name}")
634 |                     timeout_reached = True
635 |                     break
636 |                 except yara.Error as e:
637 |                     logger.error(f"YARA scan error for data {file_name}: {str(e)}")
638 |                     error = str(e)
639 |                     break
640 | 
641 |             # Calculate scan time
642 |             scan_time = time.time() - scan_start
643 | 
644 |             # Process matches
645 |             yara_matches = self._process_matches(matches)
646 | 
647 |             # Create scan result
648 |             result = YaraScanResult(
649 |                 file_name=file_name,
650 |                 file_size=file_size,
651 |                 file_hash=file_hash,
652 |                 matches=yara_matches,
653 |                 scan_time=scan_time,
654 |                 timeout_reached=timeout_reached,
655 |                 error=error,
656 |             )
657 | 
658 |             # Save the result
659 |             result_id = result.scan_id
660 |             self.storage.save_result(str(result_id), result.model_dump())
661 | 
662 |             return result
663 |         except Exception as e:
664 |             logger.error(f"Error scanning data {file_name}: {str(e)}")
665 |             raise YaraError(f"Failed to scan data: {str(e)}") from e
666 | 
667 |     def fetch_and_scan(
668 |         self,
669 |         url: str,
670 |         *,
671 |         rule_names: Optional[List[str]] = None,
672 |         sources: Optional[List[str]] = None,
673 |         timeout: Optional[int] = None,
674 |         download_timeout: int = 30,
675 |     ) -> YaraScanResult:
676 |         """Fetch a file from a URL and scan it with YARA rules.
677 | 
678 |         Args:
679 |             url: URL to fetch
680 |             rule_names: Optional list of rule names to match (if None, match all)
681 |             sources: Optional list of sources to match rules from (if None, match all)
682 |             timeout: Optional timeout in seconds for YARA scan (if None, use default)
683 |             download_timeout: Timeout in seconds for downloading the file
684 | 
685 |         Returns:
686 |             Scan result
687 | 
688 |         Raises:
689 |             YaraError: If fetching or scanning fails
690 |         """
691 |         # Parse URL to get filename
692 |         parsed_url = urlparse(url)
693 |         file_name = os.path.basename(parsed_url.path)
694 |         if not file_name:
695 |             file_name = "downloaded_file"
696 | 
697 |         # Create a temporary file
698 |         temp_file = None
699 |         try:
700 |             # Download the file
701 |             logger.info(f"Fetching file from URL: {url}")
702 |             with httpx.Client(timeout=download_timeout) as client:
703 |                 response = client.get(url, follow_redirects=True)
704 |                 response.raise_for_status()  # Raise exception for error status codes
705 | 
706 |                 # Get content
707 |                 content = response.content
708 | 
709 |                 # Check file size
710 |                 file_size = len(content)
711 |                 if file_size > settings.YARA_MAX_FILE_SIZE:
712 |                     logger.warning(f"Downloaded file too large: {file_name} ({file_size} bytes)")
713 |                     raise YaraError(
714 |                         f"Downloaded file too large: {file_size} bytes (max {settings.YARA_MAX_FILE_SIZE} bytes)"
715 |                     ) from None
716 | 
717 |                 # Try to get a better filename from Content-Disposition header if available
718 |                 content_disposition = response.headers.get("Content-Disposition")
719 |                 if content_disposition and "filename=" in content_disposition:
720 |                     import re  # pylint: disable=import-outside-toplevel
721 | 
722 |                     filename_match = re.search(r'filename="?([^";]+)"?', content_disposition)
723 |                     if filename_match:
724 |                         file_name = filename_match.group(1)
725 | 
726 |                 # Save to storage
727 |                 file_path, file_hash = self.storage.save_sample(filename=file_name, content=content)
728 |                 logger.info("Downloaded file saved to storage with hash: %s", file_hash)
729 |                 # Scan the file
730 |                 if os.path.exists(file_path):
731 |                     # If file_path is a real file on disk, use match_file
732 |                     return self.match_file(file_path, rule_names=rule_names, sources=sources, timeout=timeout)
733 |                 # Otherwise, use match_data
734 |                 return self.match_data(
735 |                     data=content, file_name=file_name, rule_names=rule_names, sources=sources, timeout=timeout
736 |                 )
737 |         except httpx.RequestError as e:
738 |             logger.error(f"HTTP request error fetching {url}: {str(e)}")
739 |             raise YaraError(f"Failed to fetch file: {str(e)}") from e
740 |         except httpx.HTTPStatusError as e:
741 |             logger.error(f"HTTP error fetching {url}: {e.response.status_code}")
742 |             raise YaraError(f"Failed to fetch file: HTTP {e.response.status_code}") from e
743 |         finally:
744 |             # Clean up temporary file if created
745 |             if temp_file:
746 |                 try:
747 |                     temp_file.close()
748 |                     os.unlink(temp_file.name)
749 |                 except (IOError, OSError):
750 |                     pass
751 | 
752 |     def _collect_rules(
753 |         self, rule_names: Optional[List[str]] = None, sources: Optional[List[str]] = None
754 |     ) -> List[yara.Rules]:
755 |         """Collect YARA rules to match.
756 | 
757 |         Args:
758 |             rule_names: Optional list of rule names to match (if None, match all)
759 |             sources: Optional list of sources to match rules from (if None, match all)
760 | 
761 |         Returns:
762 |             List of YARA rules objects
763 | 
764 |         Raises:
765 |             YaraError: If no rules are found
766 |         """
767 |         rules_to_match: List[yara.Rules] = []
768 | 
769 |         # If specific rules are requested
770 |         if rule_names:
771 |             for rule_name in rule_names:
772 |                 # Try to find the rule in all sources if sources not specified
773 |                 if not sources:
774 |                     available_sources = ["custom", "community"]
775 |                 else:
776 |                     available_sources = sources
777 | 
778 |                 found = False
779 |                 for source in available_sources:
780 |                     try:
781 |                         rule = self._compile_rule(rule_name, source)
782 |                         rules_to_match.append(rule)
783 |                         found = True
784 |                         break
785 |                     except YaraError:
786 |                         continue
787 | 
788 |                 if not found:
789 |                     logger.warning(f"Rule not found: {rule_name}")
790 | 
791 |             if not rules_to_match:
792 |                 raise YaraError("No requested rules found")
793 |         else:
794 |             # No specific rules requested, use all available rules
795 | 
796 |             # Check if we have a community:all ruleset
797 |             if not sources or "community" in sources:
798 |                 try:
799 |                     community_rules = self._compile_community_rules()
800 |                     rules_to_match.append(community_rules)
801 |                 except YaraError:
802 |                     # Community rules not available as combined set, try individual rules
803 |                     if not sources:
804 |                         sources = ["custom", "community"]
805 | 
806 |                     # For each source, get all rules
807 |                     for source in sources:
808 |                         try:
809 |                             rules = self.list_rules(source)
810 |                             for rule in rules:
811 |                                 try:
812 |                                     compiled_rule = self._compile_rule(rule.name, rule.source)
813 |                                     rules_to_match.append(compiled_rule)
814 |                                 except YaraError:
815 |                                     continue
816 |                         except YaraError:
817 |                             continue
818 |             else:
819 |                 # Use only specified sources
820 |                 for source in sources:
821 |                     try:
822 |                         rules = self.list_rules(source)
823 |                         for rule in rules:
824 |                             try:
825 |                                 compiled_rule = self._compile_rule(rule.name, rule.source)
826 |                                 rules_to_match.append(compiled_rule)
827 |                             except YaraError:
828 |                                 continue
829 |                     except YaraError:
830 |                         continue
831 | 
832 |         # Ensure we have at least one rule
833 |         if not rules_to_match:
834 |             raise YaraError("No YARA rules available")
835 | 
836 |         return rules_to_match
837 | 
838 |     def _process_matches(self, matches: List[yara.Match]) -> List[YaraMatch]:
839 |         """Process YARA matches into YaraMatch objects.
840 | 
841 |         Args:
842 |             matches: List of YARA match objects
843 | 
844 |         Returns:
845 |             List of YaraMatch objects
846 |         """
847 |         result: List[YaraMatch] = []
848 | 
849 |         for match in matches:
850 |             try:
851 |                 # Extract rule name
852 |                 rule_name = match.rule
853 | 
854 |                 # Extract namespace
855 |                 namespace = match.namespace
856 | 
857 |                 # Extract tags
858 |                 tags = match.tags
859 | 
860 |                 # Extract metadata
861 |                 meta = match.meta
862 | 
863 |                 # Create empty strings list - we're skipping string processing due to compatibility issues
864 |                 strings = []
865 | 
866 |                 # Create YaraMatch object
867 |                 yara_match = YaraMatch(rule=rule_name, namespace=namespace, tags=tags, meta=meta, strings=strings)
868 | 
869 |                 result.append(yara_match)
870 |             except Exception as e:
871 |                 logger.error(f"Error processing YARA match: {str(e)}")
872 |                 continue
873 | 
874 |         return result
875 | 
876 | 
# Shared module-level YaraService instance, created once when this module is imported
yara_service = YaraService()
879 | 
```
Page 6/6 · First · Prev · Next · Last