This is page 3 of 4. Use http://codebase.md/vibheksoni/stealth-browser-mcp?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .dockerignore
├── .github
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   ├── config.yml
│   │   ├── feature_request.md
│   │   └── showcase.yml
│   ├── labeler.yml
│   ├── pull_request_template.md
│   └── workflows
│       └── ci.yml
├── .gitignore
├── CHANGELOG.md
├── Checklist.md
├── CODE_OF_CONDUCT.md
├── CODEOWNERS
├── COMPARISON.md
├── CONTRIBUTING.md
├── demo
│   ├── augment-hero-clone.md
│   ├── augment-hero-recreation.html
│   └── README.md
├── Dockerfile
├── examples
│   └── claude_prompts.md
├── HALL_OF_FAME.md
├── LICENSE
├── media
│   ├── AugmentHeroClone.PNG
│   ├── Showcase Stealth Browser Mcp.mp4
│   ├── showcase-demo-full.gif
│   ├── showcase-demo.gif
│   └── UndetectedStealthBrowser.png
├── pyproject.toml
├── README.md
├── requirements.txt
├── ROADMAP.md
├── run_server.bat
├── run_server.sh
├── SECURITY.md
├── smithery.yaml
└── src
    ├── __init__.py
    ├── browser_manager.py
    ├── cdp_element_cloner.py
    ├── cdp_function_executor.py
    ├── comprehensive_element_cloner.py
    ├── debug_logger.py
    ├── dom_handler.py
    ├── dynamic_hook_ai_interface.py
    ├── dynamic_hook_system.py
    ├── element_cloner.py
    ├── file_based_element_cloner.py
    ├── hook_learning_system.py
    ├── js
    │   ├── comprehensive_element_extractor.js
    │   ├── extract_animations.js
    │   ├── extract_assets.js
    │   ├── extract_events.js
    │   ├── extract_related_files.js
    │   ├── extract_structure.js
    │   └── extract_styles.js
    ├── models.py
    ├── network_interceptor.py
    ├── persistent_storage.py
    ├── platform_utils.py
    ├── process_cleanup.py
    ├── progressive_element_cloner.py
    ├── response_handler.py
    ├── response_stage_hooks.py
    └── server.py
```

# Files

--------------------------------------------------------------------------------
/src/file_based_element_cloner.py:
--------------------------------------------------------------------------------

```python
1 | import asyncio 2 | import json 3 | import os 4 | 
import sys 5 | import uuid 6 | from datetime import datetime 7 | from pathlib import Path 8 | from typing import Dict, List, Any, Optional 9 | 10 | try: 11 | from .debug_logger import debug_logger 12 | except ImportError: 13 | from debug_logger import debug_logger 14 | 15 | project_root = Path(__file__).parent.parent 16 | sys.path.append(str(project_root)) 17 | 18 | from comprehensive_element_cloner import ComprehensiveElementCloner 19 | from element_cloner import element_cloner 20 | 21 | class FileBasedElementCloner: 22 | """Element cloner that saves data to files and returns file paths.""" 23 | 24 | def __init__(self, output_dir: str = "element_clones"): 25 | """ 26 | Initialize with output directory for clone files. 27 | 28 | Args: 29 | output_dir (str): Directory to save clone files. 30 | """ 31 | self.output_dir = Path(output_dir) 32 | self.output_dir.mkdir(exist_ok=True) 33 | self.comprehensive_cloner = ComprehensiveElementCloner() 34 | 35 | def _safe_process_framework_handlers(self, framework_handlers): 36 | """Safely process framework handlers that might be dict or list.""" 37 | if isinstance(framework_handlers, dict): 38 | return {k: len(v) if isinstance(v, list) else str(v) for k, v in framework_handlers.items()} 39 | elif isinstance(framework_handlers, list): 40 | return {"handlers": len(framework_handlers)} 41 | else: 42 | return {"value": str(framework_handlers)} 43 | 44 | def _generate_filename(self, prefix: str, extension: str = "json") -> str: 45 | """ 46 | Generate unique filename with timestamp. 47 | 48 | Args: 49 | prefix (str): Prefix for the filename. 50 | extension (str): File extension. 51 | 52 | Returns: 53 | str: Generated filename. 
54 | """ 55 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") 56 | unique_id = str(uuid.uuid4())[:8] 57 | return f"{prefix}_{timestamp}_{unique_id}.{extension}" 58 | 59 | async def extract_element_styles_to_file( 60 | self, 61 | tab, 62 | selector: str, 63 | include_computed: bool = True, 64 | include_css_rules: bool = True, 65 | include_pseudo: bool = True, 66 | include_inheritance: bool = False 67 | ) -> Dict[str, Any]: 68 | """ 69 | Extract element styles and save to file, returning file path. 70 | 71 | Args: 72 | tab: Browser tab instance 73 | selector (str): CSS selector for the element 74 | include_computed (bool): Include computed styles 75 | include_css_rules (bool): Include matching CSS rules 76 | include_pseudo (bool): Include pseudo-element styles 77 | include_inheritance (bool): Include style inheritance chain 78 | 79 | Returns: 80 | Dict[str, Any]: File path and summary of extracted styles 81 | """ 82 | try: 83 | debug_logger.log_info("file_element_cloner", "extract_styles_to_file", 84 | f"Starting style extraction for selector: {selector}") 85 | 86 | # Extract styles using element_cloner 87 | style_data = await element_cloner.extract_element_styles( 88 | tab, 89 | selector=selector, 90 | include_computed=include_computed, 91 | include_css_rules=include_css_rules, 92 | include_pseudo=include_pseudo, 93 | include_inheritance=include_inheritance 94 | ) 95 | 96 | # Generate filename and save 97 | filename = self._generate_filename("styles") 98 | file_path = self._save_to_file(style_data, filename) 99 | 100 | # Create summary 101 | summary = { 102 | "file_path": str(file_path), 103 | "extraction_type": "styles", 104 | "selector": selector, 105 | "url": getattr(tab, 'url', 'unknown'), 106 | "components": { 107 | "computed_styles_count": len(style_data.get('computed_styles', {})), 108 | "css_rules_count": len(style_data.get('css_rules', [])), 109 | "pseudo_elements_count": len(style_data.get('pseudo_elements', {})), 110 | "custom_properties_count": 
len(style_data.get('custom_properties', {})) 111 | } 112 | } 113 | 114 | debug_logger.log_info("file_element_cloner", "extract_styles_to_file", 115 | f"Styles saved to {file_path}") 116 | return summary 117 | 118 | except Exception as e: 119 | debug_logger.log_error("file_element_cloner", "extract_styles_to_file", e) 120 | return {"error": str(e)} 121 | 122 | def _save_to_file(self, data: Dict[str, Any], filename: str) -> str: 123 | """ 124 | Save data to file and return absolute path. 125 | 126 | Args: 127 | data (Dict[str, Any]): Data to save. 128 | filename (str): Name of the file. 129 | 130 | Returns: 131 | str: Absolute path to the saved file. 132 | """ 133 | file_path = self.output_dir / filename 134 | with open(file_path, 'w', encoding='utf-8') as f: 135 | json.dump(data, f, indent=2, ensure_ascii=False) 136 | return str(file_path.absolute()) 137 | 138 | async def extract_complete_element_to_file( 139 | self, 140 | tab, 141 | selector: str, 142 | include_children: bool = True 143 | ) -> Dict[str, Any]: 144 | """ 145 | Extract complete element using working comprehensive cloner and save to file. 146 | 147 | Args: 148 | tab: Browser tab object. 149 | selector (str): CSS selector for the element. 150 | include_children (bool): Whether to include children. 151 | 152 | Returns: 153 | Dict[str, Any]: Summary of extraction and file path. 
154 | """ 155 | try: 156 | complete_data = await self.comprehensive_cloner.extract_complete_element( 157 | tab, selector, include_children 158 | ) 159 | complete_data['_metadata'] = { 160 | 'extraction_type': 'complete_comprehensive', 161 | 'selector': selector, 162 | 'timestamp': datetime.now().isoformat(), 163 | 'include_children': include_children 164 | } 165 | filename = self._generate_filename("complete_comprehensive") 166 | file_path = self._save_to_file(complete_data, filename) 167 | debug_logger.log_info("file_element_cloner", "extract_complete_to_file", 168 | f"Saved complete element data to {file_path}") 169 | summary = { 170 | "file_path": file_path, 171 | "extraction_type": "complete_comprehensive", 172 | "selector": selector, 173 | "url": complete_data.get('url', 'unknown'), 174 | "summary": { 175 | "tag_name": complete_data.get('html', {}).get('tagName', 'unknown'), 176 | "computed_styles_count": len(complete_data.get('styles', {})), 177 | "attributes_count": len(complete_data.get('html', {}).get('attributes', [])), 178 | "event_listeners_count": len(complete_data.get('eventListeners', [])), 179 | "children_count": len(complete_data.get('children', [])) if include_children else 0, 180 | "has_pseudo_elements": bool(complete_data.get('pseudoElements')), 181 | "css_rules_count": len(complete_data.get('cssRules', [])), 182 | "animations_count": len(complete_data.get('animations', [])), 183 | "file_size_kb": round(len(json.dumps(complete_data)) / 1024, 2) 184 | } 185 | } 186 | return summary 187 | except Exception as e: 188 | debug_logger.log_error("file_element_cloner", "extract_complete_to_file", e) 189 | return {"error": str(e)} 190 | 191 | async def extract_element_structure_to_file( 192 | self, 193 | tab, 194 | element=None, 195 | selector: str = None, 196 | include_children: bool = False, 197 | include_attributes: bool = True, 198 | include_data_attributes: bool = True, 199 | max_depth: int = 3 200 | ) -> Dict[str, str]: 201 | """ 202 | Extract 
structure and save to file, return file path. 203 | 204 | Args: 205 | tab: Browser tab object. 206 | element: DOM element object. 207 | selector (str): CSS selector for the element. 208 | include_children (bool): Whether to include children. 209 | include_attributes (bool): Whether to include attributes. 210 | include_data_attributes (bool): Whether to include data attributes. 211 | max_depth (int): Maximum depth for extraction. 212 | 213 | Returns: 214 | Dict[str, str]: Summary of extraction and file path. 215 | """ 216 | try: 217 | structure_data = await element_cloner.extract_element_structure( 218 | tab, element, selector, include_children, 219 | include_attributes, include_data_attributes, max_depth 220 | ) 221 | structure_data['_metadata'] = { 222 | 'extraction_type': 'structure', 223 | 'selector': selector, 224 | 'timestamp': datetime.now().isoformat(), 225 | 'options': { 226 | 'include_children': include_children, 227 | 'include_attributes': include_attributes, 228 | 'include_data_attributes': include_data_attributes, 229 | 'max_depth': max_depth 230 | } 231 | } 232 | filename = self._generate_filename("structure") 233 | file_path = self._save_to_file(structure_data, filename) 234 | debug_logger.log_info("file_element_cloner", "extract_structure_to_file", 235 | f"Saved structure data to {file_path}") 236 | return { 237 | "file_path": file_path, 238 | "extraction_type": "structure", 239 | "selector": selector, 240 | "summary": { 241 | "tag_name": structure_data.get('tag_name'), 242 | "attributes_count": len(structure_data.get('attributes', {})), 243 | "data_attributes_count": len(structure_data.get('data_attributes', {})), 244 | "children_count": len(structure_data.get('children', [])), 245 | "dom_path": structure_data.get('dom_path') 246 | } 247 | } 248 | except Exception as e: 249 | debug_logger.log_error("file_element_cloner", "extract_structure_to_file", e) 250 | return {"error": str(e)} 251 | 252 | async def extract_element_events_to_file( 253 | self, 
254 | tab, 255 | element=None, 256 | selector: str = None, 257 | include_inline: bool = True, 258 | include_listeners: bool = True, 259 | include_framework: bool = True, 260 | analyze_handlers: bool = True 261 | ) -> Dict[str, str]: 262 | """ 263 | Extract events and save to file, return file path. 264 | 265 | Args: 266 | tab: Browser tab object. 267 | element: DOM element object. 268 | selector (str): CSS selector for the element. 269 | include_inline (bool): Include inline event handlers. 270 | include_listeners (bool): Include event listeners. 271 | include_framework (bool): Include framework event handlers. 272 | analyze_handlers (bool): Analyze event handlers. 273 | 274 | Returns: 275 | Dict[str, str]: Summary of extraction and file path. 276 | """ 277 | try: 278 | event_data = await element_cloner.extract_element_events( 279 | tab, element, selector, include_inline, 280 | include_listeners, include_framework, analyze_handlers 281 | ) 282 | event_data['_metadata'] = { 283 | 'extraction_type': 'events', 284 | 'selector': selector, 285 | 'timestamp': datetime.now().isoformat(), 286 | 'options': { 287 | 'include_inline': include_inline, 288 | 'include_listeners': include_listeners, 289 | 'include_framework': include_framework, 290 | 'analyze_handlers': analyze_handlers 291 | } 292 | } 293 | filename = self._generate_filename("events") 294 | file_path = self._save_to_file(event_data, filename) 295 | debug_logger.log_info("file_element_cloner", "extract_events_to_file", 296 | f"Saved events data to {file_path}") 297 | return { 298 | "file_path": file_path, 299 | "extraction_type": "events", 300 | "selector": selector, 301 | "summary": { 302 | "inline_handlers_count": len(event_data.get('inline_handlers', [])), 303 | "event_listeners_count": len(event_data.get('event_listeners', [])), 304 | "detected_frameworks": event_data.get('detected_frameworks', []), 305 | "framework_handlers": self._safe_process_framework_handlers(event_data.get('framework_handlers', {})) 306 
| } 307 | } 308 | except Exception as e: 309 | debug_logger.log_error("file_element_cloner", "extract_events_to_file", e) 310 | return {"error": str(e)} 311 | 312 | async def extract_element_animations_to_file( 313 | self, 314 | tab, 315 | element=None, 316 | selector: str = None, 317 | include_css_animations: bool = True, 318 | include_transitions: bool = True, 319 | include_transforms: bool = True, 320 | analyze_keyframes: bool = True 321 | ) -> Dict[str, str]: 322 | """ 323 | Extract animations and save to file, return file path. 324 | 325 | Args: 326 | tab: Browser tab object. 327 | element: DOM element object. 328 | selector (str): CSS selector for the element. 329 | include_css_animations (bool): Include CSS animations. 330 | include_transitions (bool): Include transitions. 331 | include_transforms (bool): Include transforms. 332 | analyze_keyframes (bool): Analyze keyframes. 333 | 334 | Returns: 335 | Dict[str, str]: Summary of extraction and file path. 336 | """ 337 | try: 338 | animation_data = await element_cloner.extract_element_animations( 339 | tab, element, selector, include_css_animations, 340 | include_transitions, include_transforms, analyze_keyframes 341 | ) 342 | animation_data['_metadata'] = { 343 | 'extraction_type': 'animations', 344 | 'selector': selector, 345 | 'timestamp': datetime.now().isoformat(), 346 | 'options': { 347 | 'include_css_animations': include_css_animations, 348 | 'include_transitions': include_transitions, 349 | 'include_transforms': include_transforms, 350 | 'analyze_keyframes': analyze_keyframes 351 | } 352 | } 353 | filename = self._generate_filename("animations") 354 | file_path = self._save_to_file(animation_data, filename) 355 | debug_logger.log_info("file_element_cloner", "extract_animations_to_file", 356 | f"Saved animations data to {file_path}") 357 | return { 358 | "file_path": file_path, 359 | "extraction_type": "animations", 360 | "selector": selector, 361 | "summary": { 362 | "has_animations": 
animation_data.get('animations', {}).get('animation_name', 'none') != 'none', 363 | "has_transitions": animation_data.get('transitions', {}).get('transition_property', 'none') != 'none', 364 | "has_transforms": animation_data.get('transforms', {}).get('transform', 'none') != 'none', 365 | "keyframes_count": len(animation_data.get('keyframes', [])) 366 | } 367 | } 368 | except Exception as e: 369 | debug_logger.log_error("file_element_cloner", "extract_animations_to_file", e) 370 | return {"error": str(e)} 371 | 372 | async def extract_element_assets_to_file( 373 | self, 374 | tab, 375 | element=None, 376 | selector: str = None, 377 | include_images: bool = True, 378 | include_backgrounds: bool = True, 379 | include_fonts: bool = True, 380 | fetch_external: bool = False 381 | ) -> Dict[str, str]: 382 | """ 383 | Extract assets and save to file, return file path. 384 | 385 | Args: 386 | tab: Browser tab object. 387 | element: DOM element object. 388 | selector (str): CSS selector for the element. 389 | include_images (bool): Include images. 390 | include_backgrounds (bool): Include background images. 391 | include_fonts (bool): Include fonts. 392 | fetch_external (bool): Fetch external assets. 393 | 394 | Returns: 395 | Dict[str, str]: Summary of extraction and file path. 
396 | """ 397 | try: 398 | asset_data = await element_cloner.extract_element_assets( 399 | tab, element, selector, include_images, 400 | include_backgrounds, include_fonts, fetch_external 401 | ) 402 | asset_data['_metadata'] = { 403 | 'extraction_type': 'assets', 404 | 'selector': selector, 405 | 'timestamp': datetime.now().isoformat(), 406 | 'options': { 407 | 'include_images': include_images, 408 | 'include_backgrounds': include_backgrounds, 409 | 'include_fonts': include_fonts, 410 | 'fetch_external': fetch_external 411 | } 412 | } 413 | filename = self._generate_filename("assets") 414 | file_path = self._save_to_file(asset_data, filename) 415 | debug_logger.log_info("file_element_cloner", "extract_assets_to_file", 416 | f"Saved assets data to {file_path}") 417 | return { 418 | "file_path": file_path, 419 | "extraction_type": "assets", 420 | "selector": selector, 421 | "summary": { 422 | "images_count": len(asset_data.get('images', [])), 423 | "background_images_count": len(asset_data.get('background_images', [])), 424 | "font_family": asset_data.get('fonts', {}).get('family'), 425 | "custom_fonts_count": len(asset_data.get('fonts', {}).get('custom_fonts', [])), 426 | "icons_count": len(asset_data.get('icons', [])), 427 | "videos_count": len(asset_data.get('videos', [])), 428 | "audio_count": len(asset_data.get('audio', [])) 429 | } 430 | } 431 | except Exception as e: 432 | debug_logger.log_error("file_element_cloner", "extract_assets_to_file", e) 433 | return {"error": str(e)} 434 | 435 | async def extract_related_files_to_file( 436 | self, 437 | tab, 438 | element=None, 439 | selector: str = None, 440 | analyze_css: bool = True, 441 | analyze_js: bool = True, 442 | follow_imports: bool = False, 443 | max_depth: int = 2 444 | ) -> Dict[str, str]: 445 | """ 446 | Extract related files and save to file, return file path. 447 | 448 | Args: 449 | tab: Browser tab object. 450 | element: DOM element object. 451 | selector (str): CSS selector for the element. 
452 | analyze_css (bool): Analyze CSS files. 453 | analyze_js (bool): Analyze JS files. 454 | follow_imports (bool): Follow imports. 455 | max_depth (int): Maximum depth for import following. 456 | 457 | Returns: 458 | Dict[str, str]: Summary of extraction and file path. 459 | """ 460 | try: 461 | file_data = await element_cloner.extract_related_files( 462 | tab, element, selector, analyze_css, analyze_js, follow_imports, max_depth 463 | ) 464 | file_data['_metadata'] = { 465 | 'extraction_type': 'related_files', 466 | 'selector': selector, 467 | 'timestamp': datetime.now().isoformat(), 468 | 'options': { 469 | 'analyze_css': analyze_css, 470 | 'analyze_js': analyze_js, 471 | 'follow_imports': follow_imports, 472 | 'max_depth': max_depth 473 | } 474 | } 475 | filename = self._generate_filename("related_files") 476 | file_path = self._save_to_file(file_data, filename) 477 | debug_logger.log_info("file_element_cloner", "extract_related_files_to_file", 478 | f"Saved related files data to {file_path}") 479 | return { 480 | "file_path": file_path, 481 | "extraction_type": "related_files", 482 | "selector": selector, 483 | "summary": { 484 | "stylesheets_count": len(file_data.get('stylesheets', [])), 485 | "scripts_count": len(file_data.get('scripts', [])), 486 | "imports_count": len(file_data.get('imports', [])), 487 | "modules_count": len(file_data.get('modules', [])) 488 | } 489 | } 490 | except Exception as e: 491 | debug_logger.log_error("file_element_cloner", "extract_related_files_to_file", e) 492 | return {"error": str(e)} 493 | 494 | async def clone_element_complete_to_file( 495 | self, 496 | tab, 497 | element=None, 498 | selector: str = None, 499 | extraction_options: Dict[str, Any] = None 500 | ) -> Dict[str, Any]: 501 | """ 502 | Master function that extracts all element data and saves to file. 503 | Returns file path instead of full data. 504 | 505 | Args: 506 | tab: Browser tab object. 507 | element: DOM element object. 
508 | selector (str): CSS selector for the element. 509 | extraction_options (Dict[str, Any]): Extraction options. 510 | 511 | Returns: 512 | Dict[str, Any]: Summary of extraction and file path. 513 | """ 514 | try: 515 | complete_data = await element_cloner.clone_element_complete( 516 | tab, element, selector, extraction_options 517 | ) 518 | if 'error' in complete_data: 519 | return complete_data 520 | complete_data['_metadata'] = { 521 | 'extraction_type': 'complete_clone', 522 | 'selector': selector, 523 | 'timestamp': datetime.now().isoformat(), 524 | 'extraction_options': extraction_options 525 | } 526 | filename = self._generate_filename("complete_clone") 527 | file_path = self._save_to_file(complete_data, filename) 528 | summary = { 529 | "file_path": file_path, 530 | "extraction_type": "complete_clone", 531 | "selector": selector, 532 | "url": complete_data.get('url'), 533 | "components": {} 534 | } 535 | if 'styles' in complete_data: 536 | styles = complete_data['styles'] 537 | summary['components']['styles'] = { 538 | 'computed_styles_count': len(styles.get('computed_styles', {})), 539 | 'css_rules_count': len(styles.get('css_rules', [])), 540 | 'pseudo_elements_count': len(styles.get('pseudo_elements', {})) 541 | } 542 | if 'structure' in complete_data: 543 | structure = complete_data['structure'] 544 | summary['components']['structure'] = { 545 | 'tag_name': structure.get('tag_name'), 546 | 'attributes_count': len(structure.get('attributes', {})), 547 | 'children_count': len(structure.get('children', [])) 548 | } 549 | if 'events' in complete_data: 550 | events = complete_data['events'] 551 | summary['components']['events'] = { 552 | 'inline_handlers_count': len(events.get('inline_handlers', [])), 553 | 'detected_frameworks': events.get('detected_frameworks', []) 554 | } 555 | if 'animations' in complete_data: 556 | animations = complete_data['animations'] 557 | summary['components']['animations'] = { 558 | 'has_animations': 
animations.get('animations', {}).get('animation_name', 'none') != 'none', 559 | 'keyframes_count': len(animations.get('keyframes', [])) 560 | } 561 | if 'assets' in complete_data: 562 | assets = complete_data['assets'] 563 | summary['components']['assets'] = { 564 | 'images_count': len(assets.get('images', [])), 565 | 'background_images_count': len(assets.get('background_images', [])) 566 | } 567 | if 'related_files' in complete_data: 568 | files = complete_data['related_files'] 569 | summary['components']['related_files'] = { 570 | 'stylesheets_count': len(files.get('stylesheets', [])), 571 | 'scripts_count': len(files.get('scripts', [])) 572 | } 573 | debug_logger.log_info("file_element_cloner", "clone_complete_to_file", 574 | f"Saved complete clone data to {file_path}") 575 | return summary 576 | except Exception as e: 577 | debug_logger.log_error("file_element_cloner", "clone_complete_to_file", e) 578 | return {"error": str(e)} 579 | 580 | def list_clone_files(self) -> List[Dict[str, Any]]: 581 | """ 582 | List all clone files in the output directory. 583 | 584 | Returns: 585 | List[Dict[str, Any]]: List of file info dictionaries. 
586 | """ 587 | files = [] 588 | for file_path in self.output_dir.glob("*.json"): 589 | try: 590 | file_info = { 591 | "file_path": str(file_path.absolute()), 592 | "filename": file_path.name, 593 | "size": file_path.stat().st_size, 594 | "created": datetime.fromtimestamp(file_path.stat().st_ctime).isoformat(), 595 | "modified": datetime.fromtimestamp(file_path.stat().st_mtime).isoformat() 596 | } 597 | try: 598 | with open(file_path, 'r', encoding='utf-8') as f: 599 | data = json.load(f) 600 | if '_metadata' in data: 601 | file_info['metadata'] = data['_metadata'] 602 | except: 603 | pass 604 | files.append(file_info) 605 | except Exception as e: 606 | debug_logger.log_warning("file_element_cloner", "list_files", f"Error reading {file_path}: {e}") 607 | files.sort(key=lambda x: x['created'], reverse=True) 608 | return files 609 | 610 | def cleanup_old_files(self, max_age_hours: int = 24) -> int: 611 | """ 612 | Clean up clone files older than specified hours. 613 | 614 | Args: 615 | max_age_hours (int): Maximum age of files in hours. 616 | 617 | Returns: 618 | int: Number of deleted files. 
619 | """ 620 | import time 621 | cutoff_time = time.time() - (max_age_hours * 3600) 622 | deleted_count = 0 623 | for file_path in self.output_dir.glob("*.json"): 624 | try: 625 | if file_path.stat().st_ctime < cutoff_time: 626 | file_path.unlink() 627 | deleted_count += 1 628 | debug_logger.log_info("file_element_cloner", "cleanup", f"Deleted old file: {file_path.name}") 629 | except Exception as e: 630 | debug_logger.log_warning("file_element_cloner", "cleanup", f"Error deleting {file_path}: {e}") 631 | return deleted_count 632 | 633 | file_based_element_cloner = FileBasedElementCloner() ``` -------------------------------------------------------------------------------- /src/element_cloner.py: -------------------------------------------------------------------------------- ```python 1 | """Advanced element cloning system with complete styling and JS extraction.""" 2 | 3 | import asyncio 4 | import json 5 | import re 6 | from typing import Dict, List, Any, Optional, Set, Union 7 | from urllib.parse import urljoin, urlparse 8 | from pathlib import Path 9 | import requests 10 | 11 | try: 12 | from .debug_logger import debug_logger 13 | except ImportError: 14 | from debug_logger import debug_logger 15 | 16 | class ElementCloner: 17 | """Advanced element cloning with full fidelity extraction.""" 18 | 19 | def __init__(self): 20 | self.extracted_files = {} 21 | self.framework_patterns = { 22 | 'react': [r'_react', r'__reactInternalInstance', r'__reactFiber'], 23 | 'vue': [r'__vue__', r'_vnode', r'$el'], 24 | 'angular': [r'ng-', r'__ngContext__', r'ɵ'], 25 | 'jquery': [r'jQuery', r'\$\.', r'__jquery'] 26 | } 27 | 28 | async def extract_element_styles( 29 | self, 30 | tab, 31 | element=None, 32 | selector: str = None, 33 | include_computed: bool = True, 34 | include_css_rules: bool = True, 35 | include_pseudo: bool = True, 36 | include_inheritance: bool = False 37 | ) -> Dict[str, Any]: 38 | """ 39 | Extract complete styling information from an element. 
40 | 41 | Args: 42 | tab (Any): Browser tab instance 43 | element (Any): Element object or None to use selector 44 | selector (str): CSS selector if element is None 45 | include_computed (bool): Include computed styles 46 | include_css_rules (bool): Include matching CSS rules 47 | include_pseudo (bool): Include pseudo-element styles 48 | include_inheritance (bool): Include style inheritance chain 49 | 50 | Returns: 51 | Dict[str, Any]: Dict with styling data 52 | """ 53 | try: 54 | return await self.extract_element_styles_cdp( 55 | tab=tab, 56 | element=element, 57 | selector=selector, 58 | include_computed=include_computed, 59 | include_css_rules=include_css_rules, 60 | include_pseudo=include_pseudo, 61 | include_inheritance=include_inheritance 62 | ) 63 | except Exception as e: 64 | debug_logger.log_error("element_cloner", "extract_styles", e) 65 | return {"error": str(e)} 66 | 67 | def _load_js_file(self, filename: str, selector: str, options: dict) -> str: 68 | """Load and prepare JavaScript file with template substitution""" 69 | js_dir = Path(__file__).parent / "js" 70 | js_file = js_dir / filename 71 | 72 | if not js_file.exists(): 73 | raise FileNotFoundError(f"JavaScript file not found: {js_file}") 74 | 75 | with open(js_file, 'r', encoding='utf-8') as f: 76 | js_code = f.read() 77 | 78 | js_code = js_code.replace('$SELECTOR$', selector) 79 | js_code = js_code.replace('$SELECTOR', selector) 80 | js_code = js_code.replace('$OPTIONS$', json.dumps(options)) 81 | js_code = js_code.replace('$OPTIONS', json.dumps(options)) 82 | 83 | for key, value in options.items(): 84 | placeholder_key = f'${key.upper()}' 85 | placeholder_value = 'true' if value else 'false' 86 | js_code = js_code.replace(placeholder_key, placeholder_value) 87 | 88 | return js_code 89 | 90 | def _convert_nodriver_result(self, data): 91 | """Convert nodriver's array format back to dict""" 92 | if isinstance(data, list) and len(data) > 0 and isinstance(data[0], list): 93 | result = {} 94 | for 
item in data: 95 | if isinstance(item, list) and len(item) == 2: 96 | key = item[0] 97 | value_obj = item[1] 98 | if isinstance(value_obj, dict) and 'type' in value_obj: 99 | if value_obj['type'] == 'string': 100 | result[key] = value_obj.get('value', '') 101 | elif value_obj['type'] == 'number': 102 | result[key] = value_obj.get('value', 0) 103 | elif value_obj['type'] == 'null': 104 | result[key] = None 105 | elif value_obj['type'] == 'array': 106 | result[key] = value_obj.get('value', []) 107 | elif value_obj['type'] == 'object': 108 | result[key] = self._convert_nodriver_result(value_obj.get('value', [])) 109 | else: 110 | result[key] = value_obj.get('value') 111 | else: 112 | result[key] = value_obj 113 | return result 114 | return data 115 | 116 | async def extract_element_structure( 117 | self, 118 | tab, 119 | element=None, 120 | selector: str = None, 121 | include_children: bool = False, 122 | include_attributes: bool = True, 123 | include_data_attributes: bool = True, 124 | max_depth: int = 3 125 | ) -> Dict[str, Any]: 126 | """ 127 | Extract complete HTML structure and DOM information. 
128 | 129 | Args: 130 | tab (Any): Browser tab instance 131 | element (Any): Element object or None to use selector 132 | selector (str): CSS selector if element is None 133 | include_children (bool): Include child elements 134 | include_attributes (bool): Include all attributes 135 | include_data_attributes (bool): Include data-* attributes specifically 136 | max_depth (int): Maximum depth for children extraction 137 | 138 | Returns: 139 | Dict[str, Any]: Dict with structure data 140 | """ 141 | try: 142 | if not selector: 143 | return {"error": "Selector is required"} 144 | 145 | options = { 146 | 'include_children': include_children, 147 | 'include_attributes': include_attributes, 148 | 'include_data_attributes': include_data_attributes, 149 | 'max_depth': max_depth 150 | } 151 | 152 | js_code = self._load_js_file('extract_structure.js', selector, options) 153 | structure_data = await tab.evaluate(js_code) 154 | 155 | if hasattr(structure_data, 'exception_details'): 156 | return {"error": f"JavaScript error: {structure_data.exception_details}"} 157 | elif isinstance(structure_data, dict): 158 | debug_logger.log_info("element_cloner", "extract_structure", f"Extracted structure for {structure_data.get('tag_name', 'unknown')} element") 159 | return structure_data 160 | elif isinstance(structure_data, list): 161 | result = self._convert_nodriver_result(structure_data) 162 | debug_logger.log_info("element_cloner", "extract_structure", f"Extracted structure for {result.get('tag_name', 'unknown')} element") 163 | return result 164 | else: 165 | debug_logger.log_warning("element_cloner", "extract_structure", f"Got unexpected type: {type(structure_data)}") 166 | return {"error": f"Unexpected return type: {type(structure_data)}", "raw_data": str(structure_data)} 167 | except Exception as e: 168 | debug_logger.log_error("element_cloner", "extract_structure", e) 169 | return {"error": str(e)} 170 | 171 | async def extract_element_events( 172 | self, 173 | tab, 174 | 
element=None, 175 | selector: str = None, 176 | include_inline: bool = True, 177 | include_listeners: bool = True, 178 | include_framework: bool = True, 179 | analyze_handlers: bool = True 180 | ) -> Dict[str, Any]: 181 | """ 182 | Extract complete event listener and JavaScript handler information. 183 | 184 | Args: 185 | tab (Any): Browser tab instance 186 | element (Any): Element object or None to use selector 187 | selector (str): CSS selector if element is None 188 | include_inline (bool): Include inline event handlers (onclick, etc.) 189 | include_listeners (bool): Include addEventListener attached handlers 190 | include_framework (bool): Include framework-specific handlers (React, Vue, etc.) 191 | analyze_handlers (bool): Analyze handler functions for details 192 | 193 | Returns: 194 | Dict[str, Any]: Dict with event data 195 | """ 196 | try: 197 | if not selector: 198 | return {"error": "Selector is required"} 199 | 200 | options = { 201 | 'include_inline': include_inline, 202 | 'include_listeners': include_listeners, 203 | 'include_framework': include_framework, 204 | 'analyze_handlers': analyze_handlers 205 | } 206 | 207 | js_code = self._load_js_file('extract_events.js', selector, options) 208 | event_data = await tab.evaluate(js_code) 209 | 210 | if hasattr(event_data, 'exception_details'): 211 | return {"error": f"JavaScript error: {event_data.exception_details}"} 212 | elif isinstance(event_data, dict): 213 | debug_logger.log_info("element_cloner", "extract_events", f"Extracted events for element") 214 | return event_data 215 | elif isinstance(event_data, list): 216 | result = self._convert_nodriver_result(event_data) 217 | debug_logger.log_info("element_cloner", "extract_events", f"Extracted events for element") 218 | return result 219 | else: 220 | debug_logger.log_warning("element_cloner", "extract_events", f"Got unexpected type: {type(event_data)}") 221 | return {"error": f"Unexpected return type: {type(event_data)}", "raw_data": str(event_data)} 
async def extract_element_animations(
    self,
    tab,
    element=None,
    selector: str = None,
    include_css_animations: bool = True,
    include_transitions: bool = True,
    include_transforms: bool = True,
    analyze_keyframes: bool = True
) -> Dict[str, Any]:
    """
    Collect CSS animation, transition and transform data for one element.

    ``extract_animations.js`` is loaded through ``self._load_js_file`` with
    the selector and the flags below, then evaluated on ``tab``.

    Args:
        tab (Any): Browser tab instance exposing an awaitable ``evaluate``.
        element (Any): Accepted for signature parity; not read by this method.
        selector (str): CSS selector of the target element (required).
        include_css_animations (bool): Include CSS @keyframes animations
        include_transitions (bool): Include CSS transitions
        include_transforms (bool): Include CSS transforms
        analyze_keyframes (bool): Analyze keyframe rules

    Returns:
        Dict[str, Any]: Animation data, or a dict with an ``"error"`` key on failure.
    """
    try:
        if not selector:
            return {"error": "Selector is required"}

        flags = dict(
            include_css_animations=include_css_animations,
            include_transitions=include_transitions,
            include_transforms=include_transforms,
            analyze_keyframes=analyze_keyframes,
        )
        evaluated = await tab.evaluate(
            self._load_js_file('extract_animations.js', selector, flags)
        )

        if hasattr(evaluated, 'exception_details'):
            return {"error": f"JavaScript error: {evaluated.exception_details}"}

        if isinstance(evaluated, dict):
            animations = evaluated
        elif isinstance(evaluated, list):
            # Lists are passed through the class's nodriver-result converter.
            animations = self._convert_nodriver_result(evaluated)
        else:
            debug_logger.log_warning("element_cloner", "extract_animations", f"Got unexpected type: {type(evaluated)}")
            return {"error": f"Unexpected return type: {type(evaluated)}", "raw_data": str(evaluated)}

        debug_logger.log_info("element_cloner", "extract_animations", "Extracted animations for element")
        return animations
    except Exception as exc:
        debug_logger.log_error("element_cloner", "extract_animations", exc)
        return {"error": str(exc)}
async def extract_element_assets(
    self,
    tab,
    element=None,
    selector: str = None,
    include_images: bool = True,
    include_backgrounds: bool = True,
    include_fonts: bool = True,
    fetch_external: bool = False
) -> Dict[str, Any]:
    """
    Extract all assets related to an element (images, fonts, etc.).

    The page-side work is done by ``js/extract_assets.js`` (next to this
    module); its ``$SELECTOR`` / ``$INCLUDE_*`` / ``$FETCH_EXTERNAL``
    placeholders are substituted textually before evaluation.

    Args:
        tab (Any): Browser tab instance exposing an awaitable ``evaluate``.
        element (Any): Accepted for signature parity; not read by this method.
        selector (str): CSS selector of the target element (required).
        include_images (bool): Include img src and related images
        include_backgrounds (bool): Include background images
        include_fonts (bool): Include font information
        fetch_external (bool): When True, download each http(s) background
            image and record its content type, size and status under
            ``external_assets``.

    Returns:
        Dict[str, Any]: Asset data, or a dict with an ``"error"`` key on failure.
    """
    try:
        if not selector:
            return {"error": "Selector is required"}

        js_file = Path(__file__).parent / "js" / "extract_assets.js"
        if not js_file.exists():
            return {"error": f"JavaScript file not found: {js_file}"}

        js_code = js_file.read_text(encoding='utf-8')

        # Substitution order is kept as-is ($SELECTOR first).
        # NOTE(review): the selector is inserted verbatim; whether the template
        # quotes it is not visible from here - confirm before passing untrusted input.
        substitutions = {
            '$SELECTOR': selector,
            '$INCLUDE_IMAGES': 'true' if include_images else 'false',
            '$INCLUDE_BACKGROUNDS': 'true' if include_backgrounds else 'false',
            '$INCLUDE_FONTS': 'true' if include_fonts else 'false',
            '$FETCH_EXTERNAL': 'true' if fetch_external else 'false',
        }
        for placeholder, replacement in substitutions.items():
            js_code = js_code.replace(placeholder, replacement)

        asset_data = await tab.evaluate(js_code)
        if hasattr(asset_data, 'exception_details'):
            return {"error": f"JavaScript error: {asset_data.exception_details}"}
        if isinstance(asset_data, list):
            # Convert nodriver's array format back to dict
            asset_data = self._convert_nodriver_result(asset_data)
        elif not isinstance(asset_data, dict):
            debug_logger.log_warning("element_cloner", "extract_assets", f"Got unexpected type: {type(asset_data)}")
            return {"error": f"Unexpected return type: {type(asset_data)}", "raw_data": str(asset_data)}

        if fetch_external and isinstance(asset_data, dict):
            import asyncio  # local import keeps this block self-contained

            loop = asyncio.get_running_loop()
            external_assets = asset_data['external_assets'] = {}
            for bg_img in asset_data.get('background_images') or []:
                # Resolve the URL before the try block so the warning below can
                # always name it; malformed entries are skipped instead of
                # aborting the whole extraction.
                url = bg_img.get('url') if isinstance(bg_img, dict) else None
                if not isinstance(url, str) or not url.startswith('http'):
                    continue
                try:
                    # requests is blocking: run it in the default executor so
                    # the event loop stays responsive during the download.
                    response = await loop.run_in_executor(
                        None, lambda target=url: requests.get(target, timeout=5)
                    )
                    external_assets[url] = {
                        'content_type': response.headers.get('content-type'),
                        'size': len(response.content),
                        'status': response.status_code
                    }
                except Exception as e:
                    debug_logger.log_warning("element_cloner", "extract_assets", f"Could not fetch asset {url}: {e}")

        debug_logger.log_info("element_cloner", "extract_assets", "Extracted assets for element")
        return asset_data
    except Exception as e:
        debug_logger.log_error("element_cloner", "extract_assets", e)
        return {"error": str(e)}
370 | 371 | Args: 372 | tab (Any): Browser tab instance 373 | element (Any): Element object or None to use selector 374 | selector (str): CSS selector if element is None 375 | analyze_css (bool): Analyze linked CSS files 376 | analyze_js (bool): Analyze linked JS files 377 | follow_imports (bool): Follow @import and module imports 378 | max_depth (int): Maximum depth for following imports 379 | 380 | Returns: 381 | Dict[str, Any]: Dict with related file data 382 | """ 383 | try: 384 | js_dir = Path(__file__).parent / "js" 385 | js_file = js_dir / "extract_related_files.js" 386 | 387 | if not js_file.exists(): 388 | return {"error": f"JavaScript file not found: {js_file}"} 389 | 390 | with open(js_file, 'r', encoding='utf-8') as f: 391 | js_code = f.read() 392 | 393 | js_code = js_code.replace('$ANALYZE_CSS', 'true' if analyze_css else 'false') 394 | js_code = js_code.replace('$ANALYZE_JS', 'true' if analyze_js else 'false') 395 | js_code = js_code.replace('$FOLLOW_IMPORTS', 'true' if follow_imports else 'false') 396 | js_code = js_code.replace('$MAX_DEPTH', str(max_depth)) 397 | 398 | file_data = await tab.evaluate(js_code) 399 | if hasattr(file_data, 'exception_details'): 400 | return {"error": f"JavaScript error: {file_data.exception_details}"} 401 | elif isinstance(file_data, dict): 402 | pass 403 | elif isinstance(file_data, list): 404 | file_data = self._convert_nodriver_result(file_data) 405 | else: 406 | debug_logger.log_warning("element_cloner", "extract_related_files", f"Got unexpected type: {type(file_data)}") 407 | return {"error": f"Unexpected return type: {type(file_data)}", "raw_data": str(file_data)} 408 | 409 | if follow_imports and max_depth > 0 and isinstance(file_data, dict): 410 | await self._fetch_and_analyze_files(file_data, tab.url, max_depth) 411 | 412 | debug_logger.log_info("element_cloner", "extract_related_files", f"Found related files") 413 | return file_data 414 | except Exception as e: 415 | debug_logger.log_error("element_cloner", 
"extract_related_files", e) 416 | return {"error": str(e)} 417 | 418 | async def _fetch_and_analyze_files(self, file_data: Dict, base_url: str, max_depth: int) -> None: 419 | """ 420 | Fetch and analyze external CSS/JS files for additional context. 421 | 422 | Args: 423 | file_data (Dict): Data structure containing file info 424 | base_url (str): Base URL for resolving relative paths 425 | max_depth (int): Maximum depth for following imports 426 | 427 | Returns: 428 | None 429 | """ 430 | for stylesheet in file_data['stylesheets']: 431 | if stylesheet.get('href') and stylesheet['href'] not in self.extracted_files: 432 | try: 433 | response = requests.get(stylesheet['href'], timeout=10) 434 | if response.status_code == 200: 435 | content = response.text 436 | self.extracted_files[stylesheet['href']] = content 437 | imports = re.findall(r'@import\s+["\']([^"\']+)["\']', content) 438 | stylesheet['imports'] = [] 439 | for imp in imports: 440 | absolute_url = urljoin(stylesheet['href'], imp) 441 | stylesheet['imports'].append(absolute_url) 442 | css_vars = re.findall(r'--[\w-]+:\s*[^;]+', content) 443 | stylesheet['custom_properties'] = css_vars 444 | except Exception as e: 445 | debug_logger.log_warning("element_cloner", "fetch_css", f"Could not fetch CSS file {stylesheet.get('href')}: {e}") 446 | for script in file_data['scripts']: 447 | if script.get('src') and script['src'] not in self.extracted_files: 448 | try: 449 | response = requests.get(script['src'], timeout=10) 450 | if response.status_code == 200: 451 | content = response.text 452 | self.extracted_files[script['src']] = content 453 | script['detected_frameworks'] = [] 454 | for framework, patterns in self.framework_patterns.items(): 455 | for pattern in patterns: 456 | if re.search(pattern, content, re.IGNORECASE): 457 | if framework not in script['detected_frameworks']: 458 | script['detected_frameworks'].append(framework) 459 | imports = re.findall(r'import.*from\s+["\']([^"\']+)["\']', content) 460 | 
async def clone_element_complete(
    self,
    tab,
    element=None,
    selector: str = None,
    extraction_options: Dict[str, Any] = None
) -> Dict[str, Any]:
    """
    Master function that extracts all element data using specialized functions.

    All extractors run concurrently; a failure in one is reported under its
    own key as ``{"error": ...}`` and does not affect the others.

    Args:
        tab (Any): Browser tab instance
        element (Any): Element object or None to use selector
        selector (str): CSS selector if element is None. It is also forwarded
            to every per-element extractor, because those locate the element
            by selector.
        extraction_options (Dict[str, Any]): Dict specifying what to extract and options for each
            Example: {
                'styles': {'include_computed': True, 'include_pseudo': True},
                'structure': {'include_children': True, 'max_depth': 2},
                'events': {'include_framework': True, 'analyze_handlers': True},
                'animations': {'analyze_keyframes': True},
                'assets': {'fetch_external': True},
                'related_files': {'follow_imports': True, 'max_depth': 1}
            }
            Entries are merged over the built-in defaults.

    Returns:
        Dict[str, Any]: Complete element clone data (``url``, ``timestamp``,
        ``selector``, ``extraction_options`` plus one key per extractor), or a
        dict with an ``"error"`` key on failure.
    """
    try:
        options = {
            'styles': {'include_computed': True, 'include_css_rules': True, 'include_pseudo': True},
            'structure': {'include_children': False, 'include_attributes': True},
            'events': {'include_framework': True, 'analyze_handlers': False},
            'animations': {'analyze_keyframes': True},
            'assets': {'fetch_external': False},
            'related_files': {'follow_imports': False}
        }
        for key, value in (extraction_options or {}).items():
            if key in options:
                options[key].update(value)
            else:
                options[key] = value

        if element is None and selector:
            element = await tab.select(selector)
        if not element:
            return {"error": "Element not found"}

        result = {
            "url": tab.url,
            # NOTE: this is the event loop's monotonic clock, not wall-clock time.
            "timestamp": asyncio.get_event_loop().time(),
            "selector": selector,
            "extraction_options": options
        }

        def element_kwargs(section: str) -> Dict[str, Any]:
            # Work on a copy so the echoed options stay untouched, and let an
            # explicit 'selector' in the caller's options take precedence.
            kwargs = dict(options[section])
            kwargs.setdefault('selector', selector)
            return kwargs

        # The per-element extractors visible in this class return
        # "Selector is required" without a selector, so it is forwarded.
        # NOTE(review): extract_element_styles is assumed to accept `selector`
        # like its siblings - confirm against its definition.
        tasks = []
        if 'styles' in options:
            tasks.append(('styles', self.extract_element_styles(tab, element, **element_kwargs('styles'))))
        if 'structure' in options:
            tasks.append(('structure', self.extract_element_structure(tab, element, **element_kwargs('structure'))))
        if 'events' in options:
            tasks.append(('events', self.extract_element_events(tab, element, **element_kwargs('events'))))
        if 'animations' in options:
            tasks.append(('animations', self.extract_element_animations(tab, element, **element_kwargs('animations'))))
        if 'assets' in options:
            tasks.append(('assets', self.extract_element_assets(tab, element, **element_kwargs('assets'))))
        if 'related_files' in options:
            # Page-level extractor: it does not need the element or selector.
            tasks.append(('related_files', self.extract_related_files(tab, **options['related_files'])))

        outcomes = await asyncio.gather(*(coro for _, coro in tasks), return_exceptions=True)
        for (name, _), outcome in zip(tasks, outcomes):
            result[name] = {"error": str(outcome)} if isinstance(outcome, Exception) else outcome

        debug_logger.log_info("element_cloner", "clone_complete", f"Complete element clone extracted with {len(tasks)} data types")
        return result
    except Exception as e:
        debug_logger.log_error("element_cloner", "clone_complete", e)
        return {"error": str(e)}
async def extract_element_styles_cdp(
    self,
    tab,
    element=None,
    selector: str = None,
    include_computed: bool = True,
    include_css_rules: bool = True,
    include_pseudo: bool = True,
    include_inheritance: bool = False
) -> Dict[str, Any]:
    """
    Extract complete styling information using direct CDP calls (no JavaScript evaluation).
    This prevents hanging issues by using nodriver's native CDP methods.

    The DOM and CSS domains are enabled on the tab first. The result always
    carries ``"method": "cdp_direct"``; the other keys (``computed_styles``,
    ``css_rules``, ``inline_style``, ``attributes_style``, ``pseudo_elements``,
    ``inheritance_chain``) are present only when requested and available.

    Args:
        tab (Any): Browser tab instance
        element (Any): Element object or None to use selector
        selector (str): CSS selector if element is None
        include_computed (bool): Include computed styles
        include_css_rules (bool): Include matching CSS rules
        include_pseudo (bool): Include pseudo-element styles (only evaluated
            when ``include_css_rules`` is also True, since it reads the same
            matched-styles response)
        include_inheritance (bool): Include style inheritance chain (same
            dependency on ``include_css_rules``)

    Returns:
        Dict[str, Any]: Dict with styling data, or a dict with an ``"error"``
        key when the element or its node id cannot be resolved or a CDP call fails.
    """
    try:
        import nodriver.cdp as cdp

        await tab.send(cdp.dom.enable())
        await tab.send(cdp.css.enable())

        if element is None and selector:
            element = await tab.select(selector)
        if not element:
            return {"error": "Element not found"}

        # Prefer the element's own node_id; otherwise resolve one from its
        # backend node id via DOM.describeNode.
        if hasattr(element, 'node_id'):
            node_id = element.node_id
        elif hasattr(element, 'backend_node_id'):
            node_info = await tab.send(cdp.dom.describe_node(backend_node_id=element.backend_node_id))
            node_id = node_info.node.node_id
        else:
            return {"error": "Could not get node ID from element"}

        result = {"method": "cdp_direct"}

        if include_computed:
            debug_logger.log_info("element_cloner", "extract_styles_cdp", "Getting computed styles via CDP")
            computed_styles_list = await tab.send(cdp.css.get_computed_style_for_node(node_id))
            result["computed_styles"] = {prop.name: prop.value for prop in computed_styles_list}

        if include_css_rules:
            debug_logger.log_info("element_cloner", "extract_styles_cdp", "Getting matched styles via CDP")
            # The response is indexed positionally below: 0 inline style,
            # 1 attribute style, 2 matched rules, 3 pseudo elements, 4 inherited.
            matched_styles = await tab.send(cdp.css.get_matched_styles_for_node(node_id))

            # Extract CSS rules from matched styles
            result["css_rules"] = []
            if matched_styles[2]:  # matchedCSSRules
                for rule_match in matched_styles[2]:
                    if rule_match.rule and rule_match.rule.style:
                        result["css_rules"].append({
                            "selector": rule_match.rule.selector_list.text if rule_match.rule.selector_list else "unknown",
                            "css_text": rule_match.rule.style.css_text or "",
                            "source": rule_match.rule.origin.value if rule_match.rule.origin else "unknown"
                        })

            # Add inline styles if present
            if matched_styles[0]:  # inlineStyle
                result["inline_style"] = {
                    "css_text": matched_styles[0].css_text or "",
                    "properties": len(matched_styles[0].css_properties) if matched_styles[0].css_properties else 0
                }

            # Add attribute styles if present
            if matched_styles[1]:  # attributesStyle
                result["attributes_style"] = {
                    "css_text": matched_styles[1].css_text or "",
                    "properties": len(matched_styles[1].css_properties) if matched_styles[1].css_properties else 0
                }

            # Handle pseudo elements (if available in matched_styles)
            # Only the number of matching rules per pseudo type is recorded.
            if include_pseudo and len(matched_styles) > 3 and matched_styles[3]:
                result["pseudo_elements"] = {}
                for pseudo_match in matched_styles[3]:
                    if pseudo_match.pseudo_type:
                        result["pseudo_elements"][pseudo_match.pseudo_type.value] = {
                            "matches": len(pseudo_match.matches) if pseudo_match.matches else 0
                        }

            # Handle inheritance (if available in matched_styles)
            # Only ancestors that carry an inline style contribute an entry.
            if include_inheritance and len(matched_styles) > 4 and matched_styles[4]:
                result["inheritance_chain"] = []
                for inherited_entry in matched_styles[4]:
                    if inherited_entry.inline_style:
                        result["inheritance_chain"].append({
                            "inline_css": inherited_entry.inline_style.css_text or "",
                            "properties": len(inherited_entry.inline_style.css_properties) if inherited_entry.inline_style.css_properties else 0
                        })

        debug_logger.log_info("element_cloner", "extract_styles_cdp", f"CDP extraction completed with {len(result.get('css_rules', []))} CSS rules")
        return result

    except Exception as e:
        debug_logger.log_error("element_cloner", "extract_styles_cdp", e)
        return {"error": f"CDP extraction failed: {str(e)}"}
class ExecutionContext:
    """Plain record describing one JavaScript execution context."""

    def __init__(self, id: str, name: str, origin: str, unique_id: str, aux_data: dict = None):
        """
        Args:
            id (str): Execution context identifier.
            name (str): Name of the context.
            origin (str): Origin URL of the context.
            unique_id (str): Unique identifier for the context.
            aux_data (dict, optional): Auxiliary data for the context; a fresh
                empty dict is used when omitted or empty.
        """
        self.id, self.name = id, name
        self.origin, self.unique_id = origin, unique_id
        # A falsy value is swapped for a new dict so instances never share one.
        self.aux_data = aux_data if aux_data else {}
class FunctionCall:
    """Describes a single pending JavaScript function invocation."""

    def __init__(self, function_path: str, args: List[Any] = None, context_id: str = None):
        """
        Args:
            function_path (str): Path to the function.
            args (List[Any], optional): Arguments to pass to the function; a
                fresh empty list is used when omitted or empty.
            context_id (str, optional): Execution context identifier.
        """
        self.function_path = function_path
        self.context_id = context_id
        # A falsy value is swapped for a new list so calls never share one.
        self.args = args if args else []
async def execute_cdp_command(self, tab: Tab, command: str, params: Dict[str, Any]) -> Dict[str, Any]:
    """
    Executes any CDP Runtime command with given parameters.

    The command is looked up on ``uc.cdp.runtime``. Both the name as given
    and its snake_case form are tried, so protocol-style names such as
    ``"callFunctionOn"`` resolve to the ``call_function_on`` wrapper.

    Args:
        tab (Tab): The browser tab.
        command (str): CDP command name (camelCase or snake_case).
        params (Dict[str, Any]): Keyword arguments for the command wrapper;
            ``None`` is treated as no parameters. Keys are passed through
            unchanged, so they must match the wrapper's parameter names.

    Returns:
        Dict[str, Any]: ``success``, ``command`` and ``params`` plus either
        ``result`` or ``error``.
    """
    import re  # local import keeps this block self-contained

    try:
        await self.enable_runtime(tab)

        # "getIsolateId" -> "get_isolate_id"; snake_case names pass through unchanged.
        snake_name = re.sub(r'(?<!^)(?=[A-Z])', '_', command).lower()
        cdp_method = None
        for candidate in (command, snake_name):
            attribute = getattr(uc.cdp.runtime, candidate, None)
            # Skip non-callables (constants, submodules) that happen to share the name.
            if callable(attribute):
                cdp_method = attribute
                break
        if cdp_method is None:
            raise ValueError(f"Unknown CDP command: {command}")

        result = await tab.send(cdp_method(**(params or {})))
        debug_logger.log_info("cdp_function_executor", "execute_cdp_command", f"Executed {command} with params: {params}")
        return {
            "success": True,
            "result": result,
            "command": command,
            "params": params
        }
    except Exception as e:
        debug_logger.log_error("cdp_function_executor", "execute_cdp_command", e)
        return {
            "success": False,
            "error": str(e),
            "command": command,
            "params": params
        }
async def get_execution_contexts(self, tab: Tab) -> List[ExecutionContext]:
    """
    Gets all available execution contexts.

    The contexts are reported by a script evaluated in the page, which
    currently describes a single ``main`` context built from ``window.location``.

    Args:
        tab (Tab): The browser tab.

    Returns:
        List[ExecutionContext]: List of execution contexts; empty when the
        evaluation yields no value or any error occurs.
    """
    try:
        await self.enable_runtime(tab)
        script = """
        (function() {
            return {
                location: window.location.href,
                title: document.title,
                readyState: document.readyState,
                contexts: [{
                    name: 'main',
                    origin: window.location.origin,
                    url: window.location.href
                }]
            };
        })()
        """
        evaluation = await tab.send(uc.cdp.runtime.evaluate(
            expression=script,
            return_by_value=True,
            await_promise=True
        ))
        if not (evaluation and evaluation[0] and evaluation[0].value):
            return []

        # The list position doubles as the context id.
        return [
            ExecutionContext(
                id=str(index),
                name=entry['name'],
                origin=entry['origin'],
                unique_id=f"{entry['origin']}_{index}"
            )
            for index, entry in enumerate(evaluation[0].value.get('contexts', []))
        ]
    except Exception as exc:
        debug_logger.log_error("cdp_function_executor", "get_execution_contexts", exc)
        return []
async def discover_global_functions(self, tab: Tab, context_id: str = None) -> List[FunctionInfo]:
    """
    Discovers all global JavaScript functions.

    A script evaluated in the page walks the own properties of ``window``,
    ``document`` and ``console`` and collects every function it finds, then
    appends a fixed list of well-known globals (``setTimeout``, ``fetch``,
    ``parseInt``, ...). Property names starting with ``_`` and ``constructor``
    are skipped, and nested objects are only followed while ``depth < 2``.

    Args:
        tab (Tab): The browser tab.
        context_id (str, optional): Execution context identifier. Not read by
            this method; the script runs in the tab's default context.

    Returns:
        List[FunctionInfo]: List of discovered functions; empty when the
        evaluation yields no value or any error occurs.
    """
    try:
        await self.enable_runtime(tab)
        discovery_script = """
        (function() {
            const functions = [];
            function isFunction(obj) {
                return typeof obj === 'function';
            }
            function discoverFunctions(obj, path = '', depth = 0) {
                if (depth > 3) return;
                try {
                    for (const key of Object.getOwnPropertyNames(obj)) {
                        if (key.startsWith('_') || key === 'constructor') continue;
                        try {
                            const value = obj[key];
                            const fullPath = path ? `${path}.${key}` : key;
                            if (isFunction(value)) {
                                functions.push({
                                    name: key,
                                    path: fullPath,
                                    signature: value.toString().split('{')[0].trim(),
                                    description: `Function at ${fullPath}`
                                });
                            } else if (typeof value === 'object' && value !== null && depth < 2) {
                                discoverFunctions(value, fullPath, depth + 1);
                            }
                        } catch (e) {
                        }
                    }
                } catch (e) {
                }
            }
            discoverFunctions(window, 'window');
            discoverFunctions(document, 'document');
            discoverFunctions(console, 'console');
            const globalFuncs = ['setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
                                'fetch', 'alert', 'confirm', 'prompt', 'parseInt', 'parseFloat'];
            for (const funcName of globalFuncs) {
                if (typeof window[funcName] === 'function') {
                    functions.push({
                        name: funcName,
                        path: funcName,
                        signature: window[funcName].toString().split('{')[0].trim(),
                        description: `Global function ${funcName}`
                    });
                }
            }
            return functions;
        })()
        """
        # return_by_value=True makes the array come back as plain data.
        result = await tab.send(uc.cdp.runtime.evaluate(
            expression=discovery_script,
            return_by_value=True,
            await_promise=True
        ))
        if result and result[0] and result[0].value:
            functions_data = result[0].value
            functions = []
            for func_data in functions_data:
                functions.append(FunctionInfo(
                    name=func_data['name'],
                    path=func_data['path'],
                    signature=func_data.get('signature'),
                    description=func_data.get('description')
                ))
            return functions
        return []
    except Exception as e:
        debug_logger.log_error("cdp_function_executor", "discover_global_functions", e)
        return []
async def discover_object_methods(self, tab: Tab, object_path: str) -> List[FunctionInfo]:
    """
    Discovers methods of a specific JavaScript object.

    ``object_path`` is evaluated to obtain a remote object reference, whose
    properties (inherited ones included, since ``own_properties=False``) are
    then listed; every property whose value has type ``function`` is reported.

    Args:
        tab (Tab): The browser tab.
        object_path (str): Path to the JavaScript object.

    Returns:
        List[FunctionInfo]: List of discovered methods; empty when the object
        cannot be resolved, has no properties, or any error occurs.
    """
    try:
        await self.enable_runtime(tab)

        # return_by_value=False keeps a handle (object_id) instead of a copy.
        evaluated = await tab.send(uc.cdp.runtime.evaluate(
            expression=object_path,
            return_by_value=False
        ))
        if not (evaluated and evaluated[0] and evaluated[0].object_id):
            debug_logger.log_warning("cdp_function_executor", "discover_object_methods", f"Could not get object reference for {object_path}")
            return []

        listing = await tab.send(uc.cdp.runtime.get_properties(
            object_id=evaluated[0].object_id,
            own_properties=False,
            accessor_properties_only=False
        ))
        if not (listing and listing[0]):
            debug_logger.log_warning("cdp_function_executor", "discover_object_methods", f"No properties returned for {object_path}")
            return []

        found = []
        for prop in listing[0]:
            try:
                if not (prop.value and prop.value.type_ == "function"):
                    continue
                found.append(FunctionInfo(
                    name=prop.name,
                    path=f'{object_path}.{prop.name}',
                    signature=prop.value.description or f"function {prop.name}()",
                    description=f"Method {prop.name} of {object_path}"
                ))
            except Exception as prop_error:
                # One malformed property must not abort the whole listing.
                debug_logger.log_warning("cdp_function_executor", "discover_object_methods", f"Error processing property {prop.name}: {prop_error}")

        debug_logger.log_info("cdp_function_executor", "discover_object_methods", f"Found {len(found)} methods for {object_path}")
        return found
    except Exception as exc:
        debug_logger.log_error("cdp_function_executor", "discover_object_methods", exc)
        return []
async def call_discovered_function(self, tab: Tab, function_path: str, args: List[Any]) -> Dict[str, Any]:
    """
    Calls a discovered JavaScript function with arguments.

    The dotted ``function_path`` is resolved property by property starting at
    ``window``; the function is invoked with its parent object as ``this``.

    Args:
        tab (Tab): The browser tab.
        function_path (str): Dotted path to the function (e.g.
            ``"document.getElementById"``).
        args (List[Any]): JSON-serializable arguments to pass.

    Returns:
        Dict[str, Any]: ``success``, ``function_path`` and ``args`` plus
        either ``result`` or ``error``.
    """
    try:
        await self.enable_runtime(tab)
        js_args = json.dumps(args) if args else '[]'
        # Embed the path as a JSON string literal: quotes or backslashes in it
        # can then neither break the script nor inject code.
        js_path = json.dumps(function_path)
        call_script = f"""
        (function() {{
            const functionPath = {js_path};
            const args = {js_args};
            try {{
                const pathParts = functionPath.split('.');
                let context = window;
                let func = window;

                for (let i = 0; i < pathParts.length; i++) {{
                    if (i === pathParts.length - 1) {{
                        func = context[pathParts[i]];
                    }} else {{
                        context = context[pathParts[i]];
                        func = context;
                    }}
                }}

                if (typeof func !== 'function') {{
                    throw new Error('Not a function: ' + functionPath);
                }}

                const result = func.apply(context, args);
                return {{
                    success: true,
                    result: result,
                    function_path: functionPath,
                    args: args
                }};
            }} catch (error) {{
                return {{
                    success: false,
                    error: error.message,
                    function_path: functionPath,
                    args: args
                }};
            }}
        }})()
        """
        result = await tab.send(uc.cdp.runtime.evaluate(
            expression=call_script,
            return_by_value=True,
            await_promise=True
        ))
        if result and result[0] and result[0].value:
            return result[0].value
        elif result and result[1]:
            # result[1] carries the exception details when the script itself threw.
            return {
                "success": False,
                "error": f"Runtime exception: {result[1].text}",
                "function_path": function_path,
                "args": args
            }
        return {
            "success": False,
            "error": "No result returned",
            "function_path": function_path,
            "args": args
        }
    except Exception as e:
        debug_logger.log_error("cdp_function_executor", "call_discovered_function", e)
        return {
            "success": False,
            "error": str(e),
            "function_path": function_path,
            "args": args
        }

async def inspect_function_signature(self, tab: Tab, function_path: str) -> Dict[str, Any]:
    """
    Inspects a function's signature and details.

    Args:
        tab (Tab): The browser tab.
        function_path (str): JavaScript expression that evaluates to the
            function (e.g. ``"window.fetch"`` or ``"obj['key']"``). It is
            evaluated as code in the page.

    Returns:
        Dict[str, Any]: On success ``name``, ``path``, ``signature``,
        ``length``, ``is_async`` and ``is_generator``; otherwise
        ``success: False`` with an ``error`` message.
    """
    try:
        await self.enable_runtime(tab)
        # The path is evaluated as an expression below, but wherever it is only
        # echoed back it is embedded as a JSON string literal so that quotes in
        # it (e.g. obj['key']) no longer produce a syntax error.
        js_path = json.dumps(function_path)
        inspect_script = f"""
        (function() {{
            const functionPath = {js_path};
            try {{
                const func = {function_path};
                if (typeof func !== 'function') {{
                    return {{
                        success: false,
                        error: 'Not a function: ' + functionPath
                    }};
                }}
                return {{
                    success: true,
                    name: func.name || 'anonymous',
                    path: functionPath,
                    signature: func.toString(),
                    length: func.length,
                    is_async: func.constructor.name === 'AsyncFunction',
                    is_generator: func.constructor.name === 'GeneratorFunction'
                }};
            }} catch (error) {{
                return {{
                    success: false,
                    error: error.message
                }};
            }}
        }})()
        """
        result = await tab.send(uc.cdp.runtime.evaluate(
            expression=inspect_script,
            return_by_value=True,
            await_promise=True
        ))
        if result and result[0] and result[0].value:
            return result[0].value
        elif result and result[1]:
            # A path that is not valid JavaScript fails at parse time; report
            # that instead of a generic "No result returned".
            return {"success": False, "error": f"Runtime exception: {result[1].text}"}
        return {"success": False, "error": "No result returned"}
    except Exception as e:
        debug_logger.log_error("cdp_function_executor", "inspect_function_signature", e)
        return {"success": False, "error": str(e)}
482 | context_id (str, optional): Execution context identifier. 483 | 484 | Returns: 485 | Dict[str, Any]: Result of script execution. 486 | """ 487 | try: 488 | await self.enable_runtime(tab) 489 | wrapped_script = f""" 490 | (function() {{ 491 | try {{ 492 | const result = (function() {{ 493 | {script_code} 494 | }})(); 495 | return {{ 496 | success: true, 497 | result: result, 498 | executed_at: new Date().toISOString() 499 | }}; 500 | }} catch (error) {{ 501 | return {{ 502 | success: false, 503 | error: error.message, 504 | stack: error.stack, 505 | executed_at: new Date().toISOString() 506 | }}; 507 | }} 508 | }})() 509 | """ 510 | result = await tab.send(uc.cdp.runtime.evaluate( 511 | expression=wrapped_script, 512 | return_by_value=True, 513 | await_promise=True, 514 | allow_unsafe_eval_blocked_by_csp=True 515 | )) 516 | if result and result[0] and result[0].value: 517 | return result[0].value 518 | elif result and result[1]: 519 | return { 520 | "success": False, 521 | "error": f"Runtime exception: {result[1].text}", 522 | "line_number": result[1].line_number, 523 | "column_number": result[1].column_number 524 | } 525 | return {"success": False, "error": "No result returned"} 526 | except Exception as e: 527 | debug_logger.log_error("cdp_function_executor", "inject_and_execute_script", e) 528 | return {"success": False, "error": str(e)} 529 | 530 | async def create_persistent_function(self, tab: Tab, function_name: str, function_code: str, instance_id: str) -> Dict[str, Any]: 531 | """ 532 | Creates a persistent JavaScript function that survives page reloads. 533 | 534 | Args: 535 | tab (Tab): The browser tab. 536 | function_name (str): Name of the function. 537 | function_code (str): JavaScript code for the function. 538 | instance_id (str): Instance identifier. 539 | 540 | Returns: 541 | Dict[str, Any]: Result of function creation. 
542 | """ 543 | try: 544 | await self.enable_runtime(tab) 545 | if instance_id not in self._persistent_functions: 546 | self._persistent_functions[instance_id] = {} 547 | self._persistent_functions[instance_id][function_name] = function_code 548 | create_script = f""" 549 | (function() {{ 550 | try {{ 551 | window.{function_name} = {function_code}; 552 | return {{ 553 | success: true, 554 | function_name: '{function_name}', 555 | created_at: new Date().toISOString(), 556 | available_as: 'window.{function_name}' 557 | }}; 558 | }} catch (error) {{ 559 | return {{ 560 | success: false, 561 | error: error.message, 562 | function_name: '{function_name}' 563 | }}; 564 | }} 565 | }})() 566 | """ 567 | result = await tab.send(uc.cdp.runtime.evaluate( 568 | expression=create_script, 569 | return_by_value=True, 570 | await_promise=True 571 | )) 572 | if result and result[0] and result[0].value: 573 | return result[0].value 574 | return {"success": False, "error": "Failed to create function"} 575 | except Exception as e: 576 | debug_logger.log_error("cdp_function_executor", "create_persistent_function", e) 577 | return {"success": False, "error": str(e)} 578 | 579 | async def execute_function_sequence(self, tab: Tab, function_calls: List[FunctionCall]) -> List[Dict[str, Any]]: 580 | """ 581 | Executes a sequence of function calls. 582 | 583 | Args: 584 | tab (Tab): The browser tab. 585 | function_calls (List[FunctionCall]): List of function calls to execute. 586 | 587 | Returns: 588 | List[Dict[str, Any]]: Results of each function call. 
589 | """ 590 | results = [] 591 | for i, func_call in enumerate(function_calls): 592 | try: 593 | debug_logger.log_info("cdp_function_executor", "execute_function_sequence", f"Executing call {i+1}/{len(function_calls)}: {func_call.function_path}") 594 | result = await self.call_discovered_function( 595 | tab, 596 | func_call.function_path, 597 | func_call.args 598 | ) 599 | results.append({ 600 | "sequence_index": i, 601 | "function_call": { 602 | "function_path": func_call.function_path, 603 | "args": func_call.args, 604 | "context_id": func_call.context_id 605 | }, 606 | "result": result 607 | }) 608 | except Exception as e: 609 | debug_logger.log_error("cdp_function_executor", "execute_function_sequence", e) 610 | results.append({ 611 | "sequence_index": i, 612 | "function_call": { 613 | "function_path": func_call.function_path, 614 | "args": func_call.args, 615 | "context_id": func_call.context_id 616 | }, 617 | "result": { 618 | "success": False, 619 | "error": str(e) 620 | } 621 | }) 622 | return results 623 | 624 | async def create_python_binding(self, tab: Tab, binding_name: str, python_function: Callable) -> Dict[str, Any]: 625 | """ 626 | Creates a binding that allows JavaScript to call Python functions. 627 | 628 | Args: 629 | tab (Tab): The browser tab. 630 | binding_name (str): Name of the binding. 631 | python_function (Callable): Python function to bind. 632 | 633 | Returns: 634 | Dict[str, Any]: Result of binding creation. 
635 | """ 636 | try: 637 | await self.enable_runtime(tab) 638 | self._python_bindings[binding_name] = python_function 639 | await tab.send(uc.cdp.runtime.add_binding(name=binding_name)) 640 | wrapper_script = f""" 641 | (function() {{ 642 | if (!window.{binding_name}) {{ 643 | window.{binding_name} = function(...args) {{ 644 | return new Promise((resolve, reject) => {{ 645 | const callId = Math.random().toString(36).substr(2, 9); 646 | window.addEventListener(`{binding_name}_response_${{callId}}`, function(event) {{ 647 | if (event.detail.success) {{ 648 | resolve(event.detail.result); 649 | }} else {{ 650 | reject(new Error(event.detail.error)); 651 | }} 652 | }}, {{ once: true }}); 653 | window.chrome.runtime.sendMessage({{ 654 | binding: '{binding_name}', 655 | args: args, 656 | callId: callId 657 | }}); 658 | }}); 659 | }}; 660 | }} 661 | return {{ 662 | success: true, 663 | binding_name: '{binding_name}', 664 | available_as: 'window.{binding_name}' 665 | }}; 666 | }})() 667 | """ 668 | result = await tab.send(uc.cdp.runtime.evaluate( 669 | expression=wrapper_script, 670 | return_by_value=True, 671 | await_promise=True 672 | )) 673 | if result and result[0] and result[0].value: 674 | return result[0].value 675 | return {"success": False, "error": "Failed to create binding"} 676 | except Exception as e: 677 | debug_logger.log_error("cdp_function_executor", "create_python_binding", e) 678 | return {"success": False, "error": str(e)} 679 | 680 | async def execute_python_in_browser(self, tab: Tab, python_code: str) -> Dict[str, Any]: 681 | """ 682 | Executes Python code by translating it to JavaScript with timeout protection. 683 | 684 | Args: 685 | tab (Tab): The browser tab. 686 | python_code (str): Python code to execute. 687 | 688 | Returns: 689 | Dict[str, Any]: Result of execution. 
690 | """ 691 | try: 692 | js_code = self._translate_python_to_js(python_code) 693 | debug_logger.log_info("cdp_function_executor", "execute_python_in_browser", f"Translated JS: {js_code}") 694 | 695 | import asyncio 696 | result = await asyncio.wait_for( 697 | self.inject_and_execute_script(tab, js_code), 698 | timeout=10.0 699 | ) 700 | return result 701 | except asyncio.TimeoutError: 702 | return {"success": False, "error": "Python execution timeout - code may have infinite loop or syntax error"} 703 | except Exception as e: 704 | debug_logger.log_error("cdp_function_executor", "execute_python_in_browser", e) 705 | return {"success": False, "error": str(e)} 706 | 707 | def _translate_python_to_js(self, python_code: str) -> str: 708 | """ 709 | Professional Python to JavaScript translation using py2js library. 710 | 711 | Args: 712 | python_code (str): Python code to translate. 713 | 714 | Returns: 715 | str: Translated JavaScript code. 716 | """ 717 | try: 718 | import py2js 719 | 720 | js_code = py2js.convert(python_code) 721 | debug_logger.log_info("cdp_function_executor", "_translate_python_to_js", f"py2js generated: {js_code}") 722 | 723 | lines = python_code.strip().split('\n') 724 | last_line = lines[-1].strip() if lines else "" 725 | 726 | if (last_line and 727 | '=' not in last_line and 728 | not last_line.startswith(('def ', 'class ', 'if ', 'for ', 'while ', 'try:', 'with ', 'import ', 'from '))): 729 | 730 | wrapped_code = f"(() => {{ {js_code}; return {last_line}; }})()" 731 | return wrapped_code 732 | else: 733 | return f"(() => {{ {js_code}; }})()" 734 | 735 | except ImportError: 736 | debug_logger.log_warning("cdp_function_executor", "_translate_python_to_js", "py2js not available, using fallback") 737 | return self._fallback_python_to_js(python_code) 738 | except Exception as e: 739 | debug_logger.log_error("cdp_function_executor", "_translate_python_to_js", e, {"python_code": python_code}) 740 | return self._fallback_python_to_js(python_code) 
def _fallback_python_to_js(self, python_code: str) -> str:
    """
    Fallback Python to JavaScript translation for basic cases.

    Performs plain textual substitutions (``True``/``False``/``None``,
    ``print(``, ``.append(``), prefixes the first assignment to each bare
    name with ``let``, and wraps the result in an immediately invoked
    function. When the last line is an expression, it is returned from
    that function.

    Args:
        python_code (str): Python code to translate.

    Returns:
        str: Basic translated JavaScript code.
    """
    import re

    token_map = {
        "True": "true",
        "False": "false",
        "None": "null",
        "print(": "console.log(",
        ".append(": ".push(",
    }
    # A bare name followed by a single '=' (so 'x == 1' is not an assignment).
    assignment = re.compile(r'^(\s*)([a-zA-Z_][a-zA-Z0-9_]*)(\s*=(?!=))')
    # Any '=' that is not part of '==', '!=', '<=' or '>='.
    plain_equals = re.compile(r'(?<![=!<>])=(?!=)')

    lines = python_code.strip().split('\n')
    declared = set()
    js_lines = []

    for line in lines:
        js_line = line
        for py_syntax, js_syntax in token_map.items():
            js_line = js_line.replace(py_syntax, js_syntax)

        match = assignment.match(js_line)
        if match:
            name = match.group(2)
            # Only the first assignment declares the variable; a second
            # 'let' for the same name is a SyntaxError in JavaScript.
            if name not in declared:
                declared.add(name)
                js_line = f"{match.group(1)}let {js_line[match.end(1):]}"

        js_lines.append(js_line)

    last_line = lines[-1].strip()
    is_expression = bool(
        last_line
        and not plain_equals.search(last_line)
        and not last_line.endswith(':')
        and not re.match(r'return\b', last_line)
    )

    if is_expression:
        # Return the *translated* last line, and emit it once only so the
        # expression is not evaluated twice.
        returned = js_lines[-1].strip()
        leading = js_lines[:-1]
        if leading:
            js_code = ";\n".join(leading) + f"; return {returned};"
        else:
            js_code = f"return {returned};"
    else:
        js_code = ";\n".join(js_lines) + ";"

    return f"(function() {{ {js_code} }})()"


async def call_python_from_js(self, binding_name: str, args: List[Any]) -> Dict[str, Any]:
    """
    Handles JavaScript calls to Python functions.

    Looks up ``binding_name`` in ``self._python_bindings`` and calls it with
    ``args``. If the call produces an awaitable (coroutine function, or a
    plain callable that returns a coroutine), the awaitable is awaited
    before the result is reported.

    Args:
        binding_name (str): Name of the Python binding.
        args (List[Any]): Arguments to pass to the Python function.
            ``None`` is treated as no arguments.

    Returns:
        Dict[str, Any]: Result of the Python function call. On failure the
            dict carries ``success: False`` and an ``error`` message instead
            of raising.
    """
    import inspect

    try:
        if binding_name not in self._python_bindings:
            return {"success": False, "error": f"Unknown binding: {binding_name}"}

        python_function = self._python_bindings[binding_name]
        call_args = [] if args is None else args

        result = python_function(*call_args)
        # Covers async defs as well as sync wrappers (lambda, partial,
        # objects with an async __call__) that hand back a coroutine.
        if inspect.isawaitable(result):
            result = await result

        return {
            "success": True,
            "result": result,
            "binding_name": binding_name,
            "args": args
        }
    except Exception as e:
        debug_logger.log_error("cdp_function_executor", "call_python_from_js", e)
        return {
            "success": False,
            "error": str(e),
            "binding_name": binding_name,
            "args": args
        }


async def get_function_executor_info(self, instance_id: str = None) -> Dict[str, Any]:
    """
    Gets information about the function executor state.

    Args:
        instance_id (str, optional): Instance identifier. When given (and
            non-empty), only that instance's persistent functions are
            reported; otherwise the whole persistent-function mapping is.

    Returns:
        Dict[str, Any]: Registered binding names, persistent functions, the
            awaited result of ``self.list_cdp_commands()``, a version string
            and a list of capability names.
    """
    if instance_id:
        persistent = self._persistent_functions.get(instance_id, {})
    else:
        persistent = self._persistent_functions

    return {
        "python_bindings": list(self._python_bindings),
        "persistent_functions": persistent,
        "available_commands": await self.list_cdp_commands(),
        "executor_version": "1.0.0",
        "capabilities": [
            "direct_cdp_execution",
            "function_discovery",
            "dynamic_script_injection",
            "python_js_bridge"
        ]
    }