This is page 2 of 4. Use http://codebase.md/vibheksoni/stealth-browser-mcp?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .dockerignore ├── .github │ ├── FUNDING.yml │ ├── ISSUE_TEMPLATE │ │ ├── bug_report.md │ │ ├── config.yml │ │ ├── feature_request.md │ │ └── showcase.yml │ ├── labeler.yml │ ├── pull_request_template.md │ └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── Checklist.md ├── CODE_OF_CONDUCT.md ├── CODEOWNERS ├── COMPARISON.md ├── CONTRIBUTING.md ├── demo │ ├── augment-hero-clone.md │ ├── augment-hero-recreation.html │ └── README.md ├── Dockerfile ├── examples │ └── claude_prompts.md ├── HALL_OF_FAME.md ├── LICENSE ├── media │ ├── AugmentHeroClone.PNG │ ├── Showcase Stealth Browser Mcp.mp4 │ ├── showcase-demo-full.gif │ ├── showcase-demo.gif │ └── UndetectedStealthBrowser.png ├── pyproject.toml ├── README.md ├── requirements.txt ├── ROADMAP.md ├── run_server.bat ├── run_server.sh ├── SECURITY.md ├── smithery.yaml └── src ├── __init__.py ├── browser_manager.py ├── cdp_element_cloner.py ├── cdp_function_executor.py ├── comprehensive_element_cloner.py ├── debug_logger.py ├── dom_handler.py ├── dynamic_hook_ai_interface.py ├── dynamic_hook_system.py ├── element_cloner.py ├── file_based_element_cloner.py ├── hook_learning_system.py ├── js │ ├── comprehensive_element_extractor.js │ ├── extract_animations.js │ ├── extract_assets.js │ ├── extract_events.js │ ├── extract_related_files.js │ ├── extract_structure.js │ └── extract_styles.js ├── models.py ├── network_interceptor.py ├── persistent_storage.py ├── platform_utils.py ├── process_cleanup.py ├── progressive_element_cloner.py ├── response_handler.py ├── response_stage_hooks.py └── server.py ``` # Files -------------------------------------------------------------------------------- /Checklist.md: -------------------------------------------------------------------------------- ```markdown 1 | # Browser Automation MCP Testing Checklist 2 | 3 | ## ✅ **TESTED AND WORKING** 4 | 5 | ### Core Browser Management 6 | - ✅ `spawn_browser` - Creates new browser instances (FIXED v0.2.4: root user support, flexible args parsing, platform-aware configuration) 7 | - ✅ `navigate` - Navigate to URLs 8 | - ✅ `close_instance` - Close browser instances 9 | - ✅ `list_instances` - List all browser instances 10 | - ✅ `get_instance_state` - Get browser instance details 11 | 12 | ### Element Extraction Functions 13 | - ✅ `extract_element_styles` - Extract CSS styles (CDP implementation, fixed hanging) 14 | - ✅ `extract_element_structure` - Extract DOM structure (fixed JS template issues) 15 | - ✅ `extract_element_events` - Extract event handlers (fixed JS template issues) 16 | - ✅ `extract_element_animations` - Extract CSS animations/transitions (created new JS file) 17 | - ✅ `extract_element_assets` - Extract element assets (fixed tab.evaluate() args, now uses external JS with file fallback) 18 | - ✅ `extract_related_files` - Extract related CSS/JS files (fixed tab.evaluate() args, now uses external JS with file fallback) 19 | 20 | ### File-Based Extraction Functions 21 | - ✅ `extract_element_styles_to_file` - Save styles to file 22 | - ✅ `extract_element_structure_to_file` - Save structure to file 23 | - ✅ `extract_element_events_to_file` - Save events to file (fixed list/dict error) 24 | - ✅ `extract_element_animations_to_file` - Save animations to file 25 | - ✅ `extract_element_assets_to_file` - Save assets to file 26 | 27 | ### Complete Element Cloning 28 | - ✅ `clone_element_complete` - Complete element 
cloning (with file fallback) 29 | - ✅ `extract_complete_element_to_file` - Complete extraction to file 30 | - ✅ `extract_complete_element_cdp` - CDP-based complete extraction 31 | 32 | ### Progressive Element Cloning 33 | - ✅ `clone_element_progressive` - Progressive cloning system 34 | - ✅ `expand_styles` - Expand styles data for stored element 35 | - ✅ `expand_events` - Expand events data 36 | - ✅ `expand_children` - Expand children data (fixed "unhashable type: 'slice'" error, now has response handler) 37 | - ✅ `expand_css_rules` - Expand CSS rules data 38 | - ✅ `expand_pseudo_elements` - Expand pseudo-elements data 39 | - ✅ `expand_animations` - Expand animations data 40 | - ✅ `list_stored_elements` - List stored elements 41 | - ✅ `clear_stored_element` - Clear specific stored element 42 | - ✅ `clear_all_elements` - Clear all stored elements 43 | 44 | ### CDP Function Executor 45 | - ✅ `discover_global_functions` - Discover JS functions (with file fallback, fixed schema) 46 | - ✅ `discover_object_methods` - Discover object methods (fixed to use CDP get_properties instead of JavaScript Object.getOwnPropertyNames, now returns 93+ methods, wrapped with response handler) 47 | - ✅ `call_javascript_function` - Call JS functions (fixed illegal invocation) 48 | - ✅ `inject_and_execute_script` - Execute custom JS code 49 | - ✅ `inspect_function_signature` - Inspect function details 50 | - ✅ `create_persistent_function` - Create persistent functions 51 | - ✅ `execute_function_sequence` - Execute function sequences (handles mixed success/failure) 52 | - ✅ `create_python_binding` - Create Python-JS bindings 53 | - ✅ `get_execution_contexts` - Get JS execution contexts 54 | - ✅ `list_cdp_commands` - List available CDP commands 55 | - ✅ `execute_cdp_command` - Execute raw CDP commands (IMPORTANT: use snake_case params like "return_by_value", not camelCase "returnByValue") 56 | - ✅ `get_function_executor_info` - Get executor info 57 | 58 | ### File Management 59 | - ✅ `list_clone_files` - List saved clone files 60 | - ✅ `cleanup_clone_files` - Clean up old files (deleted 15 files) 61 | 62 | ### System Functions 63 | - ✅ `hot_reload` - Hot reload modules (implied working) 64 | - ✅ `reload_status` - Check reload status (shows module load status) 65 | - ✅ `get_debug_view` - Get debug information (fixed with pagination) 66 | - ✅ `clear_debug_view` - Clear debug logs (fixed with timeout protection) 67 | - ✅ `validate_browser_environment_tool` - **NEW v0.2.4!** Environment diagnostics & platform validation 68 | 69 | ### Basic Browser Interactions 70 | - ✅ `go_back` - Navigate back in history 71 | - ✅ `go_forward` - Navigate forward in history 72 | - ✅ `reload_page` - Reload current page 73 | 74 | ### Element Interaction 75 | - ✅ `query_elements` - Find elements by selector 76 | - ✅ `click_element` - Click on elements 77 | - ✅ `type_text` - Type text into input fields (ENHANCED: added parse_newlines parameter for Enter key handling) 78 | - ✅ `paste_text` - **NEW!** Instant text pasting via CDP insert_text (10x faster than typing) 79 | - ✅ `select_option` - Select dropdown options (fixed string index conversion & proper nodriver usage) 80 | - ✅ `get_element_state` - Get element properties 81 | - ✅ `wait_for_element` - Wait for element to appear 82 | 83 | ### Page Interaction 84 | - ✅ `scroll_page` - Scroll the page 85 | - ✅ `execute_script` - Execute JavaScript 86 | - ✅ `get_page_content` - Get page HTML/text (with large response file handling) 87 | - ✅ `take_screenshot` - Take page screenshots 88 | 89 | ### 
Network Operations 90 | - ✅ `list_network_requests` - List captured network requests 91 | - ✅ `get_request_details` - Get request details (working properly) 92 | - ✅ `get_response_details` - Get response details (working properly) 93 | - ✅ `get_response_content` - Get response body (fixed RequestId object) 94 | - ✅ `modify_headers` - Modify request headers (fixed Headers object) 95 | 96 | ### Cookie Management 97 | - ✅ `get_cookies` - Get page cookies 98 | - ✅ `set_cookie` - Set cookie values (fixed url/domain requirement per nodriver docs) 99 | - ✅ `clear_cookies` - Clear cookies (fixed proper CDP methods) 100 | 101 | ### Tab Management 102 | - ✅ `list_tabs` - List all tabs 103 | - ✅ `switch_tab` - Switch to specific tab 104 | - ✅ `get_active_tab` - Get active tab info 105 | - ✅ `new_tab` - Open new tab 106 | - ✅ `close_tab` - Close specific tab 107 | 108 | ## ✅ **ALL FUNCTIONS WORKING** 109 | 110 | ### CDP Advanced Functions 111 | - ✅ `execute_python_in_browser` - Execute Python in browser (FIXED! Now uses proper py2js transpiler - functions, loops work; classes have minor edge cases) 112 | 113 | ### File Management 114 | - ✅ `export_debug_logs` - Export debug information (FIXED! Lock-free fallback with ownership tracking) 115 | 116 | ### Dynamic Network Hook System (NEW!) 117 | - ✅ `create_dynamic_hook` - Create AI-generated Python function hooks (tested with block, redirect, conditional logic) 118 | - ✅ `create_simple_dynamic_hook` - Create template-based hooks (block, redirect, add_headers, log actions) 119 | - ✅ `list_dynamic_hooks` - List all dynamic hooks with statistics (shows hook details and match counts) 120 | - ✅ `get_dynamic_hook_details` - Get detailed hook information (shows function code and config) 121 | - ✅ `remove_dynamic_hook` - Remove dynamic hooks (removes hook by ID) 122 | - ✅ `get_hook_documentation` - Get documentation for creating hook functions (AI learning) 123 | - ✅ `get_hook_examples` - Get example hook functions (10 detailed examples for AI) 124 | - ✅ `get_hook_requirements_documentation` - Get hook requirements docs (matching criteria) 125 | - ✅ `get_hook_common_patterns` - Get common hook patterns (ad blocking, API proxying, etc.) 126 | - ✅ `validate_hook_function` - Validate hook function code (syntax checking) 127 | 128 | **TESTED HOOK TYPES:** 129 | - ✅ **Block Hook** - Successfully blocks matching URLs (shows chrome-error page) 130 | - ✅ **Network-level Redirect** - Changes content while preserving original URL 131 | - ✅ **HTTP Redirect** - Proper 302 redirect with URL bar update 132 | - ✅ **Response Content Replacement** - Full response body modification (JSON → "Testing" text) 133 | - ✅ **Response Header Injection** - Add custom headers to responses 134 | - ✅ **Request/Response Stage Processing** - Both request and response interception working 135 | - ✅ **AI-Generated Functions** - Custom Python logic for complex request processing 136 | 137 | ## 🔧 **FIXED ISSUES** 138 | 139 | 1. **CSS Extraction Hanging** → Replaced with CDP implementation 140 | 2. **JavaScript Template Errors** → Fixed template substitution in external JS files 141 | 3. **Events File Extraction Error** → Fixed framework handlers list/dict processing 142 | 4. **Large Response Errors** → Added automatic file fallback system 143 | 5. **JavaScript Function Call Binding** → Fixed context binding for methods 144 | 6. **Schema Validation Error** → Fixed return types to match expected schemas 145 | 7. 
**Select Option Input Validation** → Fixed string to int conversion for index parameter 146 | 8. **Set Cookie URL/Domain Required** → Added url parameter and fallback logic per nodriver docs 147 | 9. **Get Page Content Large Response** → Wrapped with response handler for automatic file saving 148 | 10. **Get Response Content Error** → Fixed RequestId object creation and tuple result handling 149 | 11. **Modify Headers Error** → Fixed Headers object creation for CDP 150 | 12. **Clear Cookies List Error** → Fixed proper CDP methods and cookie object handling 151 | 13. **Extract Element Assets/Related Files Tab.evaluate() Args** → Fixed functions to use external JS files with template substitution instead of multiple arguments 152 | 14. **Large Response Auto-Save** → Added response handler wrapper to extract_element_assets and extract_related_files 153 | 15. **Debug Functions Hanging** → Added pagination and timeout protection (get_debug_view ✅, clear_debug_view ✅, export_debug_logs ✅) 154 | 16. **Execute Python in Browser Hanging & Translation Errors** → Fixed with proper py2js transpiler from am230/py2js - now handles functions, loops, variables correctly with only minor class edge cases 155 | 17. **Export Debug Logs Lock Deadlock** → Fixed with lock-free fallback and ownership tracking - now works perfectly ✅ 156 | 18. **Broken Network Hook Functions** → Removed 13 incomplete/broken functions (create_request_hook, create_response_hook, etc.) that called non-existent methods - moved to oldstuff/old_funcs.py for reference 157 | 19. **Root User Browser Spawning** → Fixed "Failed to connect to browser" when running as root/administrator with auto-detection ✅ 158 | 20. **Args Parameter JSON Validation** → Fixed "Input validation error" for JSON string args format with flexible parsing ✅ 159 | 21. **Container Environment Compatibility** → Added Docker/Kubernetes support with auto-detection and required arguments ✅ 160 | 22. **Cross-Platform Browser Configuration** → Enhanced Windows/Linux/macOS support with platform-aware argument merging ✅ 161 | 162 | ## 📊 **TESTING SUMMARY** 163 | 164 | - **Total Functions**: 90 functions 165 | - **Tested & Working**: 90 functions ✅ 166 | - **Functions with Issues**: 0 functions ❌ 167 | - **Major Issues Fixed**: 22 critical issues resolved 168 | - **Success Rate**: 100% 🎯 🚀 169 | 170 | **LATEST ACHIEVEMENTS:** 171 | ✅ **Cross-Platform Compatibility & Root Support (v0.2.4)** - Smart environment detection, automatic privilege handling, flexible args parsing, and comprehensive platform diagnostics 172 | 173 | ✅ **Advanced Text Input System (v0.2.3)** - Lightning-fast `paste_text()` via CDP and enhanced `type_text()` with newline parsing for complex multi-line form automation 174 | 175 | ✅ **Complete Dynamic Hook System with Response-Stage Processing** - AI-powered network interception system with real-time processing, no pending state, custom Python function support, and full response content modification capability 176 | 177 | ## 🎯 **POTENTIAL FUTURE ENHANCEMENTS** 178 | 179 | 1. **Advanced Hook Patterns** - More complex conditional logic examples 180 | 2. **Hook Performance Optimization** - Load testing with multiple patterns 181 | 3. **Machine Learning Integration** - AI-driven request pattern analysis 182 | 4. **Hook Templates** - Pre-built patterns for common use cases 183 | 5. 
**Multi-instance Hook Coordination** - Synchronized browser fleet management 184 | 185 | ## ✅ **COMPLETED ENHANCEMENTS (v0.2.4)** 186 | 187 | ### 🛡️ **Cross-Platform & Root User Support** 188 | - ✅ **Smart Environment Detection** - Auto-detects root/admin, containers, OS differences 189 | - ✅ **Platform-Aware Browser Configuration** - Automatic sandbox handling based on environment 190 | - ✅ **Flexible Args Parsing** - Supports JSON arrays, JSON strings, and single strings 191 | - ✅ **Container Compatibility** - Docker/Kubernetes detection with required arguments 192 | - ✅ **Chrome Discovery** - Automatic Chrome/Chromium executable detection 193 | - ✅ **Environment Diagnostics** - New validation tool for pre-flight checks 194 | - ✅ **Enhanced Error Messages** - Platform-specific guidance and solutions 195 | 196 | ### 📊 **Technical Implementation** 197 | - ✅ **`platform_utils.py` Module** - Comprehensive cross-platform utility functions 198 | - ✅ **`is_running_as_root()`** - Cross-platform privilege detection 199 | - ✅ **`is_running_in_container()`** - Container environment detection 200 | - ✅ **`merge_browser_args()`** - Smart argument merging with platform requirements 201 | - ✅ **`validate_browser_environment()`** - Complete environment validation 202 | - ✅ **Enhanced spawn_browser()** - Multi-format args parsing with platform integration 203 | 204 | ## ✅ **COMPLETED ENHANCEMENTS (v0.2.1)** 205 | 206 | - ✅ **Response-Stage Processing** - Content modification hooks (IMPLEMENTED & TESTED) 207 | - ✅ **Hook Chain Processing** - Multiple hooks on same request with priority system (IMPLEMENTED) 208 | - ✅ **Response Body Modification** - AI can completely replace response content (IMPLEMENTED & TESTED) 209 | - ✅ **Response Headers Parsing Fix** - Proper CDP response header handling (FIXED) 210 | - ✅ **Base64 Encoding Support** - Binary content support for fulfill requests (IMPLEMENTED) ``` -------------------------------------------------------------------------------- /src/cdp_element_cloner.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Enhanced Element Cloner using proper CDP methods 3 | ================================================= 4 | 5 | This module provides comprehensive element extraction using the full power of 6 | Chrome DevTools Protocol (CDP) through nodriver. It extracts: 7 | 8 | 1. Complete computed styles using CDP CSS.getComputedStyleForNode 9 | 2. Matched CSS rules using CDP CSS.getMatchedStylesForNode 10 | 3. Event listeners using CDP DOMDebugger.getEventListeners 11 | 4. All stylesheet information via CDP CSS domain 12 | 5. Complete DOM structure and attributes 13 | 14 | This provides 100% accurate element cloning by using CDP's native capabilities 15 | instead of limited JavaScript-based extraction. 16 | """ 17 | 18 | import asyncio 19 | import json 20 | from datetime import datetime 21 | from typing import Dict, List, Any, Optional 22 | 23 | import nodriver as uc 24 | from debug_logger import debug_logger 25 | 26 | 27 | class CDPElementCloner: 28 | """Enhanced element cloner using proper CDP methods for complete accuracy.""" 29 | 30 | def __init__(self): 31 | """Initialize the CDP element cloner.""" 32 | 33 | async def extract_complete_element_cdp( 34 | self, 35 | tab, 36 | selector: str, 37 | include_children: bool = True 38 | ) -> Dict[str, Any]: 39 | """ 40 | Extract complete element data using proper CDP methods. 41 | 42 | Args: 43 | tab (Any): The nodriver tab object for CDP communication. 
44 | selector (str): CSS selector for the target element. 45 | include_children (bool): Whether to include child elements. 46 | 47 | Returns: 48 | Dict[str, Any]: Extraction result containing element data, styles, event listeners, and stats. 49 | 50 | This provides 100% accurate element cloning by using CDP's native 51 | capabilities for CSS rules, event listeners, and style information. 52 | """ 53 | try: 54 | debug_logger.log_info("cdp_cloner", "extract_complete", f"Starting CDP extraction for {selector}") 55 | await tab.send(uc.cdp.dom.enable()) 56 | await tab.send(uc.cdp.css.enable()) 57 | await tab.send(uc.cdp.runtime.enable()) 58 | doc = await tab.send(uc.cdp.dom.get_document()) 59 | nodes = await tab.send(uc.cdp.dom.query_selector_all(doc.node_id, selector)) 60 | if not nodes: 61 | return {"error": f"Element not found: {selector}"} 62 | node_id = nodes[0] 63 | element_html = await self._get_element_html(tab, node_id) 64 | computed_styles = await self._get_computed_styles_cdp(tab, node_id) 65 | matched_styles = await self._get_matched_styles_cdp(tab, node_id) 66 | event_listeners = await self._get_event_listeners_cdp(tab, node_id) 67 | children = [] 68 | if include_children: 69 | children = await self._get_children_cdp(tab, node_id) 70 | result = { 71 | "extraction_method": "CDP", 72 | "timestamp": datetime.now().isoformat(), 73 | "selector": selector, 74 | "url": tab.target.url, 75 | "element": { 76 | "html": element_html, 77 | "computed_styles": computed_styles, 78 | "matched_styles": matched_styles, 79 | "event_listeners": event_listeners, 80 | "children": children 81 | }, 82 | "extraction_stats": { 83 | "computed_styles_count": len(computed_styles), 84 | "css_rules_count": len(matched_styles.get("matchedCSSRules", [])), 85 | "event_listeners_count": len(event_listeners), 86 | "children_count": len(children) 87 | } 88 | } 89 | debug_logger.log_info("cdp_cloner", "extract_complete", f"CDP extraction completed successfully") 90 | return result 91 | except Exception as e: 92 | debug_logger.log_error("cdp_cloner", "extract_complete", f"CDP extraction failed: {str(e)}") 93 | return {"error": f"CDP extraction failed: {str(e)}"} 94 | 95 | async def _get_element_html(self, tab, node_id) -> Dict[str, Any]: 96 | """ 97 | Get element's HTML structure and attributes. 98 | 99 | Args: 100 | tab (Any): The nodriver tab object for CDP communication. 101 | node_id (Any): Node ID of the target element. 102 | 103 | Returns: 104 | Dict[str, Any]: Dictionary containing tag name, node info, outer HTML, and attributes. 105 | """ 106 | try: 107 | node_details = await tab.send(uc.cdp.dom.describe_node(node_id=node_id)) 108 | outer_html = await tab.send(uc.cdp.dom.get_outer_html(node_id=node_id)) 109 | return { 110 | "tagName": node_details.tag_name, 111 | "nodeId": int(node_id), 112 | "nodeName": node_details.node_name, 113 | "localName": node_details.local_name, 114 | "nodeValue": node_details.node_value, 115 | "outerHTML": outer_html, 116 | "attributes": [ 117 | {"name": node_details.attributes[i], "value": node_details.attributes[i+1]} 118 | for i in range(0, len(node_details.attributes or []), 2) 119 | ] if node_details.attributes else [] 120 | } 121 | except Exception as e: 122 | debug_logger.log_error("cdp_cloner", "_get_element_html", f"Failed: {str(e)}") 123 | return {"error": str(e)} 124 | 125 | async def _get_computed_styles_cdp(self, tab, node_id) -> Dict[str, str]: 126 | """ 127 | Get complete computed styles using CDP CSS.getComputedStyleForNode. 
128 | 129 | Args: 130 | tab (Any): The nodriver tab object for CDP communication. 131 | node_id (Any): Node ID of the target element. 132 | 133 | Returns: 134 | Dict[str, str]: Dictionary of computed style properties and their values. 135 | """ 136 | try: 137 | computed_styles_list = await tab.send(uc.cdp.css.get_computed_style_for_node(node_id)) 138 | styles = {} 139 | for style_prop in computed_styles_list: 140 | styles[style_prop.name] = style_prop.value 141 | debug_logger.log_info("cdp_cloner", "_get_computed_styles", f"Got {len(styles)} computed styles") 142 | return styles 143 | except Exception as e: 144 | debug_logger.log_error("cdp_cloner", "_get_computed_styles", f"Failed: {str(e)}") 145 | return {} 146 | 147 | async def _get_matched_styles_cdp(self, tab, node_id) -> Dict[str, Any]: 148 | """ 149 | Get matched CSS rules using CDP CSS.getMatchedStylesForNode. 150 | 151 | Args: 152 | tab (Any): The nodriver tab object for CDP communication. 153 | node_id (Any): Node ID of the target element. 154 | 155 | Returns: 156 | Dict[str, Any]: Dictionary containing inline style, attribute style, matched rules, pseudo elements, and inherited styles. 157 | """ 158 | try: 159 | matched_result = await tab.send(uc.cdp.css.get_matched_styles_for_node(node_id)) 160 | inline_style, attributes_style, matched_rules, pseudo_elements, inherited = matched_result[:5] 161 | result = { 162 | "inlineStyle": self._css_style_to_dict(inline_style) if inline_style else None, 163 | "attributesStyle": self._css_style_to_dict(attributes_style) if attributes_style else None, 164 | "matchedCSSRules": [self._rule_match_to_dict(rule) for rule in (matched_rules or [])], 165 | "pseudoElements": [self._pseudo_element_to_dict(pe) for pe in (pseudo_elements or [])], 166 | "inherited": [self._inherited_style_to_dict(inh) for inh in (inherited or [])] 167 | } 168 | debug_logger.log_info("cdp_cloner", "_get_matched_styles", 169 | f"Got {len(result['matchedCSSRules'])} CSS rules") 170 | return result 171 | except Exception as e: 172 | debug_logger.log_error("cdp_cloner", "_get_matched_styles", f"Failed: {str(e)}") 173 | return {} 174 | 175 | async def _get_event_listeners_cdp(self, tab, node_id) -> List[Dict[str, Any]]: 176 | """ 177 | Get event listeners using CDP DOMDebugger.getEventListeners. 178 | 179 | Args: 180 | tab (Any): The nodriver tab object for CDP communication. 181 | node_id (Any): Node ID of the target element. 182 | 183 | Returns: 184 | List[Dict[str, Any]]: List of dictionaries describing event listeners. 
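
        Example (illustrative sketch; `cloner` names a CDPElementCloner instance, and
        `tab`/`node_id` are assumed to come from the same CDP calls used in
        extract_complete_element_cdp):

            listeners = await cloner._get_event_listeners_cdp(tab, node_id)
            for listener in listeners:
                print(listener["type"], listener["hasHandler"])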
185 | """ 186 | try: 187 | remote_object = await tab.send(uc.cdp.dom.resolve_node(node_id=node_id)) 188 | if not remote_object or not remote_object.object_id: 189 | return [] 190 | event_listeners = await tab.send( 191 | uc.cdp.dom_debugger.get_event_listeners(remote_object.object_id) 192 | ) 193 | result = [] 194 | for listener in event_listeners: 195 | result.append({ 196 | "type": listener.type_, 197 | "useCapture": listener.use_capture, 198 | "passive": listener.passive, 199 | "once": listener.once, 200 | "scriptId": str(listener.script_id), 201 | "lineNumber": listener.line_number, 202 | "columnNumber": listener.column_number, 203 | "hasHandler": listener.handler is not None, 204 | "hasOriginalHandler": listener.original_handler is not None, 205 | "backendNodeId": int(listener.backend_node_id) if listener.backend_node_id else None 206 | }) 207 | debug_logger.log_info("cdp_cloner", "_get_event_listeners", 208 | f"Got {len(result)} event listeners") 209 | return result 210 | except Exception as e: 211 | debug_logger.log_error("cdp_cloner", "_get_event_listeners", f"Failed: {str(e)}") 212 | return [] 213 | 214 | async def _get_children_cdp(self, tab, node_id) -> List[Dict[str, Any]]: 215 | """ 216 | Get child elements using CDP. 217 | 218 | Args: 219 | tab (Any): The nodriver tab object for CDP communication. 220 | node_id (Any): Node ID of the parent element. 221 | 222 | Returns: 223 | List[Dict[str, Any]]: List of dictionaries containing child element HTML and computed styles. 224 | """ 225 | try: 226 | await tab.send(uc.cdp.dom.request_child_nodes(node_id=node_id, depth=1)) 227 | node_details = await tab.send(uc.cdp.dom.describe_node(node_id=node_id, depth=1)) 228 | children = [] 229 | if node_details.children: 230 | for child in node_details.children: 231 | if child.node_type == 1: 232 | child_html = await self._get_element_html(tab, child.node_id) 233 | child_computed = await self._get_computed_styles_cdp(tab, child.node_id) 234 | children.append({ 235 | "html": child_html, 236 | "computed_styles": child_computed, 237 | "depth": 1 238 | }) 239 | return children 240 | except Exception as e: 241 | debug_logger.log_error("cdp_cloner", "_get_children", f"Failed: {str(e)}") 242 | return [] 243 | 244 | def _css_style_to_dict(self, css_style) -> Dict[str, Any]: 245 | """ 246 | Convert CDP CSSStyle to dictionary. 247 | 248 | Args: 249 | css_style (Any): CDP CSSStyle object. 250 | 251 | Returns: 252 | Dict[str, Any]: Dictionary containing cssText and list of properties. 253 | """ 254 | if not css_style: 255 | return {} 256 | return { 257 | "cssText": css_style.css_text_ or "", 258 | "properties": [ 259 | { 260 | "name": prop.name, 261 | "value": prop.value, 262 | "important": prop.important, 263 | "implicit": prop.implicit, 264 | "text": prop.text or "", 265 | "parsedOk": prop.parsed_ok, 266 | "disabled": prop.disabled 267 | } 268 | for prop in css_style.css_properties_ 269 | ] 270 | } 271 | 272 | def _rule_match_to_dict(self, rule_match) -> Dict[str, Any]: 273 | """ 274 | Convert CDP RuleMatch to dictionary. 275 | 276 | Args: 277 | rule_match (Any): CDP RuleMatch object. 278 | 279 | Returns: 280 | Dict[str, Any]: Dictionary describing the rule match. 
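
        Example of the returned shape (illustrative values only):

            {
                "matchingSelectors": [0],
                "rule": {
                    "selectorText": ".btn:hover",
                    "origin": "regular",
                    "style": {"cssText": "color: red;", "properties": []},
                    "styleSheetId": "42"
                }
            }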
281 | """ 282 | return { 283 | "matchingSelectors": rule_match.matching_selectors, 284 | "rule": { 285 | "selectorText": rule_match.rule.selector_list.text if rule_match.rule.selector_list else "", 286 | "origin": str(rule_match.rule.origin), 287 | "style": self._css_style_to_dict(rule_match.rule.style), 288 | "styleSheetId": str(rule_match.rule.style_sheet_id_) if rule_match.rule.style_sheet_id_ else None 289 | } 290 | } 291 | 292 | def _pseudo_element_to_dict(self, pseudo_element) -> Dict[str, Any]: 293 | """ 294 | Convert CDP PseudoElementMatches to dictionary. 295 | 296 | Args: 297 | pseudo_element (Any): CDP PseudoElementMatches object. 298 | 299 | Returns: 300 | Dict[str, Any]: Dictionary describing the pseudo element matches. 301 | """ 302 | return { 303 | "pseudoType": str(pseudo_element.pseudo_type), 304 | "pseudoIdentifier": pseudo_element.pseudo_identifier_, 305 | "matches": [self._rule_match_to_dict(match) for match in pseudo_element.matches_] 306 | } 307 | 308 | def _inherited_style_to_dict(self, inherited_style) -> Dict[str, Any]: 309 | """ 310 | Convert CDP InheritedStyleEntry to dictionary. 311 | 312 | Args: 313 | inherited_style (Any): CDP InheritedStyleEntry object. 314 | 315 | Returns: 316 | Dict[str, Any]: Dictionary describing inherited styles. 317 | """ 318 | return { 319 | "inlineStyle": self._css_style_to_dict(inherited_style.inline_style) if inherited_style.inline_style else None, 320 | "matchedCSSRules": [self._rule_match_to_dict(rule) for rule in inherited_style.matched_css_rules] 321 | } ``` -------------------------------------------------------------------------------- /src/network_interceptor.py: -------------------------------------------------------------------------------- ```python 1 | """Network interception and traffic monitoring using CDP.""" 2 | 3 | import asyncio 4 | import base64 5 | from datetime import datetime 6 | from typing import Any, Dict, List, Optional 7 | 8 | import nodriver as uc 9 | from nodriver import Tab 10 | 11 | from models import NetworkRequest, NetworkResponse 12 | 13 | 14 | class NetworkInterceptor: 15 | """Intercepts and manages network traffic for browser instances.""" 16 | 17 | def __init__(self): 18 | self._requests: Dict[str, NetworkRequest] = {} 19 | self._responses: Dict[str, NetworkResponse] = {} 20 | self._instance_requests: Dict[str, List[str]] = {} 21 | self._lock = asyncio.Lock() 22 | 23 | async def setup_interception(self, tab: Tab, instance_id: str, block_resources: List[str] = None): 24 | """ 25 | Set up network interception for a tab. 26 | 27 | tab: Tab - The browser tab to intercept. 28 | instance_id: str - The browser instance identifier. 29 | block_resources: List[str] - List of resource types or URL patterns to block. 
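
        Example (illustrative; `tab` is assumed to be an already-connected nodriver tab
        and "main" is a placeholder instance identifier):

            interceptor = NetworkInterceptor()
            await interceptor.setup_interception(tab, "main", block_resources=["image", "font"])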
30 | """ 31 | try: 32 | await tab.send(uc.cdp.network.enable()) 33 | 34 | if block_resources: 35 | # Convert resource types to URL patterns for blocking 36 | url_patterns = [] 37 | for resource_type in block_resources: 38 | # Map resource types to URL patterns that typically identify these resources 39 | resource_patterns = { 40 | 'image': ['*.jpg', '*.jpeg', '*.png', '*.gif', '*.webp', '*.svg', '*.bmp', '*.ico'], 41 | 'stylesheet': ['*.css'], 42 | 'font': ['*.woff', '*.woff2', '*.ttf', '*.otf', '*.eot'], 43 | 'script': ['*.js', '*.mjs'], 44 | 'media': ['*.mp4', '*.mp3', '*.wav', '*.avi', '*.webm'] 45 | } 46 | 47 | if resource_type.lower() in resource_patterns: 48 | url_patterns.extend(resource_patterns[resource_type.lower()]) 49 | print(f"[DEBUG] Added URL patterns for {resource_type}: {resource_patterns[resource_type.lower()]}") 50 | else: 51 | # Assume it's already a URL pattern 52 | url_patterns.append(resource_type) 53 | print(f"[DEBUG] Added custom URL pattern: {resource_type}") 54 | 55 | # Use network.set_blocked_ur_ls to block the URL patterns 56 | if url_patterns: 57 | await tab.send(uc.cdp.network.set_blocked_ur_ls(urls=url_patterns)) 58 | print(f"[DEBUG] Blocked {len(url_patterns)} URL patterns: {url_patterns}") 59 | 60 | tab.add_handler( 61 | uc.cdp.network.RequestWillBeSent, 62 | lambda event: asyncio.create_task(self._on_request(event, instance_id)), 63 | ) 64 | tab.add_handler( 65 | uc.cdp.network.ResponseReceived, 66 | lambda event: asyncio.create_task(self._on_response(event, instance_id)), 67 | ) 68 | 69 | async with self._lock: 70 | if instance_id not in self._instance_requests: 71 | self._instance_requests[instance_id] = [] 72 | except Exception as e: 73 | print(f"[DEBUG] Error in setup_interception: {e}") 74 | raise Exception(f"Failed to setup network interception: {str(e)}") 75 | 76 | async def _on_request(self, event, instance_id: str): 77 | """ 78 | Handle request event. 79 | 80 | event: Any - The event object containing request data. 81 | instance_id: str - The browser instance identifier. 82 | """ 83 | try: 84 | request_id = event.request_id 85 | request = event.request 86 | cookies = {} 87 | if hasattr(request, "headers") and "Cookie" in request.headers: 88 | cookie_str = request.headers["Cookie"] 89 | for cookie in cookie_str.split("; "): 90 | if "=" in cookie: 91 | key, value = cookie.split("=", 1) 92 | cookies[key] = value 93 | network_request = NetworkRequest( 94 | request_id=request_id, 95 | instance_id=instance_id, 96 | url=request.url, 97 | method=request.method, 98 | headers=dict(request.headers) if hasattr(request, "headers") else {}, 99 | cookies=cookies, 100 | post_data=request.post_data if hasattr(request, "post_data") else None, 101 | resource_type=event.type if hasattr(event, "type") else None, 102 | ) 103 | async with self._lock: 104 | self._requests[request_id] = network_request 105 | self._instance_requests[instance_id].append(request_id) 106 | except Exception: 107 | pass 108 | 109 | async def _on_response(self, event, instance_id: str): 110 | """ 111 | Handle response event. 112 | 113 | event: Any - The event object containing response data. 114 | instance_id: str - The browser instance identifier. 
115 | """ 116 | try: 117 | request_id = event.request_id 118 | response = event.response 119 | network_response = NetworkResponse( 120 | request_id=request_id, 121 | status=response.status, 122 | headers=dict(response.headers) if hasattr(response, "headers") else {}, 123 | content_type=response.mime_type if hasattr(response, "mime_type") else None, 124 | ) 125 | async with self._lock: 126 | self._responses[request_id] = network_response 127 | except Exception: 128 | pass 129 | 130 | 131 | async def list_requests(self, instance_id: str, filter_type: Optional[str] = None) -> List[NetworkRequest]: 132 | """ 133 | List all requests for an instance. 134 | 135 | instance_id: str - The browser instance identifier. 136 | filter_type: Optional[str] - Filter requests by resource type. 137 | Returns: List[NetworkRequest] - List of network requests. 138 | """ 139 | async with self._lock: 140 | request_ids = self._instance_requests.get(instance_id, []) 141 | requests = [] 142 | for req_id in request_ids: 143 | if req_id in self._requests: 144 | request = self._requests[req_id] 145 | if filter_type: 146 | if request.resource_type and filter_type.lower() in request.resource_type.lower(): 147 | requests.append(request) 148 | else: 149 | requests.append(request) 150 | return requests 151 | 152 | async def get_request(self, request_id: str) -> Optional[NetworkRequest]: 153 | """ 154 | Get specific request by ID. 155 | 156 | request_id: str - The request identifier. 157 | Returns: Optional[NetworkRequest] - The network request object or None. 158 | """ 159 | async with self._lock: 160 | return self._requests.get(request_id) 161 | 162 | async def get_response(self, request_id: str) -> Optional[NetworkResponse]: 163 | """ 164 | Get response for a request. 165 | 166 | request_id: str - The request identifier. 167 | Returns: Optional[NetworkResponse] - The network response object or None. 168 | """ 169 | async with self._lock: 170 | return self._responses.get(request_id) 171 | 172 | async def get_response_body(self, tab: Tab, request_id: str) -> Optional[bytes]: 173 | """ 174 | Get response body content. 175 | 176 | tab: Tab - The browser tab. 177 | request_id: str - The request identifier. 178 | Returns: Optional[bytes] - The response body as bytes, or None. 179 | """ 180 | try: 181 | # Convert string to RequestId object 182 | request_id_obj = uc.cdp.network.RequestId(request_id) 183 | result = await tab.send(uc.cdp.network.get_response_body(request_id=request_id_obj)) 184 | if result: 185 | body, base64_encoded = result # Result is a tuple (body, base64Encoded) 186 | if base64_encoded: 187 | return base64.b64decode(body) 188 | else: 189 | return body.encode("utf-8") 190 | except Exception: 191 | pass 192 | return None 193 | 194 | async def modify_headers(self, tab: Tab, headers: Dict[str, str]): 195 | """ 196 | Modify request headers for future requests. 197 | 198 | tab: Tab - The browser tab. 199 | headers: Dict[str, str] - Headers to set. 200 | Returns: bool - True if successful. 201 | """ 202 | try: 203 | # Convert dict to Headers object 204 | headers_obj = uc.cdp.network.Headers(headers) 205 | await tab.send(uc.cdp.network.set_extra_http_headers(headers=headers_obj)) 206 | return True 207 | except Exception as e: 208 | raise Exception(f"Failed to modify headers: {str(e)}") 209 | 210 | async def set_user_agent(self, tab: Tab, user_agent: str): 211 | """ 212 | Set custom user agent. 213 | 214 | tab: Tab - The browser tab. 215 | user_agent: str - The user agent string to set. 
216 | Returns: bool - True if successful. 217 | """ 218 | try: 219 | await tab.send(uc.cdp.network.set_user_agent_override(user_agent=user_agent)) 220 | return True 221 | except Exception as e: 222 | raise Exception(f"Failed to set user agent: {str(e)}") 223 | 224 | async def enable_cache(self, tab: Tab, enabled: bool = True): 225 | """ 226 | Enable or disable cache. 227 | 228 | tab: Tab - The browser tab. 229 | enabled: bool - True to enable cache, False to disable. 230 | Returns: bool - True if successful. 231 | """ 232 | try: 233 | await tab.send(uc.cdp.network.set_cache_disabled(cache_disabled=not enabled)) 234 | return True 235 | except Exception as e: 236 | raise Exception(f"Failed to set cache state: {str(e)}") 237 | 238 | async def clear_browser_cache(self, tab: Tab): 239 | """ 240 | Clear browser cache. 241 | 242 | tab: Tab - The browser tab. 243 | Returns: bool - True if successful. 244 | """ 245 | try: 246 | await tab.send(uc.cdp.network.clear_browser_cache()) 247 | return True 248 | except Exception as e: 249 | raise Exception(f"Failed to clear cache: {str(e)}") 250 | 251 | async def clear_cookies(self, tab: Tab, url: Optional[str] = None): 252 | """ 253 | Clear cookies. 254 | 255 | tab: Tab - The browser tab. 256 | url: Optional[str] - The URL for which to clear cookies, or None to clear all. 257 | Returns: bool - True if successful. 258 | """ 259 | try: 260 | if url: 261 | # For specific URL, get all cookies for that URL and delete them 262 | cookies = await tab.send(uc.cdp.network.get_cookies(urls=[url])) 263 | for cookie in cookies: 264 | await tab.send( 265 | uc.cdp.network.delete_cookies( 266 | name=cookie.name, 267 | url=url 268 | ) 269 | ) 270 | else: 271 | # Clear all browser cookies using the proper method 272 | await tab.send(uc.cdp.network.clear_browser_cookies()) 273 | return True 274 | except Exception as e: 275 | raise Exception(f"Failed to clear cookies: {str(e)}") 276 | 277 | async def set_cookie(self, tab: Tab, cookie: Dict[str, Any]): 278 | """ 279 | Set a cookie. 280 | 281 | tab: Tab - The browser tab. 282 | cookie: Dict[str, Any] - Cookie parameters. 283 | Returns: bool - True if successful. 284 | """ 285 | try: 286 | await tab.send(uc.cdp.network.set_cookie(**cookie)) 287 | return True 288 | except Exception as e: 289 | raise Exception(f"Failed to set cookie: {str(e)}") 290 | 291 | async def get_cookies(self, tab: Tab, urls: Optional[List[str]] = None) -> List[Dict[str, Any]]: 292 | """ 293 | Get cookies. 294 | 295 | tab: Tab - The browser tab. 296 | urls: Optional[List[str]] - List of URLs to get cookies for, or None for all. 297 | Returns: List[Dict[str, Any]] - List of cookies. 298 | """ 299 | try: 300 | if urls: 301 | result = await tab.send(uc.cdp.network.get_cookies(urls=urls)) 302 | else: 303 | result = await tab.send(uc.cdp.network.get_all_cookies()) 304 | if isinstance(result, dict): 305 | return result.get("cookies", []) 306 | elif isinstance(result, list): 307 | return result 308 | else: 309 | return [] 310 | except Exception as e: 311 | raise Exception(f"Failed to get cookies: {str(e)}") 312 | 313 | async def emulate_network_conditions( 314 | self, 315 | tab: Tab, 316 | offline: bool = False, 317 | latency: int = 0, 318 | download_throughput: int = -1, 319 | upload_throughput: int = -1, 320 | ): 321 | """ 322 | Emulate network conditions. 323 | 324 | tab: Tab - The browser tab. 325 | offline: bool - Whether to emulate offline mode. 326 | latency: int - Additional latency (ms). 327 | download_throughput: int - Download speed (bytes/sec). 
328 | upload_throughput: int - Upload speed (bytes/sec). 329 | Returns: bool - True if successful. 330 | """ 331 | try: 332 | await tab.send( 333 | uc.cdp.network.emulate_network_conditions( 334 | offline=offline, 335 | latency=latency, 336 | download_throughput=download_throughput, 337 | upload_throughput=upload_throughput, 338 | ) 339 | ) 340 | return True 341 | except Exception as e: 342 | raise Exception(f"Failed to emulate network conditions: {str(e)}") 343 | 344 | async def clear_instance_data(self, instance_id: str): 345 | """ 346 | Clear all network data for an instance. 347 | 348 | instance_id: str - The browser instance identifier. 349 | """ 350 | async with self._lock: 351 | if instance_id in self._instance_requests: 352 | for req_id in self._instance_requests[instance_id]: 353 | self._requests.pop(req_id, None) 354 | self._responses.pop(req_id, None) 355 | del self._instance_requests[instance_id] ``` -------------------------------------------------------------------------------- /src/comprehensive_element_cloner.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Comprehensive Element Cloner - CopyIt-CDP v3 Style 3 | =================================================== 4 | 5 | This module provides comprehensive element extraction capabilities matching CopyIt-CDP v3 6 | functionality using proper nodriver API without JSON.stringify wrappers. 7 | 8 | Key features: 9 | - Complete computed styles extraction 10 | - Event listener detection (inline, addEventListener, React/framework) 11 | - CSS rules matching from all stylesheets 12 | - Pseudo-element styles (::before, ::after, etc.) 13 | - Animation and transition properties 14 | - Framework detection and handler extraction 15 | - Child element extraction with depth tracking 16 | """ 17 | 18 | import os 19 | import sys 20 | from typing import Dict, Any, Optional 21 | from pathlib import Path 22 | 23 | project_root = Path(__file__).parent 24 | sys.path.append(str(project_root)) 25 | 26 | from debug_logger import debug_logger 27 | 28 | 29 | class ComprehensiveElementCloner: 30 | """ 31 | Comprehensive element cloner that extracts complete element data 32 | matching CopyIt-CDP v3 functionality using proper nodriver APIs. 33 | """ 34 | 35 | def __init__(self): 36 | """Initialize the comprehensive element cloner.""" 37 | pass 38 | 39 | async def extract_complete_element( 40 | self, 41 | tab, 42 | selector: str, 43 | include_children: bool = True 44 | ) -> Dict[str, Any]: 45 | """ 46 | Extract complete element data matching CopyIt-CDP v3 functionality. 
47 | 48 | This method extracts: 49 | - HTML structure with all attributes 50 | - Complete computed styles (all CSS properties) 51 | - Event listeners (inline, addEventListener, React handlers) 52 | - CSS rules from stylesheets that match the element 53 | - Pseudo-elements (::before, ::after) with their styles 54 | - Animations, transitions, and transforms 55 | - Font information 56 | - Child elements with depth tracking (if requested) 57 | - Framework detection (React, Vue, Angular handlers) 58 | """ 59 | try: 60 | debug_logger.log_info("element_cloner", "extract_complete", f"Starting comprehensive extraction for {selector}") 61 | 62 | js_code = f""" 63 | (async function() {{ 64 | async function extractSingleElement(element) {{ 65 | const computedStyles = window.getComputedStyle(element); 66 | const styles = {{}}; 67 | for (let i = 0; i < computedStyles.length; i++) {{ 68 | const prop = computedStyles[i]; 69 | styles[prop] = computedStyles.getPropertyValue(prop); 70 | }} 71 | 72 | const html = {{ 73 | outerHTML: element.outerHTML, 74 | innerHTML: element.innerHTML, 75 | tagName: element.tagName, 76 | id: element.id, 77 | className: element.className, 78 | attributes: Array.from(element.attributes).map(attr => ({{ 79 | name: attr.name, 80 | value: attr.value 81 | }})) 82 | }}; 83 | 84 | const eventListeners = []; 85 | 86 | for (const attr of element.attributes) {{ 87 | if (attr.name.startsWith('on')) {{ 88 | eventListeners.push({{ 89 | type: attr.name.substring(2), 90 | handler: attr.value, 91 | source: 'inline' 92 | }}); 93 | }} 94 | }} 95 | 96 | if (typeof getEventListeners === 'function') {{ 97 | try {{ 98 | const listeners = getEventListeners(element); 99 | for (const eventType in listeners) {{ 100 | listeners[eventType].forEach(listener => {{ 101 | eventListeners.push({{ 102 | type: eventType, 103 | handler: listener.listener.toString().substring(0, 200) + '...', 104 | useCapture: listener.useCapture, 105 | passive: listener.passive, 106 | once: listener.once, 107 | source: 'addEventListener' 108 | }}); 109 | }}); 110 | }} 111 | }} catch (e) {{}} 112 | }} 113 | 114 | const commonEvents = ['click', 'mousedown', 'mouseup', 'mouseover', 'mouseout', 'focus', 'blur', 'change', 'input', 'submit']; 115 | commonEvents.forEach(eventType => {{ 116 | if (element[`on${{eventType}}`] && typeof element[`on${{eventType}}`] === 'function') {{ 117 | const handler = element[`on${{eventType}}`].toString(); 118 | if (!eventListeners.some(l => l.type === eventType && l.source === 'inline')) {{ 119 | eventListeners.push({{ 120 | type: eventType, 121 | handler: handler, 122 | handlerPreview: handler.substring(0, 100) + (handler.length > 100 ? '...' : ''), 123 | source: 'property' 124 | }}); 125 | }} 126 | }} 127 | }}); 128 | 129 | try {{ 130 | const reactKeys = Object.keys(element).filter(key => key.startsWith('__react')); 131 | if (reactKeys.length > 0) {{ 132 | const reactDetails = []; 133 | reactKeys.forEach(key => {{ 134 | try {{ 135 | const reactData = element[key]; 136 | if (reactData && reactData.memoizedProps) {{ 137 | const props = reactData.memoizedProps; 138 | Object.keys(props).forEach(prop => {{ 139 | if (prop.startsWith('on') && typeof props[prop] === 'function') {{ 140 | const funcStr = props[prop].toString(); 141 | reactDetails.push({{ 142 | event: prop.substring(2).toLowerCase(), 143 | handler: funcStr, 144 | handlerPreview: funcStr.substring(0, 100) + (funcStr.length > 100 ? '...' 
: '') 145 | }}); 146 | }} 147 | }}); 148 | }} 149 | }} catch (e) {{}} 150 | }}); 151 | 152 | eventListeners.push({{ 153 | type: 'framework', 154 | handler: 'React event handlers detected', 155 | source: 'react', 156 | details: `Found ${{reactKeys.length}} React properties`, 157 | reactHandlers: reactDetails 158 | }}); 159 | }} 160 | }} catch (e) {{}} 161 | 162 | const cssRules = []; 163 | const sheets = document.styleSheets; 164 | for (let i = 0; i < sheets.length; i++) {{ 165 | try {{ 166 | const rules = sheets[i].cssRules || sheets[i].rules; 167 | for (let j = 0; j < rules.length; j++) {{ 168 | const rule = rules[j]; 169 | if (rule.type === 1 && element.matches(rule.selectorText)) {{ 170 | cssRules.push({{ 171 | selector: rule.selectorText, 172 | css: rule.style.cssText, 173 | source: sheets[i].href || 'inline' 174 | }}); 175 | }} 176 | }} 177 | }} catch (e) {{ 178 | }} 179 | }} 180 | 181 | const pseudoElements = {{}}; 182 | ['::before', '::after', '::first-line', '::first-letter'].forEach(pseudo => {{ 183 | const pseudoStyles = window.getComputedStyle(element, pseudo); 184 | const content = pseudoStyles.getPropertyValue('content'); 185 | if (content && content !== 'none') {{ 186 | pseudoElements[pseudo] = {{ 187 | content: content, 188 | styles: {{}} 189 | }}; 190 | for (let i = 0; i < pseudoStyles.length; i++) {{ 191 | const prop = pseudoStyles[i]; 192 | pseudoElements[pseudo].styles[prop] = pseudoStyles.getPropertyValue(prop); 193 | }} 194 | }} 195 | }}); 196 | 197 | const animations = {{ 198 | animation: styles.animation || 'none', 199 | transition: styles.transition || 'none', 200 | transform: styles.transform || 'none' 201 | }}; 202 | 203 | const fonts = {{ 204 | computed: styles.fontFamily, 205 | fontSize: styles.fontSize, 206 | fontWeight: styles.fontWeight 207 | }}; 208 | 209 | return {{ 210 | html, 211 | styles, 212 | eventListeners, 213 | cssRules, 214 | pseudoElements, 215 | animations, 216 | fonts 217 | }}; 218 | }} 219 | 220 | function getElementDepth(child, parent) {{ 221 | let depth = 0; 222 | let current = child; 223 | while (current && current !== parent) {{ 224 | depth++; 225 | current = current.parentElement; 226 | }} 227 | return depth; 228 | }} 229 | 230 | function getElementPath(child, parent) {{ 231 | const path = []; 232 | let current = child; 233 | while (current && current !== parent) {{ 234 | const tag = current.tagName.toLowerCase(); 235 | const index = Array.from(current.parentElement.children) 236 | .filter(el => el.tagName === current.tagName) 237 | .indexOf(current); 238 | path.unshift(index > 0 ? 
`${{tag}}[${{index}}]` : tag); 239 | current = current.parentElement; 240 | }} 241 | return path.join(' > '); 242 | }} 243 | 244 | const element = document.querySelector('{selector}'); 245 | if (!element) return null; 246 | 247 | const result = {{ 248 | element: await extractSingleElement(element), 249 | children: [] 250 | }}; 251 | 252 | if ({str(include_children).lower()}) {{ 253 | let targetElement = element; 254 | const children = element.querySelectorAll('*'); 255 | 256 | if (children.length === 0 && element.parentElement) {{ 257 | console.log('No children found, extracting from parent element instead'); 258 | targetElement = element.parentElement; 259 | result.extractedFrom = 'parent'; 260 | result.originalElement = await extractSingleElement(element); 261 | result.element = await extractSingleElement(targetElement); 262 | }} 263 | 264 | const allChildren = targetElement.querySelectorAll('*'); 265 | for (let i = 0; i < allChildren.length; i++) {{ 266 | const childData = await extractSingleElement(allChildren[i]); 267 | childData.depth = getElementDepth(allChildren[i], targetElement); 268 | childData.path = getElementPath(allChildren[i], targetElement); 269 | if (allChildren[i] === element) {{ 270 | childData.isOriginallySelected = true; 271 | }} 272 | result.children.push(childData); 273 | }} 274 | }} 275 | 276 | return result; 277 | }})() 278 | """ 279 | 280 | debug_logger.log_info("element_cloner", "extract_complete", "Executing comprehensive JavaScript extraction") 281 | 282 | result = await tab.evaluate(js_code, return_by_value=True, await_promise=True) 283 | 284 | debug_logger.log_info("element_cloner", "extract_complete", f"Raw result type: {type(result)}") 285 | 286 | if isinstance(result, dict): 287 | extracted_data = result 288 | elif result is None: 289 | debug_logger.log_error("element_cloner", "extract_complete", "Element not found") 290 | return {"error": "Element not found", "selector": selector} 291 | elif hasattr(result, '__class__') and 'RemoteObject' in str(type(result)): 292 | debug_logger.log_info("element_cloner", "extract_complete", "Got RemoteObject, extracting value") 293 | if hasattr(result, 'value') and result.value is not None: 294 | extracted_data = result.value 295 | elif hasattr(result, 'deep_serialized_value') and result.deep_serialized_value is not None: 296 | deep_val = result.deep_serialized_value.value 297 | debug_logger.log_info("element_cloner", "extract_complete", f"Deep serialized value type: {type(deep_val)}") 298 | debug_logger.log_info("element_cloner", "extract_complete", f"Deep serialized value sample: {str(deep_val)[:300]}") 299 | 300 | if isinstance(deep_val, list) and len(deep_val) > 0: 301 | try: 302 | extracted_data = {} 303 | for item in deep_val: 304 | if isinstance(item, list) and len(item) == 2: 305 | key, val = item 306 | extracted_data[key] = val 307 | debug_logger.log_info("element_cloner", "extract_complete", f"Converted deep serialized to dict with {len(extracted_data)} keys") 308 | except Exception as e: 309 | debug_logger.log_error("element_cloner", "extract_complete", f"Failed to convert deep serialized value: {e}") 310 | extracted_data = {"error": f"Failed to convert deep serialized value: {e}"} 311 | else: 312 | extracted_data = deep_val 313 | else: 314 | debug_logger.log_error("element_cloner", "extract_complete", "RemoteObject has no accessible value") 315 | return {"error": "RemoteObject has no accessible value", "remote_object": str(result)[:200]} 316 | else: 317 | debug_logger.log_error("element_cloner", 
"extract_complete", f"Unexpected result type: {type(result)}") 318 | return {"error": f"Unexpected result type: {type(result)}", "result": str(result)[:200]} 319 | 320 | if not isinstance(extracted_data, dict): 321 | debug_logger.log_error("element_cloner", "extract_complete", f"Extracted data is not dict: {type(extracted_data)}") 322 | return {"error": f"Extracted data is not dict: {type(extracted_data)}"} 323 | 324 | final_result = { 325 | **extracted_data, 326 | "url": tab.url, 327 | "selector": selector, 328 | "timestamp": "now", 329 | "includesChildren": include_children 330 | } 331 | 332 | debug_logger.log_info("element_cloner", "extract_complete", "Comprehensive extraction completed successfully") 333 | return final_result 334 | 335 | except Exception as e: 336 | debug_logger.log_error("element_cloner", "extract_complete", f"Error during extraction: {str(e)}") 337 | return { 338 | "error": f"Extraction failed: {str(e)}", 339 | "selector": selector, 340 | "url": getattr(tab, 'url', 'unknown'), 341 | "timestamp": "now" 342 | } 343 | 344 | comprehensive_element_cloner = ComprehensiveElementCloner() ``` -------------------------------------------------------------------------------- /src/debug_logger.py: -------------------------------------------------------------------------------- ```python 1 | import json 2 | import traceback 3 | from datetime import datetime 4 | from typing import Dict, List, Any, Optional 5 | from collections import defaultdict 6 | import threading 7 | import pickle 8 | import gzip 9 | import os 10 | import asyncio 11 | from concurrent.futures import ThreadPoolExecutor, TimeoutError 12 | 13 | 14 | class DebugLogger: 15 | """Centralized debug logging system for the MCP server.""" 16 | 17 | def __init__(self): 18 | """ 19 | Initializes the DebugLogger. 20 | 21 | Variables: 22 | self._errors (List[Dict[str, Any]]): Stores error logs. 23 | self._warnings (List[Dict[str, Any]]): Stores warning logs. 24 | self._info (List[Dict[str, Any]]): Stores info logs. 25 | self._stats (Dict[str, int]): Stores statistics for errors, warnings, and calls. 26 | self._lock (threading.Lock): Ensures thread safety for logging. 27 | self._enabled (bool): Indicates if logging is enabled. 28 | self._seen_errors (set): Track error signatures to prevent duplicates. 29 | """ 30 | self._errors: List[Dict[str, Any]] = [] 31 | self._warnings: List[Dict[str, Any]] = [] 32 | self._info: List[Dict[str, Any]] = [] 33 | self._stats: Dict[str, int] = defaultdict(int) 34 | self._lock = threading.Lock() 35 | self._enabled = True 36 | self._lock_owner = "none" 37 | import time 38 | self._lock_acquired_time = 0 39 | self._seen_errors: set = set() 40 | 41 | def log_error(self, component: str, method: str, error: Exception, context: Optional[Dict[str, Any]] = None): 42 | """ 43 | Log an error with full context. 44 | 45 | Args: 46 | component (str): Name of the component where the error occurred. 47 | method (str): Name of the method where the error occurred. 48 | error (Exception): The exception instance. 49 | context (Optional[Dict[str, Any]]): Additional context for the error. 
50 | """ 51 | if not self._enabled: 52 | return 53 | 54 | with self._lock: 55 | error_signature = f"{component}.{method}.{type(error).__name__}.{str(error)}" 56 | 57 | if error_signature in self._seen_errors: 58 | self._stats[f'{component}.{method}.errors'] += 1 59 | return 60 | 61 | self._seen_errors.add(error_signature) 62 | 63 | error_entry = { 64 | 'timestamp': datetime.now().isoformat(), 65 | 'component': component, 66 | 'method': method, 67 | 'error_type': type(error).__name__, 68 | 'error_message': str(error), 69 | 'traceback': traceback.format_exc(), 70 | 'context': context or {} 71 | } 72 | self._errors.append(error_entry) 73 | self._stats[f'{component}.{method}.errors'] += 1 74 | print(f"[DEBUG ERROR] {component}.{method}: {error}") 75 | 76 | def log_warning(self, component: str, method: str, message: str, context: Optional[Dict[str, Any]] = None): 77 | """ 78 | Log a warning. 79 | 80 | Args: 81 | component (str): Name of the component where the warning occurred. 82 | method (str): Name of the method where the warning occurred. 83 | message (str): Warning message. 84 | context (Optional[Dict[str, Any]]): Additional context for the warning. 85 | """ 86 | if not self._enabled: 87 | return 88 | 89 | with self._lock: 90 | warning_entry = { 91 | 'timestamp': datetime.now().isoformat(), 92 | 'component': component, 93 | 'method': method, 94 | 'message': message, 95 | 'context': context or {} 96 | } 97 | self._warnings.append(warning_entry) 98 | self._stats[f'{component}.{method}.warnings'] += 1 99 | print(f"[DEBUG WARN] {component}.{method}: {message}") 100 | 101 | def log_info(self, component: str, method: str, message: str, data: Optional[Any] = None): 102 | """ 103 | Log information for debugging. 104 | 105 | Args: 106 | component (str): Name of the component where the info is logged. 107 | method (str): Name of the method where the info is logged. 108 | message (str): Info message. 109 | data (Optional[Any]): Additional data for the info log. 110 | """ 111 | if not self._enabled: 112 | return 113 | 114 | with self._lock: 115 | info_entry = { 116 | 'timestamp': datetime.now().isoformat(), 117 | 'component': component, 118 | 'method': method, 119 | 'message': message, 120 | 'data': data 121 | } 122 | self._info.append(info_entry) 123 | self._stats[f'{component}.{method}.calls'] += 1 124 | print(f"[DEBUG INFO] {component}.{method}: {message}") 125 | if data: 126 | print(f" Data: {data}") 127 | 128 | def get_debug_view(self) -> Dict[str, Any]: 129 | """ 130 | Get comprehensive debug view of all logged data. 131 | 132 | Returns: 133 | Dict[str, Any]: Dictionary containing summary, recent errors/warnings, all errors/warnings, and component breakdown. 134 | """ 135 | return self.get_debug_view_paginated() 136 | 137 | def get_debug_view_paginated( 138 | self, 139 | max_errors: Optional[int] = None, 140 | max_warnings: Optional[int] = None, 141 | max_info: Optional[int] = None 142 | ) -> Dict[str, Any]: 143 | """ 144 | Get paginated debug view of logged data with size limits. 145 | 146 | Args: 147 | max_errors (Optional[int]): Maximum number of errors to include. None for all. 148 | max_warnings (Optional[int]): Maximum number of warnings to include. None for all. 149 | max_info (Optional[int]): Maximum number of info logs to include. None for all. 150 | 151 | Returns: 152 | Dict[str, Any]: Dictionary containing summary, recent errors/warnings, limited errors/warnings, and component breakdown. 
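
        Example (illustrative):

            logger = DebugLogger()
            view = logger.get_debug_view_paginated(max_errors=25, max_warnings=25, max_info=50)
            print(view["summary"]["total_errors"])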
153 | """ 154 | with self._lock: 155 | if max_errors is not None: 156 | limited_errors = self._errors[-max_errors:] if self._errors else [] 157 | all_errors = limited_errors 158 | else: 159 | limited_errors = self._errors[-10:] if self._errors else [] 160 | all_errors = self._errors 161 | 162 | if max_warnings is not None: 163 | limited_warnings = self._warnings[-max_warnings:] if self._warnings else [] 164 | all_warnings = limited_warnings 165 | else: 166 | limited_warnings = self._warnings[-10:] if self._warnings else [] 167 | all_warnings = self._warnings 168 | 169 | if max_info is not None: 170 | limited_info = self._info[-max_info:] if self._info else [] 171 | all_info = limited_info 172 | else: 173 | limited_info = self._info[-10:] if self._info else [] 174 | all_info = self._info 175 | 176 | return { 177 | 'summary': { 178 | 'total_errors': len(self._errors), 179 | 'total_warnings': len(self._warnings), 180 | 'total_info': len(self._info), 181 | 'returned_errors': len(all_errors), 182 | 'returned_warnings': len(all_warnings), 183 | 'returned_info': len(all_info), 184 | 'error_types': self._get_error_summary(), 185 | 'stats': dict(self._stats) 186 | }, 187 | 'recent_errors': limited_errors, 188 | 'recent_warnings': limited_warnings, 189 | 'recent_info': limited_info, 190 | 'all_errors': all_errors, 191 | 'all_warnings': all_warnings, 192 | 'all_info': all_info, 193 | 'component_breakdown': self._get_component_breakdown() 194 | } 195 | 196 | def _get_error_summary(self) -> Dict[str, int]: 197 | """ 198 | Get summary of error types. 199 | 200 | Returns: 201 | Dict[str, int]: Dictionary mapping error type names to their counts. 202 | """ 203 | error_types = defaultdict(int) 204 | for error in self._errors: 205 | error_types[error['error_type']] += 1 206 | return dict(error_types) 207 | 208 | def _get_component_breakdown(self) -> Dict[str, Dict[str, int]]: 209 | """ 210 | Get breakdown by component. 211 | 212 | Returns: 213 | Dict[str, Dict[str, int]]: Dictionary mapping component names to their error, warning, and call counts. 214 | """ 215 | breakdown = defaultdict(lambda: {'errors': 0, 'warnings': 0, 'calls': 0}) 216 | 217 | for error in self._errors: 218 | breakdown[error['component']]['errors'] += 1 219 | 220 | for warning in self._warnings: 221 | breakdown[warning['component']]['warnings'] += 1 222 | 223 | for info in self._info: 224 | breakdown[info['component']]['calls'] += 1 225 | 226 | return dict(breakdown) 227 | 228 | def clear_debug_view(self): 229 | """ 230 | Clear all debug logs with timeout protection. 231 | 232 | Variables: 233 | self._errors (List[Dict[str, Any]]): Cleared. 234 | self._warnings (List[Dict[str, Any]]): Cleared. 235 | self._info (List[Dict[str, Any]]): Cleared. 236 | self._stats (Dict[str, int]): Cleared. 237 | """ 238 | try: 239 | if self._lock.acquire(timeout=5.0): 240 | try: 241 | self._errors.clear() 242 | self._warnings.clear() 243 | self._info.clear() 244 | self._stats.clear() 245 | print("[DEBUG] Debug logs cleared") 246 | finally: 247 | self._lock.release() 248 | else: 249 | print("[DEBUG] Failed to clear logs - timeout acquiring lock") 250 | except Exception as e: 251 | print(f"[DEBUG] Error clearing logs: {e}") 252 | 253 | def clear_debug_view_safe(self): 254 | """ 255 | Safe version that recreates data structures if lock fails. 
256 | """ 257 | try: 258 | self.clear_debug_view() 259 | except: 260 | self._errors = [] 261 | self._warnings = [] 262 | self._info = [] 263 | self._stats = defaultdict(int) 264 | print("[DEBUG] Debug logs force-cleared (lock bypass)") 265 | 266 | def enable(self): 267 | """ 268 | Enable debug logging. 269 | 270 | Variables: 271 | self._enabled (bool): Set to True. 272 | """ 273 | self._enabled = True 274 | print("[DEBUG] Debug logging enabled") 275 | 276 | def disable(self): 277 | """ 278 | Disable debug logging. 279 | 280 | Variables: 281 | self._enabled (bool): Set to False. 282 | """ 283 | self._enabled = False 284 | print("[DEBUG] Debug logging disabled") 285 | 286 | def get_lock_status(self) -> Dict[str, Any]: 287 | """Get current lock status for debugging.""" 288 | import time 289 | return { 290 | "lock_owner": self._lock_owner, 291 | "lock_held_duration": time.time() - self._lock_acquired_time if self._lock_acquired_time > 0 else 0, 292 | "lock_acquired": self._lock.locked() if hasattr(self._lock, 'locked') else "unknown" 293 | } 294 | 295 | def export_to_file(self, filepath: str = "debug_log.json"): 296 | """ 297 | Export debug logs to a JSON file. 298 | 299 | Args: 300 | filepath (str): Path to the file where logs will be exported. 301 | 302 | Returns: 303 | str: The filepath where logs were exported. 304 | """ 305 | return self.export_to_file_paginated(filepath) 306 | 307 | def export_to_file_paginated( 308 | self, 309 | filepath: str = "debug_log.json", 310 | max_errors: Optional[int] = None, 311 | max_warnings: Optional[int] = None, 312 | max_info: Optional[int] = None, 313 | format: str = "auto" 314 | ): 315 | """ 316 | Export paginated debug logs to a file using fastest method available. 317 | 318 | Args: 319 | filepath (str): Path to the file where logs will be exported. 320 | max_errors (Optional[int]): Maximum number of errors to export. None for all. 321 | max_warnings (Optional[int]): Maximum number of warnings to export. None for all. 322 | max_info (Optional[int]): Maximum number of info logs to export. None for all. 323 | format (str): Export format: 'json', 'pickle', 'gzip-pickle', 'auto' (default: 'auto'). 324 | 325 | Returns: 326 | str: The filepath where logs were exported. 
327 | """ 328 | import time 329 | try: 330 | print(f"[DEBUG] export_debug_logs attempting lock acquisition...") 331 | current_status = self.get_lock_status() 332 | print(f"[DEBUG] Current lock status: {current_status}") 333 | 334 | acquired = self._lock.acquire(timeout=5.0) 335 | if not acquired: 336 | print("[DEBUG] Lock timeout - falling back to lock-free export") 337 | return self._export_lockfree(filepath, max_errors, max_warnings, max_info, format) 338 | 339 | self._lock_owner = "export_debug_logs" 340 | self._lock_acquired_time = time.time() 341 | print("[DEBUG] Lock acquired by export_debug_logs") 342 | 343 | try: 344 | debug_data = self.get_debug_view_paginated( 345 | max_errors=max_errors, 346 | max_warnings=max_warnings, 347 | max_info=max_info 348 | ) 349 | finally: 350 | self._lock_owner = "none" 351 | self._lock_acquired_time = 0 352 | self._lock.release() 353 | print("[DEBUG] Lock released by export_debug_logs") 354 | except Exception as e: 355 | print(f"[DEBUG] Exception in export: {e}") 356 | return self._export_lockfree(filepath, max_errors, max_warnings, max_info, format) 357 | 358 | if format == "auto": 359 | total_items = (debug_data['summary']['returned_errors'] + 360 | debug_data['summary']['returned_warnings'] + 361 | debug_data['summary']['returned_info']) 362 | if total_items > 1000: 363 | format = "gzip-pickle" 364 | elif total_items > 100: 365 | format = "pickle" 366 | else: 367 | format = "json" 368 | 369 | if format == "gzip-pickle": 370 | return self._export_gzip_pickle(debug_data, filepath) 371 | elif format == "pickle": 372 | return self._export_pickle(debug_data, filepath) 373 | else: 374 | return self._export_json(debug_data, filepath) 375 | 376 | def _export_lockfree(self, filepath: str, max_errors: Optional[int], max_warnings: Optional[int], max_info: Optional[int], format: str) -> str: 377 | """ 378 | Lock-free export method that creates a snapshot without acquiring locks. 
379 | """ 380 | errors_snapshot = list(self._errors) 381 | warnings_snapshot = list(self._warnings) 382 | info_snapshot = list(self._info) 383 | 384 | if max_errors is not None: 385 | errors_snapshot = errors_snapshot[:max_errors] 386 | if max_warnings is not None: 387 | warnings_snapshot = warnings_snapshot[:max_warnings] 388 | if max_info is not None: 389 | info_snapshot = info_snapshot[:max_info] 390 | 391 | debug_data = { 392 | 'summary': { 393 | 'total_errors': len(self._errors), 394 | 'total_warnings': len(self._warnings), 395 | 'total_info': len(self._info), 396 | 'returned_errors': len(errors_snapshot), 397 | 'returned_warnings': len(warnings_snapshot), 398 | 'returned_info': len(info_snapshot) 399 | }, 400 | 'all_errors': errors_snapshot, 401 | 'all_warnings': warnings_snapshot, 402 | 'all_info': info_snapshot 403 | } 404 | 405 | if format == "auto": 406 | total_items = len(errors_snapshot) + len(warnings_snapshot) + len(info_snapshot) 407 | if total_items > 1000: 408 | format = "gzip-pickle" 409 | elif total_items > 100: 410 | format = "pickle" 411 | else: 412 | format = "json" 413 | 414 | if format == "gzip-pickle": 415 | return self._export_gzip_pickle(debug_data, filepath) 416 | elif format == "pickle": 417 | return self._export_pickle(debug_data, filepath) 418 | else: 419 | return self._export_json(debug_data, filepath) 420 | 421 | def _export_gzip_pickle(self, debug_data: Dict[str, Any], filepath: str) -> str: 422 | if not filepath.endswith('.pkl.gz'): 423 | filepath = filepath.replace('.json', '.pkl.gz') 424 | 425 | with gzip.open(filepath, 'wb') as f: 426 | pickle.dump(debug_data, f, protocol=pickle.HIGHEST_PROTOCOL) 427 | 428 | file_size = os.path.getsize(filepath) 429 | print(f"[DEBUG] Exported {debug_data['summary']['returned_errors']} errors, " 430 | f"{debug_data['summary']['returned_warnings']} warnings, " 431 | f"{debug_data['summary']['returned_info']} info logs to {filepath} " 432 | f"({file_size} bytes, gzip-pickle format)") 433 | return filepath 434 | 435 | def _export_pickle(self, debug_data: Dict[str, Any], filepath: str) -> str: 436 | """Export using pickle (fast for medium data).""" 437 | if not filepath.endswith('.pkl'): 438 | filepath = filepath.replace('.json', '.pkl') 439 | 440 | with open(filepath, 'wb') as f: 441 | pickle.dump(debug_data, f, protocol=pickle.HIGHEST_PROTOCOL) 442 | 443 | file_size = os.path.getsize(filepath) 444 | print(f"[DEBUG] Exported {debug_data['summary']['returned_errors']} errors, " 445 | f"{debug_data['summary']['returned_warnings']} warnings, " 446 | f"{debug_data['summary']['returned_info']} info logs to {filepath} " 447 | f"({file_size} bytes, pickle format)") 448 | return filepath 449 | 450 | def _export_json(self, debug_data: Dict[str, Any], filepath: str) -> str: 451 | """Export using JSON (human readable but slower).""" 452 | with open(filepath, 'w') as f: 453 | json.dump(debug_data, f, separators=(',', ':'), default=str) 454 | 455 | file_size = os.path.getsize(filepath) 456 | print(f"[DEBUG] Exported {debug_data['summary']['returned_errors']} errors, " 457 | f"{debug_data['summary']['returned_warnings']} warnings, " 458 | f"{debug_data['summary']['returned_info']} info logs to {filepath} " 459 | f"({file_size} bytes, JSON format)") 460 | return filepath 461 | 462 | 463 | debug_logger = DebugLogger() ``` -------------------------------------------------------------------------------- /src/browser_manager.py: -------------------------------------------------------------------------------- ```python 1 | """Browser instance 
management with nodriver.""" 2 | 3 | import asyncio 4 | import uuid 5 | from typing import Dict, Optional, List 6 | from datetime import datetime, timedelta 7 | 8 | import nodriver as uc 9 | from nodriver import Browser, Tab 10 | 11 | from debug_logger import debug_logger 12 | from models import BrowserInstance, BrowserState, BrowserOptions, PageState 13 | from persistent_storage import persistent_storage 14 | from dynamic_hook_system import dynamic_hook_system 15 | from platform_utils import get_platform_info 16 | from process_cleanup import process_cleanup 17 | 18 | 19 | class BrowserManager: 20 | """Manages multiple browser instances.""" 21 | 22 | def __init__(self): 23 | self._instances: Dict[str, dict] = {} 24 | self._lock = asyncio.Lock() 25 | 26 | async def spawn_browser(self, options: BrowserOptions) -> BrowserInstance: 27 | """ 28 | Spawn a new browser instance with given options. 29 | 30 | Args: 31 | options (BrowserOptions): Options for browser configuration. 32 | 33 | Returns: 34 | BrowserInstance: The spawned browser instance. 35 | """ 36 | instance_id = str(uuid.uuid4()) 37 | 38 | instance = BrowserInstance( 39 | instance_id=instance_id, 40 | headless=options.headless, 41 | user_agent=options.user_agent, 42 | viewport={"width": options.viewport_width, "height": options.viewport_height} 43 | ) 44 | 45 | try: 46 | platform_info = get_platform_info() 47 | debug_logger.log_info( 48 | "browser_manager", 49 | "spawn_browser", 50 | f"Platform info: {platform_info['system']} | Root: {platform_info['is_root']} | Container: {platform_info['is_container']} | Sandbox: {options.sandbox}" 51 | ) 52 | 53 | config = uc.Config( 54 | headless=options.headless, 55 | user_data_dir=options.user_data_dir, 56 | sandbox=options.sandbox 57 | ) 58 | 59 | browser = await uc.start(config=config) 60 | tab = browser.main_tab 61 | 62 | if hasattr(browser, '_process') and browser._process: 63 | process_cleanup.track_browser_process(instance_id, browser._process) 64 | else: 65 | debug_logger.log_warning("browser_manager", "spawn_browser", 66 | f"Browser {instance_id} has no process to track") 67 | 68 | if options.user_agent: 69 | await tab.send(uc.cdp.emulation.set_user_agent_override( 70 | user_agent=options.user_agent 71 | )) 72 | 73 | if options.extra_headers: 74 | await tab.send(uc.cdp.network.set_extra_http_headers( 75 | headers=options.extra_headers 76 | )) 77 | 78 | await tab.set_window_size( 79 | left=0, 80 | top=0, 81 | width=options.viewport_width, 82 | height=options.viewport_height 83 | ) 84 | print(f"[DEBUG] Set viewport to {options.viewport_width}x{options.viewport_height}") 85 | 86 | await self._setup_dynamic_hooks(tab, instance_id) 87 | 88 | async with self._lock: 89 | self._instances[instance_id] = { 90 | 'browser': browser, 91 | 'tab': tab, 92 | 'instance': instance, 93 | 'options': options, 94 | 'network_data': [] 95 | } 96 | 97 | instance.state = BrowserState.READY 98 | instance.update_activity() 99 | 100 | persistent_storage.store_instance(instance_id, { 101 | 'state': instance.state.value, 102 | 'created_at': instance.created_at.isoformat(), 103 | 'current_url': getattr(tab, 'url', ''), 104 | 'title': 'Browser Instance' 105 | }) 106 | 107 | except Exception as e: 108 | instance.state = BrowserState.ERROR 109 | raise Exception(f"Failed to spawn browser: {str(e)}") 110 | 111 | return instance 112 | 113 | async def _setup_dynamic_hooks(self, tab: Tab, instance_id: str): 114 | """Setup dynamic hook system for browser instance.""" 115 | try: 116 | 
dynamic_hook_system.add_instance(instance_id) 117 | 118 | await dynamic_hook_system.setup_interception(tab, instance_id) 119 | 120 | debug_logger.log_info("browser_manager", "_setup_dynamic_hooks", f"Dynamic hook system setup complete for instance {instance_id}") 121 | 122 | except Exception as e: 123 | debug_logger.log_error("browser_manager", "_setup_dynamic_hooks", f"Failed to setup dynamic hooks for {instance_id}: {e}") 124 | 125 | async def get_instance(self, instance_id: str) -> Optional[dict]: 126 | """ 127 | Get browser instance by ID. 128 | 129 | Args: 130 | instance_id (str): The ID of the browser instance. 131 | 132 | Returns: 133 | Optional[dict]: The browser instance data if found, else None. 134 | """ 135 | async with self._lock: 136 | return self._instances.get(instance_id) 137 | 138 | async def list_instances(self) -> List[BrowserInstance]: 139 | """ 140 | List all browser instances. 141 | 142 | Returns: 143 | List[BrowserInstance]: List of all browser instances. 144 | """ 145 | async with self._lock: 146 | return [data['instance'] for data in self._instances.values()] 147 | 148 | async def close_instance(self, instance_id: str) -> bool: 149 | """ 150 | Close and remove a browser instance. 151 | 152 | Args: 153 | instance_id (str): The ID of the browser instance to close. 154 | 155 | Returns: 156 | bool: True if closed successfully, False otherwise. 157 | """ 158 | import asyncio 159 | 160 | async def _do_close(): 161 | async with self._lock: 162 | if instance_id not in self._instances: 163 | return False 164 | 165 | data = self._instances[instance_id] 166 | browser = data['browser'] 167 | instance = data['instance'] 168 | 169 | try: 170 | if hasattr(browser, 'tabs') and browser.tabs: 171 | for tab in browser.tabs[:]: 172 | try: 173 | await tab.close() 174 | except Exception: 175 | pass 176 | except Exception: 177 | pass 178 | 179 | try: 180 | import asyncio 181 | if hasattr(browser, 'connection') and browser.connection: 182 | asyncio.get_event_loop().create_task(browser.connection.disconnect()) 183 | debug_logger.log_info("browser_manager", "close_connection", "closed connection using get_event_loop().create_task()") 184 | except RuntimeError: 185 | try: 186 | import asyncio 187 | if hasattr(browser, 'connection') and browser.connection: 188 | await asyncio.wait_for(browser.connection.disconnect(), timeout=2.0) 189 | debug_logger.log_info("browser_manager", "close_connection", "closed connection with direct await and timeout") 190 | except (asyncio.TimeoutError, Exception) as e: 191 | debug_logger.log_info("browser_manager", "close_connection", f"connection disconnect failed or timed out: {e}") 192 | pass 193 | except Exception as e: 194 | debug_logger.log_info("browser_manager", "close_connection", f"connection disconnect failed: {e}") 195 | pass 196 | 197 | try: 198 | import nodriver.cdp.browser as cdp_browser 199 | if hasattr(browser, 'connection') and browser.connection: 200 | await browser.connection.send(cdp_browser.close()) 201 | except Exception: 202 | pass 203 | 204 | try: 205 | process_cleanup.kill_browser_process(instance_id) 206 | except Exception as e: 207 | debug_logger.log_warning("browser_manager", "close_instance", 208 | f"Process cleanup failed for {instance_id}: {e}") 209 | 210 | try: 211 | await browser.stop() 212 | except Exception: 213 | pass 214 | 215 | if hasattr(browser, '_process') and browser._process and browser._process.returncode is None: 216 | import os 217 | 218 | for attempt in range(3): 219 | try: 220 | browser._process.terminate() 221 | 
debug_logger.log_info("browser_manager", "terminate_process", f"terminated browser with pid {browser._process.pid} successfully on attempt {attempt + 1}") 222 | break 223 | except Exception: 224 | try: 225 | browser._process.kill() 226 | debug_logger.log_info("browser_manager", "kill_process", f"killed browser with pid {browser._process.pid} successfully on attempt {attempt + 1}") 227 | break 228 | except Exception: 229 | try: 230 | if hasattr(browser, '_process_pid') and browser._process_pid: 231 | os.kill(browser._process_pid, 15) 232 | debug_logger.log_info("browser_manager", "kill_process", f"killed browser with pid {browser._process_pid} using signal 15 successfully on attempt {attempt + 1}") 233 | break 234 | except (PermissionError, ProcessLookupError) as e: 235 | debug_logger.log_info("browser_manager", "kill_process", f"browser already stopped or no permission to kill: {e}") 236 | break 237 | except Exception as e: 238 | if attempt == 2: 239 | debug_logger.log_error("browser_manager", "kill_process", e) 240 | 241 | try: 242 | if hasattr(browser, '_process'): 243 | browser._process = None 244 | if hasattr(browser, '_process_pid'): 245 | browser._process_pid = None 246 | 247 | instance.state = BrowserState.CLOSED 248 | except Exception: 249 | pass 250 | 251 | del self._instances[instance_id] 252 | 253 | persistent_storage.remove_instance(instance_id) 254 | 255 | return True 256 | 257 | try: 258 | return await asyncio.wait_for(_do_close(), timeout=5.0) 259 | except asyncio.TimeoutError: 260 | debug_logger.log_info("browser_manager", "close_instance", f"Close timeout for {instance_id}, forcing cleanup") 261 | try: 262 | async with self._lock: 263 | if instance_id in self._instances: 264 | data = self._instances[instance_id] 265 | data['instance'].state = BrowserState.CLOSED 266 | del self._instances[instance_id] 267 | persistent_storage.remove_instance(instance_id) 268 | except Exception: 269 | pass 270 | return True 271 | except Exception as e: 272 | debug_logger.log_error("browser_manager", "close_instance", e) 273 | return False 274 | 275 | async def get_tab(self, instance_id: str) -> Optional[Tab]: 276 | """ 277 | Get the main tab for a browser instance. 278 | 279 | Args: 280 | instance_id (str): The ID of the browser instance. 281 | 282 | Returns: 283 | Optional[Tab]: The main tab if found, else None. 284 | """ 285 | data = await self.get_instance(instance_id) 286 | if data: 287 | return data['tab'] 288 | return None 289 | 290 | async def get_browser(self, instance_id: str) -> Optional[Browser]: 291 | """ 292 | Get the browser object for an instance. 293 | 294 | Args: 295 | instance_id (str): The ID of the browser instance. 296 | 297 | Returns: 298 | Optional[Browser]: The browser object if found, else None. 299 | """ 300 | data = await self.get_instance(instance_id) 301 | if data: 302 | return data['browser'] 303 | return None 304 | 305 | async def list_tabs(self, instance_id: str) -> List[Dict[str, str]]: 306 | """ 307 | List all tabs for a browser instance. 308 | 309 | Args: 310 | instance_id (str): The ID of the browser instance. 311 | 312 | Returns: 313 | List[Dict[str, str]]: List of tab information dictionaries. 
314 | """ 315 | browser = await self.get_browser(instance_id) 316 | if not browser: 317 | return [] 318 | 319 | await browser.update_targets() 320 | 321 | tabs = [] 322 | for tab in browser.tabs: 323 | await tab 324 | tabs.append({ 325 | 'tab_id': str(tab.target.target_id), 326 | 'url': getattr(tab, 'url', '') or '', 327 | 'title': getattr(tab.target, 'title', '') or 'Untitled', 328 | 'type': getattr(tab.target, 'type_', 'page') 329 | }) 330 | 331 | return tabs 332 | 333 | async def switch_to_tab(self, instance_id: str, tab_id: str) -> bool: 334 | """ 335 | Switch to a specific tab by bringing it to front. 336 | 337 | Args: 338 | instance_id (str): The ID of the browser instance. 339 | tab_id (str): The target ID of the tab to switch to. 340 | 341 | Returns: 342 | bool: True if switched successfully, False otherwise. 343 | """ 344 | browser = await self.get_browser(instance_id) 345 | if not browser: 346 | return False 347 | 348 | await browser.update_targets() 349 | 350 | target_tab = None 351 | for tab in browser.tabs: 352 | if str(tab.target.target_id) == tab_id: 353 | target_tab = tab 354 | break 355 | 356 | if not target_tab: 357 | return False 358 | 359 | try: 360 | await target_tab.bring_to_front() 361 | async with self._lock: 362 | if instance_id in self._instances: 363 | self._instances[instance_id]['tab'] = target_tab 364 | 365 | return True 366 | except Exception: 367 | return False 368 | 369 | async def get_active_tab(self, instance_id: str) -> Optional[Tab]: 370 | """ 371 | Get the currently active tab. 372 | 373 | Args: 374 | instance_id (str): The ID of the browser instance. 375 | 376 | Returns: 377 | Optional[Tab]: The active tab if found, else None. 378 | """ 379 | return await self.get_tab(instance_id) 380 | 381 | async def close_tab(self, instance_id: str, tab_id: str) -> bool: 382 | """ 383 | Close a specific tab. 384 | 385 | Args: 386 | instance_id (str): The ID of the browser instance. 387 | tab_id (str): The target ID of the tab to close. 388 | 389 | Returns: 390 | bool: True if closed successfully, False otherwise. 391 | """ 392 | browser = await self.get_browser(instance_id) 393 | if not browser: 394 | return False 395 | 396 | target_tab = None 397 | for tab in browser.tabs: 398 | if str(tab.target.target_id) == tab_id: 399 | target_tab = tab 400 | break 401 | 402 | if not target_tab: 403 | return False 404 | 405 | try: 406 | await target_tab.close() 407 | return True 408 | except Exception: 409 | return False 410 | 411 | async def update_instance_state(self, instance_id: str, url: str = None, title: str = None): 412 | """ 413 | Update instance state after navigation or action. 414 | 415 | Args: 416 | instance_id (str): The ID of the browser instance. 417 | url (str, optional): The current URL to update. 418 | title (str, optional): The title to update. 419 | """ 420 | async with self._lock: 421 | if instance_id in self._instances: 422 | instance = self._instances[instance_id]['instance'] 423 | if url: 424 | instance.current_url = url 425 | if title: 426 | instance.title = title 427 | instance.update_activity() 428 | 429 | async def get_page_state(self, instance_id: str) -> Optional[PageState]: 430 | """ 431 | Get complete page state for an instance. 432 | 433 | Args: 434 | instance_id (str): The ID of the browser instance. 435 | 436 | Returns: 437 | Optional[PageState]: The page state if available, else None. 
438 | """ 439 | tab = await self.get_tab(instance_id) 440 | if not tab: 441 | return None 442 | 443 | try: 444 | url = await tab.evaluate("window.location.href") 445 | title = await tab.evaluate("document.title") 446 | ready_state = await tab.evaluate("document.readyState") 447 | 448 | cookies = await tab.send(uc.cdp.network.get_cookies()) 449 | 450 | local_storage = {} 451 | session_storage = {} 452 | 453 | try: 454 | local_storage_keys = await tab.evaluate("Object.keys(localStorage)") 455 | for key in local_storage_keys: 456 | value = await tab.evaluate(f"localStorage.getItem('{key}')") 457 | local_storage[key] = value 458 | 459 | session_storage_keys = await tab.evaluate("Object.keys(sessionStorage)") 460 | for key in session_storage_keys: 461 | value = await tab.evaluate(f"sessionStorage.getItem('{key}')") 462 | session_storage[key] = value 463 | except Exception: 464 | pass 465 | 466 | viewport = await tab.evaluate(""" 467 | ({ 468 | width: window.innerWidth, 469 | height: window.innerHeight, 470 | devicePixelRatio: window.devicePixelRatio 471 | }) 472 | """) 473 | 474 | return PageState( 475 | instance_id=instance_id, 476 | url=url, 477 | title=title, 478 | ready_state=ready_state, 479 | cookies=cookies.get('cookies', []), 480 | local_storage=local_storage, 481 | session_storage=session_storage, 482 | viewport=viewport 483 | ) 484 | 485 | except Exception as e: 486 | raise Exception(f"Failed to get page state: {str(e)}") 487 | 488 | async def cleanup_inactive(self, timeout_minutes: int = 30): 489 | """ 490 | Clean up inactive browser instances. 491 | 492 | Args: 493 | timeout_minutes (int, optional): Timeout in minutes to consider an instance inactive. Defaults to 30. 494 | """ 495 | now = datetime.now() 496 | timeout = timedelta(minutes=timeout_minutes) 497 | 498 | to_close = [] 499 | async with self._lock: 500 | for instance_id, data in self._instances.items(): 501 | instance = data['instance'] 502 | if now - instance.last_activity > timeout: 503 | to_close.append(instance_id) 504 | 505 | for instance_id in to_close: 506 | await self.close_instance(instance_id) 507 | 508 | async def close_all(self): 509 | """ 510 | Close all browser instances. 511 | 512 | Closes all currently managed browser instances. 
513 | """ 514 | instance_ids = list(self._instances.keys()) 515 | for instance_id in instance_ids: 516 | await self.close_instance(instance_id) ``` -------------------------------------------------------------------------------- /demo/augment-hero-recreation.html: -------------------------------------------------------------------------------- ```html 1 | <!DOCTYPE html> 2 | <html lang="en"> 3 | <head> 4 | <meta charset="UTF-8"> 5 | <meta name="viewport" content="width=device-width, initial-scale=1.0"> 6 | <title>Augment Code Hero Recreation</title> 7 | <style> 8 | @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800;900&display=swap'); 9 | 10 | * { 11 | margin: 0; 12 | padding: 0; 13 | box-sizing: border-box; 14 | } 15 | 16 | body { 17 | font-family: "Inter", system-ui, -apple-system, "Segoe UI", "Roboto", sans-serif; 18 | color: #fafaf9; 19 | background: #000000; 20 | line-height: 1.5; 21 | font-size: 16px; 22 | font-weight: 400; 23 | -webkit-font-smoothing: antialiased; 24 | -moz-osx-font-smoothing: grayscale; 25 | } 26 | 27 | /* Navigation Bar */ 28 | .navbar { 29 | position: fixed; 30 | top: 0; 31 | left: 0; 32 | right: 0; 33 | z-index: 1000; 34 | padding: 1rem 1.5rem; 35 | background: rgba(0, 0, 0, 0.8); 36 | backdrop-filter: blur(20px); 37 | border-bottom: 1px solid rgba(255, 255, 255, 0.05); 38 | } 39 | 40 | .nav-container { 41 | max-width: 1200px; 42 | margin: 0 auto; 43 | display: flex; 44 | align-items: center; 45 | justify-content: space-between; 46 | } 47 | 48 | .logo { 49 | display: flex; 50 | align-items: center; 51 | gap: 0.5rem; 52 | font-size: 1.1rem; 53 | font-weight: 600; 54 | color: #fafaf9; 55 | text-decoration: none; 56 | } 57 | 58 | .nav-links { 59 | display: none; 60 | gap: 2rem; 61 | list-style: none; 62 | } 63 | 64 | @media (min-width: 768px) { 65 | .nav-links { 66 | display: flex; 67 | } 68 | } 69 | 70 | .nav-links a { 71 | color: #a1a1aa; 72 | text-decoration: none; 73 | font-weight: 500; 74 | transition: color 0.3s ease; 75 | } 76 | 77 | .nav-links a:hover { 78 | color: #fafaf9; 79 | } 80 | 81 | .nav-buttons { 82 | display: flex; 83 | gap: 0.5rem; 84 | } 85 | 86 | .nav-btn { 87 | padding: 0.5rem 1rem; 88 | border-radius: 0.375rem; 89 | font-weight: 500; 90 | font-size: 0.875rem; 91 | text-decoration: none; 92 | transition: all 0.3s ease; 93 | } 94 | 95 | .nav-btn.secondary { 96 | color: #fafaf9; 97 | background: transparent; 98 | border: 1px solid rgba(255, 255, 255, 0.1); 99 | } 100 | 101 | .nav-btn.secondary:hover { 102 | background: rgba(255, 255, 255, 0.05); 103 | } 104 | 105 | .nav-btn.primary { 106 | color: #000; 107 | background: #fafaf9; 108 | border: 1px solid #fafaf9; 109 | } 110 | 111 | .nav-btn.primary:hover { 112 | background: #f4f4f5; 113 | } 114 | 115 | /* Hero Section */ 116 | .hero-section { 117 | position: relative; 118 | min-height: 100vh; 119 | overflow: hidden; 120 | padding: 0 1rem; 121 | background: 122 | radial-gradient(ellipse 50% 80% at 20% 40%, rgba(120, 119, 198, 0.3), transparent), 123 | radial-gradient(ellipse 50% 80% at 80% 50%, rgba(120, 119, 198, 0.15), transparent), 124 | radial-gradient(ellipse 50% 80% at 40% 80%, rgba(120, 119, 198, 0.1), transparent), 125 | #000000; 126 | display: flex; 127 | align-items: center; 128 | } 129 | 130 | .hero-container { 131 | position: relative; 132 | z-index: 10; 133 | margin: 0 auto; 134 | display: flex; 135 | max-width: 1200px; 136 | width: 100%; 137 | flex-direction: column; 138 | align-items: center; 139 | justify-content: center; 140 | text-align: 
center; 141 | gap: 3rem; 142 | padding: 6rem 0 4rem 0; 143 | } 144 | 145 | /* Announcement Banner */ 146 | .announcement { 147 | animation: slideInFromTop 0.8s ease-out; 148 | } 149 | 150 | .announcement a { 151 | text-decoration: none; 152 | color: inherit; 153 | } 154 | 155 | .announcement-banner { 156 | display: inline-flex; 157 | align-items: center; 158 | gap: 0.75rem; 159 | padding: 0.5rem 1.25rem; 160 | border-radius: 50px; 161 | border: 1px solid rgba(255, 255, 255, 0.08); 162 | background: rgba(0, 0, 0, 0.4); 163 | backdrop-filter: blur(10px); 164 | font-size: 0.875rem; 165 | font-weight: 500; 166 | letter-spacing: 0.5px; 167 | text-transform: uppercase; 168 | transition: all 0.3s ease; 169 | cursor: pointer; 170 | } 171 | 172 | .announcement-banner:hover { 173 | background: rgba(255, 255, 255, 0.05); 174 | border-color: rgba(255, 255, 255, 0.15); 175 | transform: translateY(-1px); 176 | } 177 | 178 | /* Main Headlines */ 179 | .main-headlines { 180 | animation: slideInFromBottom 0.8s ease-out 0.2s both; 181 | } 182 | 183 | .headline-large { 184 | font-size: clamp(2.5rem, 8vw, 6rem); 185 | font-weight: 800; 186 | line-height: 1.1; 187 | letter-spacing: -0.02em; 188 | margin-bottom: 0.5rem; 189 | background: linear-gradient(135deg, #fafaf9 0%, #d4d4d8 100%); 190 | background-clip: text; 191 | -webkit-background-clip: text; 192 | -webkit-text-fill-color: transparent; 193 | text-align: center; 194 | } 195 | 196 | @media (min-width: 640px) { 197 | .headline-large { 198 | font-size: clamp(3rem, 10vw, 7rem); 199 | } 200 | } 201 | 202 | /* Subtitle */ 203 | .subtitle { 204 | max-width: 42rem; 205 | font-size: 1.25rem; 206 | line-height: 1.6; 207 | color: #a1a1aa; 208 | font-weight: 400; 209 | margin: 0 auto; 210 | animation: slideInFromBottom 0.8s ease-out 0.4s both; 211 | } 212 | 213 | @media (min-width: 768px) { 214 | .subtitle { 215 | font-size: 1.375rem; 216 | line-height: 1.7; 217 | } 218 | } 219 | 220 | /* CTA Button */ 221 | .cta-section { 222 | animation: slideInFromBottom 0.8s ease-out 0.6s both; 223 | } 224 | 225 | .install-button { 226 | display: inline-block; 227 | text-decoration: none; 228 | background: linear-gradient(135deg, rgba(120, 119, 198, 0.15) 0%, rgba(120, 119, 198, 0.05) 100%); 229 | border: 1px solid rgba(120, 119, 198, 0.2); 230 | border-radius: 8px; 231 | padding: 1px; 232 | transition: all 0.3s ease; 233 | position: relative; 234 | overflow: hidden; 235 | } 236 | 237 | .install-button::before { 238 | content: ''; 239 | position: absolute; 240 | inset: 0; 241 | background: linear-gradient(135deg, rgba(120, 119, 198, 0.1) 0%, rgba(120, 119, 198, 0.02) 100%); 242 | opacity: 0; 243 | transition: opacity 0.3s ease; 244 | border-radius: 7px; 245 | } 246 | 247 | .install-button:hover::before { 248 | opacity: 1; 249 | } 250 | 251 | .install-button:hover { 252 | border-color: rgba(120, 119, 198, 0.3); 253 | transform: translateY(-2px); 254 | box-shadow: 0 20px 40px rgba(120, 119, 198, 0.1); 255 | } 256 | 257 | .button-content { 258 | position: relative; 259 | z-index: 1; 260 | display: flex; 261 | align-items: center; 262 | gap: 1.5rem; 263 | padding: 1rem 1.5rem; 264 | background: rgba(0, 0, 0, 0.6); 265 | border-radius: 7px; 266 | backdrop-filter: blur(10px); 267 | } 268 | 269 | .button-text { 270 | font-size: 1.25rem; 271 | font-weight: 600; 272 | color: #fafaf9; 273 | } 274 | 275 | .button-divider { 276 | width: 1px; 277 | height: 2rem; 278 | background: rgba(255, 255, 255, 0.1); 279 | } 280 | 281 | .button-icons { 282 | display: flex; 283 | gap: 1rem; 284 | } 285 
| 286 | .icon-wrapper { 287 | width: 2.5rem; 288 | height: 2.5rem; 289 | display: flex; 290 | align-items: center; 291 | justify-content: center; 292 | border-radius: 6px; 293 | background: rgba(255, 255, 255, 0.05); 294 | transition: all 0.3s ease; 295 | cursor: pointer; 296 | } 297 | 298 | .icon-wrapper:hover { 299 | background: rgba(255, 255, 255, 0.1); 300 | transform: scale(1.05); 301 | } 302 | 303 | .icon-wrapper svg { 304 | width: 1.5rem; 305 | height: 1.5rem; 306 | } 307 | 308 | /* Video Container */ 309 | .video-showcase { 310 | position: relative; 311 | max-width: 900px; 312 | width: 100%; 313 | animation: slideInFromBottom 0.8s ease-out 0.8s both; 314 | } 315 | 316 | .video-frame { 317 | position: relative; 318 | background: linear-gradient(135deg, rgba(120, 119, 198, 0.1) 0%, rgba(120, 119, 198, 0.02) 100%); 319 | border: 1px solid rgba(255, 255, 255, 0.08); 320 | border-radius: 16px; 321 | padding: 1rem; 322 | backdrop-filter: blur(20px); 323 | } 324 | 325 | .video-container { 326 | position: relative; 327 | overflow: hidden; 328 | border-radius: 12px; 329 | background: #000; 330 | box-shadow: 331 | 0 25px 50px rgba(0, 0, 0, 0.5), 332 | 0 0 0 1px rgba(255, 255, 255, 0.05); 333 | } 334 | 335 | .hero-video { 336 | width: 100%; 337 | height: auto; 338 | display: block; 339 | } 340 | 341 | /* Animations */ 342 | @keyframes slideInFromTop { 343 | from { 344 | opacity: 0; 345 | transform: translateY(-30px); 346 | } 347 | to { 348 | opacity: 1; 349 | transform: translateY(0); 350 | } 351 | } 352 | 353 | @keyframes slideInFromBottom { 354 | from { 355 | opacity: 0; 356 | transform: translateY(30px); 357 | } 358 | to { 359 | opacity: 1; 360 | transform: translateY(0); 361 | } 362 | } 363 | 364 | /* Floating Elements (Desktop Only) */ 365 | .floating-ui { 366 | position: absolute; 367 | pointer-events: none; 368 | opacity: 0; 369 | transition: all 0.8s ease; 370 | } 371 | 372 | @media (min-width: 1200px) { 373 | .video-showcase { 374 | animation: slideInFromBottom 0.8s ease-out 0.8s both, floatIn 1s ease-out 1.6s both; 375 | } 376 | 377 | .floating-ui { 378 | opacity: 0.8; 379 | } 380 | } 381 | 382 | .floating-terminal { 383 | top: -10rem; 384 | right: -15rem; 385 | z-index: 5; 386 | } 387 | 388 | .floating-panel { 389 | bottom: -10rem; 390 | left: -15rem; 391 | z-index: 5; 392 | } 393 | 394 | .floating-ide { 395 | top: 50%; 396 | right: -18rem; 397 | transform: translateY(-50%); 398 | z-index: 5; 399 | } 400 | 401 | .floating-element { 402 | width: 200px; 403 | height: 300px; 404 | background: linear-gradient(135deg, rgba(120, 119, 198, 0.05) 0%, rgba(120, 119, 198, 0.01) 100%); 405 | border: 1px solid rgba(255, 255, 255, 0.05); 406 | border-radius: 12px; 407 | padding: 0.5rem; 408 | backdrop-filter: blur(20px); 409 | } 410 | 411 | .floating-element img { 412 | width: 100%; 413 | height: 100%; 414 | object-fit: cover; 415 | border-radius: 8px; 416 | opacity: 0.9; 417 | } 418 | 419 | @keyframes floatIn { 420 | from { 421 | opacity: 0; 422 | transform: scale(0.8); 423 | } 424 | to { 425 | opacity: 0.8; 426 | transform: scale(1); 427 | } 428 | } 429 | 430 | /* Responsive Adjustments */ 431 | @media (max-width: 768px) { 432 | .hero-container { 433 | gap: 2rem; 434 | padding: 4rem 0 2rem 0; 435 | } 436 | 437 | .button-content { 438 | flex-direction: column; 439 | gap: 1rem; 440 | text-align: center; 441 | } 442 | 443 | .button-divider { 444 | display: none; 445 | } 446 | 447 | .button-text { 448 | font-size: 1.125rem; 449 | } 450 | 451 | .subtitle { 452 | font-size: 1.125rem; 453 | } 454 | 
} 455 | </style> 456 | </head> 457 | <body> 458 | <!-- Navigation --> 459 | <nav class="navbar"> 460 | <div class="nav-container"> 461 | <a href="/" class="logo"> 462 | <svg width="20" height="20" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"> 463 | <rect x="3" y="3" width="6" height="6" fill="currentColor"/> 464 | <rect x="15" y="3" width="6" height="6" fill="currentColor"/> 465 | <rect x="3" y="15" width="6" height="6" fill="currentColor"/> 466 | <rect x="15" y="15" width="6" height="6" fill="currentColor"/> 467 | </svg> 468 | augment code 469 | </a> 470 | 471 | <ul class="nav-links"> 472 | <li><a href="/product">Product</a></li> 473 | <li><a href="/pricing">Pricing</a></li> 474 | <li><a href="/docs">Docs</a></li> 475 | <li><a href="/blog">Blog</a></li> 476 | </ul> 477 | 478 | <div class="nav-buttons"> 479 | <a href="/signin" class="nav-btn secondary">Sign in</a> 480 | <a href="/install" class="nav-btn primary">Install</a> 481 | </div> 482 | </div> 483 | </nav> 484 | 485 | <!-- Hero Section --> 486 | <section class="hero-section"> 487 | <div class="hero-container"> 488 | <!-- Announcement --> 489 | <div class="announcement"> 490 | <a href="/blog/gpt-5-is-here-and-we-now-have-a-model-picker"> 491 | <div class="announcement-banner"> 492 | <span>Now supporting GPT-5 and Sonnet 4</span> 493 | <span>→</span> 494 | </div> 495 | </a> 496 | </div> 497 | 498 | <!-- Main Headlines --> 499 | <div class="main-headlines"> 500 | <h1 class="headline-large">Better Context. Better Agent.</h1> 501 | <h1 class="headline-large">Better Code.</h1> 502 | </div> 503 | 504 | <!-- Subtitle --> 505 | <p class="subtitle"> 506 | The most powerful AI software development platform backed by the industry-leading context engine. 507 | </p> 508 | 509 | <!-- CTA Button --> 510 | <div class="cta-section"> 511 | <a href="/signup" class="install-button"> 512 | <div class="button-content"> 513 | <span class="button-text">Install now</span> 514 | <div class="button-divider"></div> 515 | <div class="button-icons"> 516 | <div class="icon-wrapper"> 517 | <svg viewBox="0 0 50 48" fill="none" xmlns="http://www.w3.org/2000/svg"> 518 | <path d="M2.355 17.08C2.355 17.08 1.2012 16.2498 2.58576 15.1412L5.8116 12.2617C5.8116 12.2617 6.73465 11.2922 7.71057 12.1369L37.4787 34.6354V45.4239C37.4787 45.4239 37.4643 47.118 35.2865 46.9309L2.355 17.08Z" fill="#2489CA"/> 519 | <path d="M10.0252 24.0346L2.35237 30.9982C2.35237 30.9982 1.56394 31.5837 2.35237 32.6299L5.91473 35.8646C5.91473 35.8646 6.76086 36.7716 8.01081 35.7398L16.1451 29.5824L10.0252 24.0346Z" fill="#1070B3"/> 520 | <path d="M23.4933 24.0917L37.5649 13.3655L37.4735 2.63458C37.4735 2.63458 36.8726 0.292582 34.8678 1.51157L16.1426 18.5246L23.4933 24.0917Z" fill="#0877B9"/> 521 | <path d="M35.2826 46.9455C36.0999 47.7806 37.0902 47.507 37.0902 47.507L48.0561 42.1127C49.4599 41.1577 49.2628 39.9723 49.2628 39.9723V7.76029C49.2628 6.34453 47.811 5.85502 47.811 5.85502L38.3065 1.28141C36.2297 2.88486e-05 34.8691 1.51177 34.8691 1.51177C34.8691 1.51177 36.6191 0.254385 37.4748 2.63477V45.2274C37.4748 45.5202 37.4123 45.8081 37.2873 46.0673C37.0373 46.5712 36.4941 47.0415 35.1912 46.8447L35.2826 46.9455Z" fill="#3C99D4"/> 522 | </svg> 523 | </div> 524 | <div class="icon-wrapper"> 525 | <svg viewBox="0 0 64 64"> 526 | <defs> 527 | <linearGradient id="jetbrains-gradient" x1=".8" x2="62.6" y1="3.3" y2="64.2" gradientTransform="matrix(1 0 0 -1 0 66)" gradientUnits="userSpaceOnUse"> 528 | <stop offset="0" stop-color="#ff9419"/> 529 | <stop offset=".4" 
stop-color="#ff021d"/> 530 | <stop offset="1" stop-color="#e600ff"/> 531 | </linearGradient> 532 | </defs> 533 | <path d="M20.3 3.7 3.7 20.3C1.4 22.6 0 25.8 0 29.1v29.8c0 2.8 2.2 5 5 5h29.8c3.3 0 6.5-1.3 8.8-3.7l16.7-16.7c2.3-2.3 3.7-5.5 3.7-8.8V5c0-2.8-2.2-5-5-5H29.2c-3.3 0-6.5 1.3-8.8 3.7Z" fill="url(#jetbrains-gradient)"/> 534 | <path d="M48 16H8v40h40V16Z" fill="#000"/> 535 | <path d="M30 47H13v4h17v-4Z" fill="#fff"/> 536 | </svg> 537 | </div> 538 | </div> 539 | </div> 540 | </a> 541 | </div> 542 | 543 | <!-- Video Showcase --> 544 | <div class="video-showcase"> 545 | <div class="video-frame"> 546 | <div class="video-container"> 547 | <video class="hero-video" autoplay loop muted playsinline poster="data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iODAwIiBoZWlnaHQ9IjQ1MCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSIjMDAwIi8+PHRleHQgeD0iNTAlIiB5PSI1MCUiIGZvbnQtZmFtaWx5PSJBcmlhbCwgc2Fucy1zZXJpZiIgZm9udC1zaXplPSIxOCIgZmlsbD0iIzk5OSIgdGV4dC1hbmNob3I9Im1pZGRsZSIgZHk9Ii4zZW0iPkF1Z21lbnQgQ29kZSBEZW1vPC90ZXh0Pjwvc3ZnPg=="> 548 | <source src="https://augment-assets.com/video.hevc.mp4" type="video/mp4; codecs=hvc1"> 549 | <source src="https://augment-assets.com/video.h264.mp4" type="video/mp4; codecs=avc1.4D401E"> 550 | </video> 551 | </div> 552 | </div> 553 | 554 | <!-- Floating UI Elements --> 555 | <div class="floating-ui floating-terminal"> 556 | <div class="floating-element"> 557 | <div style="width: 100%; height: 100%; background: linear-gradient(135deg, #1a1a1a 0%, #000 100%); border-radius: 8px; display: flex; align-items: center; justify-content: center; color: #00ff88; font-family: monospace; font-size: 0.75rem;">Terminal</div> 558 | </div> 559 | </div> 560 | <div class="floating-ui floating-panel"> 561 | <div class="floating-element"> 562 | <div style="width: 100%; height: 100%; background: linear-gradient(135deg, #2d2d2d 0%, #1a1a1a 100%); border-radius: 8px; display: flex; align-items: center; justify-content: center; color: #60a5fa; font-family: sans-serif; font-size: 0.75rem;">Augment Panel</div> 563 | </div> 564 | </div> 565 | <div class="floating-ui floating-ide"> 566 | <div class="floating-element"> 567 | <div style="width: 100%; height: 100%; background: linear-gradient(135deg, #ff6b35 0%, #f7931e 100%); border-radius: 8px; display: flex; align-items: center; justify-content: center; color: #fff; font-family: sans-serif; font-size: 0.75rem;">IntelliJ IDE</div> 568 | </div> 569 | </div> 570 | </div> 571 | </div> 572 | </section> 573 | </body> 574 | </html> ``` -------------------------------------------------------------------------------- /src/hook_learning_system.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Hook Learning System - AI Training and Examples 3 | 4 | This system provides examples, documentation, and learning materials for AI 5 | to understand how to create effective hook functions. 
6 | """ 7 | 8 | from typing import Dict, List, Any 9 | import ast 10 | 11 | 12 | class HookLearningSystem: 13 | """System to help AI learn how to create hook functions.""" 14 | 15 | @staticmethod 16 | def get_request_object_documentation() -> Dict[str, Any]: 17 | """Get comprehensive documentation of the request object structure.""" 18 | return { 19 | "request_object": { 20 | "description": "The request object passed to hook functions", 21 | "type": "dict", 22 | "fields": { 23 | "request_id": { 24 | "type": "str", 25 | "description": "Unique identifier for this request", 26 | "example": "fetch-12345-abcde" 27 | }, 28 | "instance_id": { 29 | "type": "str", 30 | "description": "Browser instance ID that made the request", 31 | "example": "8e226b0c-3879-4d5e-96b3-db1805bfd4c4" 32 | }, 33 | "url": { 34 | "type": "str", 35 | "description": "Full URL of the request", 36 | "example": "https://example.com/api/data?param=value" 37 | }, 38 | "method": { 39 | "type": "str", 40 | "description": "HTTP method (GET, POST, PUT, DELETE, etc.)", 41 | "example": "GET" 42 | }, 43 | "headers": { 44 | "type": "dict[str, str]", 45 | "description": "Request headers as key-value pairs", 46 | "example": { 47 | "User-Agent": "Mozilla/5.0...", 48 | "Accept": "application/json", 49 | "Authorization": "Bearer token123" 50 | } 51 | }, 52 | "post_data": { 53 | "type": "str or None", 54 | "description": "POST/PUT body data (None for GET requests)", 55 | "example": '{"username": "user", "password": "pass"}' 56 | }, 57 | "resource_type": { 58 | "type": "str or None", 59 | "description": "Type of resource (Document, Script, Image, XHR, etc.)", 60 | "example": "Document" 61 | }, 62 | "stage": { 63 | "type": "str", 64 | "description": "Request stage (request or response)", 65 | "example": "request" 66 | } 67 | } 68 | }, 69 | "hook_action": { 70 | "description": "Return value from hook functions", 71 | "type": "HookAction or dict", 72 | "actions": { 73 | "continue": { 74 | "description": "Allow request to proceed normally", 75 | "example": 'HookAction(action="continue")' 76 | }, 77 | "block": { 78 | "description": "Block the request entirely", 79 | "example": 'HookAction(action="block")' 80 | }, 81 | "redirect": { 82 | "description": "Redirect request to a different URL", 83 | "fields": ["url"], 84 | "example": 'HookAction(action="redirect", url="https://httpbin.org/get")' 85 | }, 86 | "modify": { 87 | "description": "Modify request parameters", 88 | "fields": ["url", "method", "headers", "post_data"], 89 | "example": 'HookAction(action="modify", headers={"X-Custom": "value"})' 90 | }, 91 | "fulfill": { 92 | "description": "Return custom response without sending request", 93 | "fields": ["status_code", "headers", "body"], 94 | "example": 'HookAction(action="fulfill", status_code=200, body="Custom response")' 95 | } 96 | } 97 | } 98 | } 99 | 100 | @staticmethod 101 | def get_hook_examples() -> List[Dict[str, Any]]: 102 | """Get example hook functions for AI learning.""" 103 | return [ 104 | { 105 | "name": "Simple URL Blocker", 106 | "description": "Block all requests to doubleclick.net (ad blocker)", 107 | "requirements": { 108 | "url_pattern": "*doubleclick.net*" 109 | }, 110 | "function": ''' 111 | def process_request(request): 112 | # Block any request to doubleclick.net 113 | return HookAction(action="block") 114 | ''', 115 | "explanation": "This hook blocks all requests matching the URL pattern. No conditions needed since we always want to block ads." 
116 | }, 117 | { 118 | "name": "Simple Redirect", 119 | "description": "Redirect example.com to httpbin.org for testing", 120 | "requirements": { 121 | "url_pattern": "*example.com*" 122 | }, 123 | "function": ''' 124 | def process_request(request): 125 | # Redirect to httpbin for testing 126 | return HookAction(action="redirect", url="https://httpbin.org/get") 127 | ''', 128 | "explanation": "This hook redirects any request to example.com to httpbin.org for testing purposes." 129 | }, 130 | { 131 | "name": "Header Modifier", 132 | "description": "Add custom headers to API requests", 133 | "requirements": { 134 | "url_pattern": "*/api/*" 135 | }, 136 | "function": ''' 137 | def process_request(request): 138 | # Add API key header to all API requests 139 | new_headers = request["headers"].copy() 140 | new_headers["X-API-Key"] = "secret-api-key-123" 141 | new_headers["X-Custom-Client"] = "Browser-Hook-System" 142 | 143 | return HookAction( 144 | action="modify", 145 | headers=new_headers 146 | ) 147 | ''', 148 | "explanation": "This hook adds custom headers to API requests. It copies existing headers and adds new ones." 149 | }, 150 | { 151 | "name": "Method Converter", 152 | "description": "Convert GET requests to POST for specific endpoints", 153 | "requirements": { 154 | "url_pattern": "*/convert-to-post*", 155 | "method": "GET" 156 | }, 157 | "function": ''' 158 | def process_request(request): 159 | # Convert GET to POST and add JSON body 160 | return HookAction( 161 | action="modify", 162 | method="POST", 163 | headers={ 164 | **request["headers"], 165 | "Content-Type": "application/json" 166 | }, 167 | post_data='{"converted": true, "original_url": "' + request["url"] + '"}' 168 | ) 169 | ''', 170 | "explanation": "This hook converts GET requests to POST, adds JSON content-type header, and includes original URL in body." 171 | }, 172 | { 173 | "name": "Custom Response Generator", 174 | "description": "Return custom JSON response for API endpoints", 175 | "requirements": { 176 | "url_pattern": "*/mock-api/*" 177 | }, 178 | "function": ''' 179 | def process_request(request): 180 | # Return mock API response 181 | mock_data = { 182 | "status": "success", 183 | "data": { 184 | "message": "This is a mocked response", 185 | "request_url": request["url"], 186 | "timestamp": datetime.now().isoformat() 187 | } 188 | } 189 | 190 | return HookAction( 191 | action="fulfill", 192 | status_code=200, 193 | headers={"Content-Type": "application/json"}, 194 | body=str(mock_data).replace("'", '"') # Convert to JSON string 195 | ) 196 | ''', 197 | "explanation": "This hook intercepts API requests and returns custom JSON responses without hitting the real server." 
198 | }, 199 | { 200 | "name": "Conditional Blocker", 201 | "description": "Block requests based on multiple conditions", 202 | "requirements": { 203 | "url_pattern": "*" # Match all URLs 204 | }, 205 | "function": ''' 206 | def process_request(request): 207 | # Block requests to social media trackers during work hours 208 | social_trackers = ["facebook.com", "twitter.com", "linkedin.com", "instagram.com"] 209 | 210 | # Check if URL contains social tracker 211 | is_social_tracker = any(tracker in request["url"] for tracker in social_trackers) 212 | 213 | # Check if it's tracking related 214 | is_tracker = "/track" in request["url"] or "/analytics" in request["url"] 215 | 216 | if is_social_tracker and is_tracker: 217 | return HookAction(action="block") 218 | 219 | # Otherwise continue normally 220 | return HookAction(action="continue") 221 | ''', 222 | "explanation": "This hook uses conditional logic to block social media trackers based on URL patterns and content." 223 | }, 224 | { 225 | "name": "Dynamic URL Rewriter", 226 | "description": "Rewrite URLs based on patterns and parameters", 227 | "requirements": { 228 | "url_pattern": "*old-domain.com*" 229 | }, 230 | "function": ''' 231 | def process_request(request): 232 | original_url = request["url"] 233 | 234 | # Replace domain but keep path and parameters 235 | new_url = original_url.replace("old-domain.com", "new-domain.com") 236 | 237 | # Add cache-busting parameter 238 | separator = "&" if "?" in new_url else "?" 239 | new_url += f"{separator}cache_bust=hook_modified" 240 | 241 | return HookAction(action="redirect", url=new_url) 242 | ''', 243 | "explanation": "This hook rewrites URLs by replacing domains and adding parameters, useful for domain migrations." 244 | }, 245 | { 246 | "name": "Request Logger", 247 | "description": "Log specific requests without modifying them", 248 | "requirements": { 249 | "url_pattern": "*important-api*" 250 | }, 251 | "function": ''' 252 | def process_request(request): 253 | # Log important API calls for debugging 254 | print(f"[API LOG] {request['method']} {request['url']}") 255 | 256 | # Log headers if they contain auth info 257 | if "authorization" in str(request["headers"]).lower(): 258 | print(f"[API LOG] Has Authorization header") 259 | 260 | # Always continue the request 261 | return HookAction(action="continue") 262 | ''', 263 | "explanation": "This hook logs request details for debugging/monitoring purposes but doesn't modify the request." 264 | }, 265 | { 266 | "name": "Security Header Injector", 267 | "description": "Add security headers to outgoing requests", 268 | "requirements": { 269 | "url_pattern": "*", 270 | "custom_condition": "request['method'] in ['POST', 'PUT', 'PATCH']" 271 | }, 272 | "function": ''' 273 | def process_request(request): 274 | # Add security headers to modification requests 275 | security_headers = request["headers"].copy() 276 | security_headers.update({ 277 | "X-Requested-With": "XMLHttpRequest", 278 | "X-CSRF-Protection": "enabled", 279 | "X-Custom-Security": "browser-hook-system" 280 | }) 281 | 282 | return HookAction( 283 | action="modify", 284 | headers=security_headers 285 | ) 286 | ''', 287 | "explanation": "This hook adds security headers to POST/PUT/PATCH requests using custom conditions in requirements." 
288 | }, 289 | { 290 | "name": "Response Time Simulator", 291 | "description": "Add artificial delays by fulfilling with delayed responses", 292 | "requirements": { 293 | "url_pattern": "*slow-api*" 294 | }, 295 | "function": ''' 296 | def process_request(request): 297 | # Simulate slow API by returning custom response immediately 298 | # (In real implementation, you'd add actual delays) 299 | 300 | return HookAction( 301 | action="fulfill", 302 | status_code=200, 303 | headers={"Content-Type": "application/json"}, 304 | body='{"message": "Simulated slow response", "delay": "3000ms"}' 305 | ) 306 | ''', 307 | "explanation": "This hook simulates slow APIs by immediately returning responses instead of waiting for real server." 308 | }, 309 | { 310 | "name": "Response Content Modifier", 311 | "description": "Modify response content at response stage", 312 | "requirements": { 313 | "url_pattern": "*api/*", 314 | "stage": "response" 315 | }, 316 | "function": ''' 317 | def process_request(request): 318 | # Only process responses (not requests) 319 | if request.get("stage") != "response": 320 | return HookAction(action="continue") 321 | 322 | # Get response body 323 | response_body = request.get("response_body", "") 324 | 325 | if "user_data" in response_body: 326 | # Replace sensitive data in API responses 327 | modified_body = response_body.replace( 328 | '"email":', '"email_redacted":' 329 | ).replace( 330 | '"phone":', '"phone_redacted":' 331 | ) 332 | 333 | return HookAction( 334 | action="fulfill", 335 | status_code=200, 336 | headers={"Content-Type": "application/json"}, 337 | body=modified_body 338 | ) 339 | 340 | # Continue normally if no modification needed 341 | return HookAction(action="continue") 342 | ''', 343 | "explanation": "This response-stage hook modifies API response content to redact sensitive user data." 344 | }, 345 | { 346 | "name": "Response Header Injector", 347 | "description": "Add security headers to responses at response stage", 348 | "requirements": { 349 | "url_pattern": "*", 350 | "stage": "response" 351 | }, 352 | "function": ''' 353 | def process_request(request): 354 | # Only process responses 355 | if request.get("stage") != "response": 356 | return HookAction(action="continue") 357 | 358 | # Add security headers to all responses 359 | security_headers = { 360 | "X-Content-Type-Options": "nosniff", 361 | "X-Frame-Options": "DENY", 362 | "X-XSS-Protection": "1; mode=block", 363 | "Strict-Transport-Security": "max-age=31536000" 364 | } 365 | 366 | # Merge with existing headers 367 | current_headers = request.get("response_headers", {}) 368 | merged_headers = {**current_headers, **security_headers} 369 | 370 | return HookAction( 371 | action="modify", 372 | headers=merged_headers 373 | ) 374 | ''', 375 | "explanation": "This response-stage hook adds security headers to all responses for better protection." 
376 | }, 377 | { 378 | "name": "API Response Faker", 379 | "description": "Replace API responses with fake data for testing", 380 | "requirements": { 381 | "url_pattern": "*api/users*", 382 | "stage": "response" 383 | }, 384 | "function": ''' 385 | def process_request(request): 386 | # Only process responses 387 | if request.get("stage") != "response": 388 | return HookAction(action="continue") 389 | 390 | # Generate fake user data for testing 391 | fake_response = { 392 | "users": [ 393 | {"id": 1, "name": "Test User 1", "email": "[email protected]"}, 394 | {"id": 2, "name": "Test User 2", "email": "[email protected]"}, 395 | {"id": 3, "name": "Test User 3", "email": "[email protected]"} 396 | ], 397 | "total": 3, 398 | "fake": True 399 | } 400 | 401 | return HookAction( 402 | action="fulfill", 403 | status_code=200, 404 | headers={"Content-Type": "application/json"}, 405 | body=str(fake_response).replace("'", '"') 406 | ) 407 | ''', 408 | "explanation": "This response-stage hook replaces real API responses with fake data for testing environments." 409 | } 410 | ] 411 | 412 | @staticmethod 413 | def get_requirements_documentation() -> Dict[str, Any]: 414 | """Get documentation on hook requirements/matching criteria.""" 415 | return { 416 | "requirements": { 417 | "description": "Criteria that determine when a hook should trigger", 418 | "fields": { 419 | "url_pattern": { 420 | "type": "str", 421 | "description": "Wildcard pattern to match URLs (* = any characters, ? = single character)", 422 | "examples": [ 423 | "*example.com*", # Any URL containing example.com 424 | "https://api.*.com/*", # Any subdomain of .com domains 425 | "*api/v*/users*", # API versioned endpoints 426 | "*.jpg", # Image files 427 | "*doubleclick*" # Ad networks 428 | ] 429 | }, 430 | "method": { 431 | "type": "str", 432 | "description": "HTTP method to match (GET, POST, PUT, DELETE, etc.)", 433 | "examples": ["GET", "POST", "PUT", "DELETE"] 434 | }, 435 | "resource_type": { 436 | "type": "str", 437 | "description": "Type of resource to match", 438 | "examples": ["Document", "Script", "Image", "XHR", "Fetch", "WebSocket"] 439 | }, 440 | "stage": { 441 | "type": "str", 442 | "description": "Stage of request processing (request = before sending, response = after receiving headers/body)", 443 | "examples": ["request", "response"], 444 | "note": "Response stage hooks can access response_body, response_status_code, and response_headers" 445 | }, 446 | "custom_condition": { 447 | "type": "str", 448 | "description": "Python expression evaluated with 'request' variable", 449 | "examples": [ 450 | "len(request['headers']) > 10", 451 | "'json' in request['headers'].get('Content-Type', '')", 452 | "request['method'] in ['POST', 'PUT']", 453 | "'auth' in request['url'].lower()" 454 | ] 455 | } 456 | } 457 | }, 458 | "best_practices": [ 459 | "Use specific URL patterns to avoid over-matching", 460 | "Include method filters for POST/PUT hooks to avoid affecting GET requests", 461 | "Use custom conditions for complex matching logic", 462 | "Test hooks with console logging before deploying", 463 | "Always return a HookAction object", 464 | "Handle exceptions gracefully", 465 | "Use priority (lower = higher priority) to control hook execution order" 466 | ] 467 | } 468 | 469 | @staticmethod 470 | def get_common_patterns() -> List[Dict[str, Any]]: 471 | """Get common hook patterns and use cases.""" 472 | return [ 473 | { 474 | "pattern": "Ad Blocker", 475 | "requirements": {"url_pattern": "*ads*|*analytics*|*tracking*"}, 476 | 
"action": "block", 477 | "use_case": "Block advertising and tracking requests" 478 | }, 479 | { 480 | "pattern": "API Proxy", 481 | "requirements": {"url_pattern": "*api.old-site.com*"}, 482 | "action": "redirect", 483 | "use_case": "Redirect API calls to new endpoints" 484 | }, 485 | { 486 | "pattern": "Authentication Injector", 487 | "requirements": {"url_pattern": "*api/*", "method": "GET|POST"}, 488 | "action": "modify", 489 | "use_case": "Add authentication headers to API requests" 490 | }, 491 | { 492 | "pattern": "Mock Server", 493 | "requirements": {"url_pattern": "*mock/*"}, 494 | "action": "fulfill", 495 | "use_case": "Return custom responses for testing" 496 | }, 497 | { 498 | "pattern": "Request Logger", 499 | "requirements": {"url_pattern": "*"}, 500 | "action": "continue", 501 | "use_case": "Log requests for debugging without modification" 502 | }, 503 | { 504 | "pattern": "Security Headers", 505 | "requirements": {"method": "POST|PUT|PATCH"}, 506 | "action": "modify", 507 | "use_case": "Add security headers to modification requests" 508 | } 509 | ] 510 | 511 | @staticmethod 512 | def validate_hook_function(function_code: str) -> Dict[str, Any]: 513 | """Validate hook function code for common issues.""" 514 | issues = [] 515 | warnings = [] 516 | 517 | try: 518 | # Parse the function code 519 | parsed = ast.parse(function_code) 520 | 521 | # Check for required function 522 | has_process_request = False 523 | for node in ast.walk(parsed): 524 | if isinstance(node, ast.FunctionDef) and node.name == "process_request": 525 | has_process_request = True 526 | 527 | # Check function parameters 528 | if len(node.args.args) != 1: 529 | issues.append("process_request function must take exactly one parameter (request)") 530 | elif node.args.args[0].arg != "request": 531 | warnings.append("First parameter should be named 'request' for clarity") 532 | 533 | if not has_process_request: 534 | issues.append("Function must define 'process_request(request)' function") 535 | 536 | # Check for dangerous operations 537 | dangerous_nodes = [] 538 | for node in ast.walk(parsed): 539 | if isinstance(node, ast.Import) or isinstance(node, ast.ImportFrom): 540 | warnings.append(f"Imports may not work in hook context: {ast.dump(node)}") 541 | elif isinstance(node, ast.Call) and isinstance(node.func, ast.Name): 542 | if node.func.id in ['eval', 'exec', 'open', 'input']: 543 | issues.append(f"Dangerous function call: {node.func.id}") 544 | 545 | return { 546 | "valid": len(issues) == 0, 547 | "issues": issues, 548 | "warnings": warnings 549 | } 550 | 551 | except SyntaxError as e: 552 | return { 553 | "valid": False, 554 | "issues": [f"Syntax error: {e}"], 555 | "warnings": [] 556 | } 557 | except Exception as e: 558 | return { 559 | "valid": False, 560 | "issues": [f"Parse error: {e}"], 561 | "warnings": [] 562 | } 563 | 564 | 565 | # Global instance 566 | hook_learning_system = HookLearningSystem() ``` -------------------------------------------------------------------------------- /src/dynamic_hook_system.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Dynamic Hook System - AI-Generated Request Hooks 3 | 4 | This system allows AI to create custom hook functions that process network requests 5 | in real-time with no pending state. Hooks are Python functions generated by AI 6 | that can modify, block, redirect, or fulfill requests dynamically. 
7 | """ 8 | 9 | import asyncio 10 | import uuid 11 | import fnmatch 12 | from datetime import datetime 13 | from typing import Dict, List, Any, Callable, Optional, Union 14 | from dataclasses import dataclass, asdict 15 | import nodriver as uc 16 | from debug_logger import debug_logger 17 | import ast 18 | import sys 19 | from io import StringIO 20 | import contextlib 21 | 22 | 23 | @dataclass 24 | class RequestInfo: 25 | """Request information passed to hook functions.""" 26 | request_id: str 27 | instance_id: str 28 | url: str 29 | method: str 30 | headers: Dict[str, str] 31 | post_data: Optional[str] = None 32 | resource_type: Optional[str] = None 33 | stage: str = "request" # "request" or "response" 34 | 35 | def to_dict(self) -> Dict[str, Any]: 36 | """Convert to dictionary for AI function processing.""" 37 | return asdict(self) 38 | 39 | 40 | @dataclass 41 | class HookAction: 42 | """Action returned by hook functions.""" 43 | action: str # "continue", "block", "redirect", "fulfill", "modify" 44 | url: Optional[str] = None # For redirect/modify 45 | method: Optional[str] = None # For modify 46 | headers: Optional[Dict[str, str]] = None # For modify/fulfill 47 | body: Optional[str] = None # For fulfill 48 | status_code: Optional[int] = None # For fulfill 49 | post_data: Optional[str] = None # For modify 50 | 51 | 52 | class DynamicHook: 53 | """A dynamic hook with AI-generated function.""" 54 | 55 | def __init__(self, hook_id: str, name: str, requirements: Dict[str, Any], 56 | function_code: str, priority: int = 100): 57 | self.hook_id = hook_id 58 | self.name = name 59 | self.requirements = requirements 60 | self.function_code = function_code 61 | self.priority = priority # Lower number = higher priority 62 | self.created_at = datetime.now() 63 | self.trigger_count = 0 64 | self.last_triggered: Optional[datetime] = None 65 | self.status = "active" 66 | self.request_stage = requirements.get('stage', 'request') # 'request' or 'response' 67 | 68 | self._compiled_function = self._compile_function() 69 | 70 | def _compile_function(self) -> Callable: 71 | """Compile the AI-generated function.""" 72 | try: 73 | namespace = { 74 | 'HookAction': HookAction, 75 | 'datetime': datetime, 76 | 'fnmatch': fnmatch, 77 | '__builtins__': { 78 | 'len': len, 'str': str, 'int': int, 'float': float, 79 | 'bool': bool, 'dict': dict, 'list': list, 'tuple': tuple, 80 | 'print': lambda *args: debug_logger.log_info("hook_function", self.name, " ".join(map(str, args))) 81 | } 82 | } 83 | 84 | exec(self.function_code, namespace) 85 | 86 | if 'process_request' not in namespace: 87 | raise ValueError("Function must define 'process_request(request)'") 88 | 89 | return namespace['process_request'] 90 | 91 | except Exception as e: 92 | debug_logger.log_error("dynamic_hook", "compile_function", f"Failed to compile function for hook {self.name}: {e}") 93 | return lambda request: HookAction(action="continue") 94 | 95 | def matches(self, request: RequestInfo) -> bool: 96 | """Check if this hook matches the request.""" 97 | try: 98 | # Check URL pattern 99 | if 'url_pattern' in self.requirements: 100 | if not fnmatch.fnmatch(request.url, self.requirements['url_pattern']): 101 | return False 102 | 103 | # Check method 104 | if 'method' in self.requirements: 105 | if request.method.upper() != self.requirements['method'].upper(): 106 | return False 107 | 108 | # Check resource type 109 | if 'resource_type' in self.requirements: 110 | if request.resource_type != self.requirements['resource_type']: 111 | return False 112 | 113 
| # Check stage 114 | if 'stage' in self.requirements: 115 | if request.stage != self.requirements['stage']: 116 | return False 117 | 118 | # Check custom conditions (if any) 119 | if 'custom_condition' in self.requirements: 120 | condition_code = self.requirements['custom_condition'] 121 | namespace = {'request': request, '__builtins__': {'len': len, 'str': str}} 122 | try: 123 | result = eval(condition_code, namespace) 124 | if not result: 125 | return False 126 | except: 127 | return False 128 | 129 | return True 130 | 131 | except Exception as e: 132 | debug_logger.log_error("dynamic_hook", "matches", f"Error matching hook {self.name}: {e}") 133 | return False 134 | 135 | def process(self, request: RequestInfo) -> HookAction: 136 | """Execute the hook function.""" 137 | try: 138 | self.trigger_count += 1 139 | self.last_triggered = datetime.now() 140 | 141 | debug_logger.log_info("dynamic_hook", "process", f"Processing request {request.url} with hook {self.name}") 142 | 143 | result = self._compiled_function(request.to_dict()) 144 | 145 | if isinstance(result, dict): 146 | result = HookAction(**result) 147 | elif not isinstance(result, HookAction): 148 | debug_logger.log_error("dynamic_hook", "process", f"Hook {self.name} returned invalid type: {type(result)}") 149 | return HookAction(action="continue") 150 | 151 | debug_logger.log_info("dynamic_hook", "process", f"Hook {self.name} returned action: {result.action}") 152 | return result 153 | 154 | except Exception as e: 155 | debug_logger.log_error("dynamic_hook", "process", f"Error executing hook {self.name}: {e}") 156 | return HookAction(action="continue") 157 | 158 | 159 | class DynamicHookSystem: 160 | """Real-time dynamic hook processing system.""" 161 | 162 | def __init__(self): 163 | self.hooks: Dict[str, DynamicHook] = {} 164 | self.instance_hooks: Dict[str, List[str]] = {} # instance_id -> list of hook_ids 165 | self._lock = asyncio.Lock() 166 | 167 | async def setup_interception(self, tab, instance_id: str): 168 | """Set up request and response interception for a browser tab.""" 169 | try: 170 | all_hooks = [] 171 | 172 | instance_hook_ids = self.instance_hooks.get(instance_id, []) 173 | for hook_id in instance_hook_ids: 174 | hook = self.hooks.get(hook_id) 175 | if hook and hook.status == "active": 176 | all_hooks.append(hook) 177 | 178 | for hook_id, hook in self.hooks.items(): 179 | if hook.status == "active" and hook_id not in instance_hook_ids: 180 | if not hasattr(hook, 'instance_ids') or not hook.instance_ids: 181 | all_hooks.append(hook) 182 | 183 | request_patterns = [] 184 | response_patterns = [] 185 | 186 | for hook in all_hooks: 187 | url_pattern = hook.requirements.get('url_pattern', '*') 188 | resource_type = hook.requirements.get('resource_type') 189 | stage = hook.request_stage 190 | 191 | if stage == 'response': 192 | pattern = uc.cdp.fetch.RequestPattern( 193 | url_pattern=url_pattern, 194 | resource_type=getattr(uc.cdp.network.ResourceType, resource_type.upper()) if resource_type else None, 195 | request_stage=uc.cdp.fetch.RequestStage.RESPONSE 196 | ) 197 | response_patterns.append(pattern) 198 | else: 199 | pattern = uc.cdp.fetch.RequestPattern( 200 | url_pattern=url_pattern, 201 | resource_type=getattr(uc.cdp.network.ResourceType, resource_type.upper()) if resource_type else None, 202 | request_stage=uc.cdp.fetch.RequestStage.REQUEST 203 | ) 204 | request_patterns.append(pattern) 205 | 206 | all_patterns = request_patterns + response_patterns 207 | 208 | if not all_patterns: 209 | all_patterns = [ 210 | 
uc.cdp.fetch.RequestPattern(url_pattern='*', request_stage=uc.cdp.fetch.RequestStage.REQUEST), 211 | uc.cdp.fetch.RequestPattern(url_pattern='*', request_stage=uc.cdp.fetch.RequestStage.RESPONSE) 212 | ] 213 | 214 | await tab.send(uc.cdp.fetch.enable(patterns=all_patterns)) 215 | 216 | tab.add_handler( 217 | uc.cdp.fetch.RequestPaused, 218 | lambda event: asyncio.create_task(self._on_request_paused(tab, event, instance_id)) 219 | ) 220 | 221 | debug_logger.log_info("dynamic_hook_system", "setup_interception", f"Set up interception for instance {instance_id} with {len(all_patterns)} patterns ({len(request_patterns)} request, {len(response_patterns)} response)") 222 | 223 | except Exception as e: 224 | debug_logger.log_error("dynamic_hook_system", "setup_interception", f"Failed to setup interception: {e}") 225 | 226 | async def _on_request_paused(self, tab, event, instance_id: str): 227 | """Handle intercepted requests and responses - process hooks immediately.""" 228 | try: 229 | # Determine if this is request stage or response stage 230 | # According to nodriver docs: "The stage of the request can be determined by presence of responseErrorReason 231 | # and responseStatusCode -- the request is at the response stage if either of these fields is present" 232 | is_response_stage = (hasattr(event, 'response_status_code') and event.response_status_code is not None) or \ 233 | (hasattr(event, 'response_error_reason') and event.response_error_reason is not None) 234 | 235 | stage = "response" if is_response_stage else "request" 236 | 237 | request = RequestInfo( 238 | request_id=str(event.request_id), 239 | instance_id=instance_id, 240 | url=event.request.url, 241 | method=event.request.method, 242 | headers=dict(event.request.headers) if hasattr(event.request, 'headers') else {}, 243 | post_data=event.request.post_data if hasattr(event.request, 'post_data') else None, 244 | resource_type=str(event.resource_type) if hasattr(event, 'resource_type') else None, 245 | stage=stage 246 | ) 247 | 248 | debug_logger.log_info("dynamic_hook_system", "_on_request_paused", f"Intercepted {stage}: {request.method} {request.url}") 249 | 250 | if is_response_stage and hasattr(event, 'response_status_code'): 251 | debug_logger.log_info("dynamic_hook_system", "_on_request_paused", f"Response status: {event.response_status_code}") 252 | 253 | await self._process_request_hooks(tab, request, event) 254 | 255 | except Exception as e: 256 | debug_logger.log_error("dynamic_hook_system", "_on_request_paused", f"Error processing {stage if 'stage' in locals() else 'request'}: {e}") 257 | try: 258 | await tab.send(uc.cdp.fetch.continue_request(request_id=event.request_id)) 259 | except: 260 | pass 261 | 262 | async def _process_request_hooks(self, tab, request: RequestInfo, event=None): 263 | """Process hooks for a request/response in real-time with priority chain processing.""" 264 | try: 265 | instance_hook_ids = self.instance_hooks.get(request.instance_id, []) 266 | 267 | matching_hooks = [] 268 | for hook_id in instance_hook_ids: 269 | hook = self.hooks.get(hook_id) 270 | if hook and hook.status == "active" and hook.request_stage == request.stage and hook.matches(request): 271 | matching_hooks.append(hook) 272 | 273 | matching_hooks.sort(key=lambda h: h.priority) 274 | 275 | if not matching_hooks: 276 | debug_logger.log_info("dynamic_hook_system", "_process_request_hooks", f"No matching hooks for {request.stage} stage: {request.url}") 277 | if request.stage == "response": 278 | await 
tab.send(uc.cdp.fetch.continue_response(request_id=uc.cdp.fetch.RequestId(request.request_id))) 279 | else: 280 | await tab.send(uc.cdp.fetch.continue_request(request_id=uc.cdp.fetch.RequestId(request.request_id))) 281 | return 282 | 283 | debug_logger.log_info("dynamic_hook_system", "_process_request_hooks", f"Found {len(matching_hooks)} matching hooks for {request.stage} stage: {request.url}") 284 | 285 | response_body = None 286 | if request.stage == "response" and event: 287 | try: 288 | body_result = await tab.send(uc.cdp.fetch.get_response_body(request_id=uc.cdp.fetch.RequestId(request.request_id))) 289 | response_body = body_result[0] # body content 290 | debug_logger.log_info("dynamic_hook_system", "_process_request_hooks", f"Retrieved response body ({len(response_body)} chars)") 291 | except Exception as e: 292 | debug_logger.log_error("dynamic_hook_system", "_process_request_hooks", f"Failed to get response body: {e}") 293 | 294 | hook = matching_hooks[0] 295 | 296 | request_data = request.to_dict() 297 | if response_body: 298 | request_data['response_body'] = response_body 299 | request_data['response_status_code'] = getattr(event, 'response_status_code', None) 300 | response_headers = {} 301 | if hasattr(event, 'response_headers') and event.response_headers: 302 | try: 303 | if isinstance(event.response_headers, dict): 304 | response_headers = event.response_headers 305 | elif hasattr(event.response_headers, 'items'): 306 | for header in event.response_headers: 307 | if hasattr(header, 'name') and hasattr(header, 'value'): 308 | response_headers[header.name] = header.value 309 | else: 310 | response_headers = {} 311 | except Exception: 312 | response_headers = {} 313 | request_data['response_headers'] = response_headers 314 | 315 | action = hook._compiled_function(request_data) 316 | if isinstance(action, dict): 317 | action = HookAction(**action) 318 | 319 | hook.trigger_count += 1 320 | hook.last_triggered = datetime.now() 321 | 322 | debug_logger.log_info("dynamic_hook_system", "_process_request_hooks", f"Hook {hook.name} returned action: {action.action}") 323 | 324 | await self._execute_hook_action(tab, request, action, event if request.stage == "response" else None) 325 | 326 | except Exception as e: 327 | debug_logger.log_error("dynamic_hook_system", "_process_request_hooks", f"Error processing hooks: {e}") 328 | try: 329 | if request.stage == "response": 330 | await tab.send(uc.cdp.fetch.continue_response(request_id=uc.cdp.fetch.RequestId(request.request_id))) 331 | else: 332 | await tab.send(uc.cdp.fetch.continue_request(request_id=uc.cdp.fetch.RequestId(request.request_id))) 333 | except: 334 | pass 335 | 336 | async def create_hook(self, name: str, requirements: Dict[str, Any], function_code: str, 337 | instance_ids: Optional[List[str]] = None, priority: int = 100) -> str: 338 | """Create a new dynamic hook.""" 339 | try: 340 | hook_id = str(uuid.uuid4()) 341 | hook = DynamicHook(hook_id, name, requirements, function_code, priority) 342 | 343 | async with self._lock: 344 | self.hooks[hook_id] = hook 345 | 346 | if instance_ids: 347 | for instance_id in instance_ids: 348 | if instance_id not in self.instance_hooks: 349 | self.instance_hooks[instance_id] = [] 350 | self.instance_hooks[instance_id].append(hook_id) 351 | else: 352 | for instance_id in self.instance_hooks: 353 | self.instance_hooks[instance_id].append(hook_id) 354 | 355 | debug_logger.log_info("dynamic_hook_system", "create_hook", f"Created hook {name} with ID {hook_id}") 356 | return hook_id 357 | 358 | 
except Exception as e: 359 | debug_logger.log_error("dynamic_hook_system", "create_hook", f"Failed to create hook {name}: {e}") 360 | raise 361 | 362 | def list_hooks(self) -> List[Dict[str, Any]]: 363 | """List all hooks.""" 364 | return [ 365 | { 366 | "hook_id": hook.hook_id, 367 | "name": hook.name, 368 | "requirements": hook.requirements, 369 | "priority": hook.priority, 370 | "status": hook.status, 371 | "trigger_count": hook.trigger_count, 372 | "last_triggered": hook.last_triggered.isoformat() if hook.last_triggered else None, 373 | "created_at": hook.created_at.isoformat() 374 | } 375 | for hook in self.hooks.values() 376 | ] 377 | 378 | def get_hook_details(self, hook_id: str) -> Optional[Dict[str, Any]]: 379 | """Get detailed hook information.""" 380 | hook = self.hooks.get(hook_id) 381 | if not hook: 382 | return None 383 | 384 | return { 385 | "hook_id": hook.hook_id, 386 | "name": hook.name, 387 | "requirements": hook.requirements, 388 | "function_code": hook.function_code, 389 | "priority": hook.priority, 390 | "status": hook.status, 391 | "trigger_count": hook.trigger_count, 392 | "last_triggered": hook.last_triggered.isoformat() if hook.last_triggered else None, 393 | "created_at": hook.created_at.isoformat() 394 | } 395 | 396 | async def remove_hook(self, hook_id: str) -> bool: 397 | """Remove a hook.""" 398 | try: 399 | async with self._lock: 400 | if hook_id in self.hooks: 401 | del self.hooks[hook_id] 402 | 403 | for instance_id in self.instance_hooks: 404 | if hook_id in self.instance_hooks[instance_id]: 405 | self.instance_hooks[instance_id].remove(hook_id) 406 | 407 | debug_logger.log_info("dynamic_hook_system", "remove_hook", f"Removed hook {hook_id}") 408 | return True 409 | 410 | return False 411 | 412 | except Exception as e: 413 | debug_logger.log_error("dynamic_hook_system", "remove_hook", f"Failed to remove hook {hook_id}: {e}") 414 | return False 415 | 416 | def add_instance(self, instance_id: str): 417 | """Add a new browser instance.""" 418 | if instance_id not in self.instance_hooks: 419 | self.instance_hooks[instance_id] = [] 420 | 421 | async def _execute_hook_action(self, tab, request: RequestInfo, action: HookAction, event=None): 422 | """Execute a hook action for either request or response stage.""" 423 | try: 424 | request_id = uc.cdp.fetch.RequestId(request.request_id) 425 | 426 | if action.action == "block": 427 | await tab.send(uc.cdp.fetch.fail_request( 428 | request_id=request_id, 429 | error_reason=uc.cdp.network.ErrorReason.BLOCKED_BY_CLIENT 430 | )) 431 | debug_logger.log_info("dynamic_hook_system", "_execute_hook_action", f"Blocked {request.stage} {request.url}") 432 | 433 | elif action.action == "fulfill": 434 | headers = [] 435 | if action.headers: 436 | for name, value in action.headers.items(): 437 | headers.append(uc.cdp.fetch.HeaderEntry(name=name, value=value)) 438 | 439 | import base64 440 | body_bytes = (action.body or "").encode('utf-8') 441 | body_base64 = base64.b64encode(body_bytes).decode('ascii') 442 | 443 | await tab.send(uc.cdp.fetch.fulfill_request( 444 | request_id=request_id, 445 | response_code=action.status_code or 200, 446 | response_headers=headers, 447 | body=body_base64 448 | )) 449 | debug_logger.log_info("dynamic_hook_system", "_execute_hook_action", f"Fulfilled {request.stage} {request.url}") 450 | 451 | elif action.action == "redirect" and request.stage == "request": 452 | await tab.send(uc.cdp.fetch.continue_request( 453 | request_id=request_id, 454 | url=action.url 455 | )) 456 | 
debug_logger.log_info("dynamic_hook_system", "_execute_hook_action", f"Redirected request {request.url} to {action.url}") 457 | 458 | elif action.action == "modify": 459 | if request.stage == "response": 460 | response_headers = [] 461 | if action.headers: 462 | for name, value in action.headers.items(): 463 | response_headers.append(uc.cdp.fetch.HeaderEntry(name=name, value=value)) 464 | 465 | await tab.send(uc.cdp.fetch.continue_response( 466 | request_id=request_id, 467 | response_code=action.status_code, 468 | response_headers=response_headers if response_headers else None 469 | )) 470 | debug_logger.log_info("dynamic_hook_system", "_execute_hook_action", f"Modified response for {request.url}") 471 | else: 472 | headers = [] 473 | if action.headers: 474 | for name, value in action.headers.items(): 475 | headers.append(uc.cdp.fetch.HeaderEntry(name=name, value=value)) 476 | 477 | await tab.send(uc.cdp.fetch.continue_request( 478 | request_id=request_id, 479 | url=action.url or request.url, 480 | method=action.method or request.method, 481 | headers=headers if headers else None, 482 | post_data=action.post_data 483 | )) 484 | debug_logger.log_info("dynamic_hook_system", "_execute_hook_action", f"Modified request {request.url}") 485 | 486 | else: 487 | if request.stage == "response": 488 | await tab.send(uc.cdp.fetch.continue_response(request_id=request_id)) 489 | debug_logger.log_info("dynamic_hook_system", "_execute_hook_action", f"Continued response {request.url}") 490 | else: 491 | await tab.send(uc.cdp.fetch.continue_request(request_id=request_id)) 492 | debug_logger.log_info("dynamic_hook_system", "_execute_hook_action", f"Continued request {request.url}") 493 | 494 | except Exception as e: 495 | debug_logger.log_error("dynamic_hook_system", "_execute_hook_action", f"Error executing {request.stage} action: {e}") 496 | try: 497 | if request.stage == "response": 498 | await tab.send(uc.cdp.fetch.continue_response(request_id=uc.cdp.fetch.RequestId(request.request_id))) 499 | else: 500 | await tab.send(uc.cdp.fetch.continue_request(request_id=uc.cdp.fetch.RequestId(request.request_id))) 501 | except: 502 | pass 503 | 504 | 505 | dynamic_hook_system = DynamicHookSystem() ``` -------------------------------------------------------------------------------- /src/dom_handler.py: -------------------------------------------------------------------------------- ```python 1 | """DOM manipulation and element interaction utilities.""" 2 | 3 | import asyncio 4 | import time 5 | from typing import List, Optional, Dict, Any 6 | 7 | from nodriver import Tab, Element 8 | from models import ElementInfo, ElementAction 9 | from debug_logger import debug_logger 10 | 11 | 12 | 13 | class DOMHandler: 14 | """Handles DOM queries and element interactions.""" 15 | 16 | @staticmethod 17 | async def query_elements( 18 | tab: Tab, 19 | selector: str, 20 | text_filter: Optional[str] = None, 21 | visible_only: bool = True, 22 | limit: Optional[Any] = None 23 | ) -> List[ElementInfo]: 24 | """ 25 | Query elements with advanced filtering. 26 | 27 | Args: 28 | tab (Tab): The browser tab object. 29 | selector (str): CSS or XPath selector for elements. 30 | text_filter (Optional[str]): Filter elements by text content. 31 | visible_only (bool): Only include visible elements. 32 | limit (Optional[Any]): Limit the number of results. 33 | 34 | Returns: 35 | List[ElementInfo]: List of element information objects. 
36 | """ 37 | processed_limit = None 38 | if limit is not None: 39 | try: 40 | if isinstance(limit, int): 41 | processed_limit = limit 42 | elif isinstance(limit, str) and limit.isdigit(): 43 | processed_limit = int(limit) 44 | elif isinstance(limit, str) and limit.strip() == '': 45 | processed_limit = None 46 | else: 47 | debug_logger.log_warning('DOMHandler', 'query_elements', 48 | f'Invalid limit parameter: {limit} (type: {type(limit)})') 49 | processed_limit = None 50 | except (ValueError, TypeError) as e: 51 | debug_logger.log_error('DOMHandler', 'query_elements', e, 52 | {'limit_value': limit, 'limit_type': type(limit)}) 53 | processed_limit = None 54 | 55 | debug_logger.log_info('DOMHandler', 'query_elements', 56 | f'Starting query with selector: {selector}', 57 | {'text_filter': text_filter, 'visible_only': visible_only, 58 | 'limit': limit, 'processed_limit': processed_limit}) 59 | try: 60 | if selector.startswith('//'): 61 | elements = await tab.select_all(f'xpath={selector}') 62 | debug_logger.log_info('DOMHandler', 'query_elements', 63 | f'XPath query returned {len(elements)} elements') 64 | else: 65 | elements = await tab.select_all(selector) 66 | debug_logger.log_info('DOMHandler', 'query_elements', 67 | f'CSS query returned {len(elements)} elements') 68 | 69 | results = [] 70 | for idx, elem in enumerate(elements): 71 | try: 72 | debug_logger.log_info('DOMHandler', 'query_elements', 73 | f'Processing element {idx+1}/{len(elements)}') 74 | 75 | if hasattr(elem, 'update'): 76 | await elem.update() 77 | debug_logger.log_info('DOMHandler', 'query_elements', 78 | f'Element {idx+1} updated') 79 | 80 | tag_name = elem.tag_name if hasattr(elem, 'tag_name') else 'unknown' 81 | text_content = elem.text_all if hasattr(elem, 'text_all') else '' 82 | attrs = elem.attrs if hasattr(elem, 'attrs') else {} 83 | 84 | debug_logger.log_info('DOMHandler', 'query_elements', 85 | f'Element {idx+1}: tag={tag_name}, text_len={len(text_content)}, attrs={len(attrs)}') 86 | 87 | if text_filter and text_filter.lower() not in text_content.lower(): 88 | continue 89 | 90 | is_visible = True 91 | if visible_only: 92 | try: 93 | is_visible = await elem.apply( 94 | """(elem) => { 95 | var style = window.getComputedStyle(elem); 96 | return style.display !== 'none' && 97 | style.visibility !== 'hidden' && 98 | style.opacity !== '0'; 99 | }""" 100 | ) 101 | if not is_visible: 102 | continue 103 | except: 104 | pass 105 | 106 | bbox = None 107 | try: 108 | position = await elem.get_position() 109 | if position: 110 | bbox = { 111 | 'x': position.x, 112 | 'y': position.y, 113 | 'width': position.width, 114 | 'height': position.height 115 | } 116 | debug_logger.log_info('DOMHandler', 'query_elements', 117 | f'Element {idx+1} position: {bbox}') 118 | except Exception as pos_error: 119 | debug_logger.log_warning('DOMHandler', 'query_elements', 120 | f'Could not get position for element {idx+1}: {pos_error}') 121 | 122 | is_clickable = False 123 | 124 | children_count = 0 125 | try: 126 | if hasattr(elem, 'children'): 127 | children = elem.children 128 | children_count = len(children) if children else 0 129 | except Exception: 130 | pass 131 | 132 | element_info = ElementInfo( 133 | selector=selector, 134 | tag_name=tag_name, 135 | text=text_content[:500] if text_content else None, 136 | attributes=attrs or {}, 137 | is_visible=is_visible, 138 | is_clickable=is_clickable, 139 | bounding_box=bbox, 140 | children_count=children_count 141 | ) 142 | 143 | results.append(element_info) 144 | 145 | if processed_limit and 
len(results) >= processed_limit: 146 | debug_logger.log_info('DOMHandler', 'query_elements', 147 | f'Reached limit of {processed_limit} results') 148 | break 149 | 150 | except Exception as elem_error: 151 | debug_logger.log_error('DOMHandler', 'query_elements', 152 | elem_error, 153 | {'element_index': idx, 'selector': selector}) 154 | continue 155 | 156 | debug_logger.log_info('DOMHandler', 'query_elements', 157 | f'Returning {len(results)} results') 158 | return results 159 | 160 | except Exception as e: 161 | debug_logger.log_error('DOMHandler', 'query_elements', e, 162 | {'selector': selector, 'tab': str(tab)}) 163 | return [] 164 | 165 | @staticmethod 166 | async def click_element( 167 | tab: Tab, 168 | selector: str, 169 | text_match: Optional[str] = None, 170 | timeout: int = 10000 171 | ) -> bool: 172 | """ 173 | Click an element with smart retry logic. 174 | 175 | Args: 176 | tab (Tab): The browser tab object. 177 | selector (str): CSS selector for the element. 178 | text_match (Optional[str]): Match element by text content. 179 | timeout (int): Timeout in milliseconds. 180 | 181 | Returns: 182 | bool: True if click succeeded, False otherwise. 183 | """ 184 | try: 185 | element = None 186 | 187 | if text_match: 188 | element = await tab.find(text_match, best_match=True) 189 | else: 190 | element = await tab.select(selector, timeout=timeout/1000) 191 | 192 | if not element: 193 | raise Exception(f"Element not found: {selector}") 194 | 195 | await element.scroll_into_view() 196 | await asyncio.sleep(0.5) 197 | 198 | try: 199 | await element.click() 200 | except Exception: 201 | await element.mouse_click() 202 | 203 | return True 204 | 205 | except Exception as e: 206 | raise Exception(f"Failed to click element: {str(e)}") 207 | 208 | @staticmethod 209 | async def type_text( 210 | tab: Tab, 211 | selector: str, 212 | text: str, 213 | clear_first: bool = True, 214 | delay_ms: int = 50, 215 | parse_newlines: bool = False, 216 | shift_enter: bool = False 217 | ) -> bool: 218 | """ 219 | Type text with human-like delays and optional newline parsing. 220 | 221 | Args: 222 | tab (Tab): The browser tab object. 223 | selector (str): CSS selector for the input element. 224 | text (str): Text to type. 225 | clear_first (bool): Clear input before typing. 226 | delay_ms (int): Delay between keystrokes in milliseconds. 227 | parse_newlines (bool): If True, parse \n as Enter key presses. 228 | shift_enter (bool): If True, use Shift+Enter instead of Enter (for chat apps). 229 | 230 | Returns: 231 | bool: True if typing succeeded, False otherwise. 
232 | """ 233 | try: 234 | element = await tab.select(selector) 235 | if not element: 236 | raise Exception(f"Element not found: {selector}") 237 | 238 | await element.focus() 239 | await asyncio.sleep(0.1) 240 | 241 | if clear_first: 242 | try: 243 | await element.apply("(elem) => { elem.value = ''; }") 244 | except: 245 | await element.send_keys('\ue009' + 'a') 246 | await element.send_keys('\ue017') 247 | await asyncio.sleep(0.1) 248 | 249 | if parse_newlines: 250 | from nodriver import cdp 251 | lines = text.split('\n') 252 | for i, line in enumerate(lines): 253 | for char in line: 254 | await element.send_keys(char) 255 | await asyncio.sleep(delay_ms / 1000) 256 | 257 | if i < len(lines) - 1: 258 | if shift_enter: 259 | await element.apply('''(elem) => { 260 | const start = elem.selectionStart; 261 | const end = elem.selectionEnd; 262 | const value = elem.value; 263 | elem.value = value.substring(0, start) + '\\n' + value.substring(end); 264 | elem.selectionStart = elem.selectionEnd = start + 1; 265 | 266 | elem.dispatchEvent(new KeyboardEvent('keydown', { 267 | key: 'Enter', 268 | code: 'Enter', 269 | shiftKey: true, 270 | bubbles: true 271 | })); 272 | elem.dispatchEvent(new Event('input', { bubbles: true })); 273 | }''') 274 | else: 275 | await element.apply('''(elem) => { 276 | const start = elem.selectionStart; 277 | const end = elem.selectionEnd; 278 | const value = elem.value; 279 | elem.value = value.substring(0, start) + '\\n' + value.substring(end); 280 | elem.selectionStart = elem.selectionEnd = start + 1; 281 | 282 | elem.dispatchEvent(new KeyboardEvent('keydown', { 283 | key: 'Enter', 284 | code: 'Enter', 285 | bubbles: true 286 | })); 287 | elem.dispatchEvent(new Event('input', { bubbles: true })); 288 | }''') 289 | await asyncio.sleep(delay_ms / 1000) 290 | else: 291 | for char in text: 292 | await element.send_keys(char) 293 | await asyncio.sleep(delay_ms / 1000) 294 | 295 | return True 296 | 297 | except Exception as e: 298 | raise Exception(f"Failed to type text: {str(e)}") 299 | 300 | @staticmethod 301 | async def paste_text( 302 | tab: Tab, 303 | selector: str, 304 | text: str, 305 | clear_first: bool = True 306 | ) -> bool: 307 | """ 308 | Paste text instantly using nodriver's insert_text method. 309 | This is much faster than typing character by character. 310 | 311 | Args: 312 | tab (Tab): The browser tab object. 313 | selector (str): CSS selector for the input element. 314 | text (str): Text to paste. 315 | clear_first (bool): Clear input before pasting. 316 | 317 | Returns: 318 | bool: True if pasting succeeded, False otherwise. 
319 | """ 320 | from nodriver import cdp 321 | 322 | try: 323 | element = await tab.select(selector) 324 | if not element: 325 | raise Exception(f"Element not found: {selector}") 326 | 327 | await element.focus() 328 | await asyncio.sleep(0.1) 329 | 330 | if clear_first: 331 | try: 332 | await element.apply("(elem) => { elem.value = ''; }") 333 | except: 334 | await tab.send(cdp.input_.dispatch_key_event( 335 | "rawKeyDown", 336 | modifiers=2, # Ctrl 337 | key="a", 338 | code="KeyA", 339 | windows_virtual_key_code=65 340 | )) 341 | await tab.send(cdp.input_.dispatch_key_event( 342 | "keyUp", 343 | modifiers=2, # Ctrl 344 | key="a", 345 | code="KeyA", 346 | windows_virtual_key_code=65 347 | )) 348 | await tab.send(cdp.input_.dispatch_key_event( 349 | "rawKeyDown", 350 | key="Delete", 351 | code="Delete", 352 | windows_virtual_key_code=46 353 | )) 354 | await tab.send(cdp.input_.dispatch_key_event( 355 | "keyUp", 356 | key="Delete", 357 | code="Delete", 358 | windows_virtual_key_code=46 359 | )) 360 | await asyncio.sleep(0.1) 361 | 362 | await tab.send(cdp.input_.insert_text(text)) 363 | 364 | return True 365 | 366 | except Exception as e: 367 | raise Exception(f"Failed to paste text: {str(e)}") 368 | 369 | @staticmethod 370 | async def select_option( 371 | tab: Tab, 372 | selector: str, 373 | value: Optional[str] = None, 374 | text: Optional[str] = None, 375 | index: Optional[int] = None 376 | ) -> bool: 377 | """ 378 | Select option from dropdown using nodriver's native methods. 379 | 380 | Args: 381 | tab (Tab): The browser tab object. 382 | selector (str): CSS selector for the select element. 383 | value (Optional[str]): Option value to select. 384 | text (Optional[str]): Option text to select. 385 | index (Optional[int]): Option index to select. 386 | 387 | Returns: 388 | bool: True if option selected, False otherwise. 389 | """ 390 | try: 391 | select_element = await tab.select(selector) 392 | if not select_element: 393 | raise Exception(f"Select element not found: {selector}") 394 | 395 | if text is not None: 396 | await select_element.send_keys(text) 397 | return True 398 | 399 | if value is not None: 400 | await tab.evaluate(f""" 401 | const select = document.querySelector('{selector}'); 402 | if (select) {{ 403 | select.value = '{value}'; 404 | select.dispatchEvent(new Event('change', {{bubbles: true}})); 405 | }} 406 | """) 407 | return True 408 | 409 | elif index is not None: 410 | await tab.evaluate(f""" 411 | const select = document.querySelector('{selector}'); 412 | if (select && {index} >= 0 && {index} < select.options.length) {{ 413 | select.selectedIndex = {index}; 414 | select.dispatchEvent(new Event('change', {{bubbles: true}})); 415 | }} 416 | """) 417 | return True 418 | 419 | raise Exception("No selection criteria provided (value, text, or index)") 420 | 421 | except Exception as e: 422 | raise Exception(f"Failed to select option: {str(e)}") 423 | 424 | @staticmethod 425 | async def get_element_state( 426 | tab: Tab, 427 | selector: str 428 | ) -> Dict[str, Any]: 429 | """ 430 | Get complete state of an element. 431 | 432 | Args: 433 | tab (Tab): The browser tab object. 434 | selector (str): CSS selector for the element. 435 | 436 | Returns: 437 | Dict[str, Any]: Dictionary of element state properties. 
438 | """ 439 | try: 440 | element = await tab.select(selector) 441 | if not element: 442 | raise Exception(f"Element not found: {selector}") 443 | 444 | if hasattr(element, 'update'): 445 | await element.update() 446 | 447 | state = { 448 | 'tag_name': element.tag_name if hasattr(element, 'tag_name') else 'unknown', 449 | 'text': element.text if hasattr(element, 'text') else '', 450 | 'text_all': element.text_all if hasattr(element, 'text_all') else '', 451 | 'attributes': element.attrs if hasattr(element, 'attrs') else {}, 452 | 'is_visible': True, 453 | 'is_clickable': False, 454 | 'is_enabled': True, 455 | 'value': element.attrs.get('value') if hasattr(element, 'attrs') else None, 456 | 'href': element.attrs.get('href') if hasattr(element, 'attrs') else None, 457 | 'src': element.attrs.get('src') if hasattr(element, 'attrs') else None, 458 | 'class': element.attrs.get('class') if hasattr(element, 'attrs') else None, 459 | 'id': element.attrs.get('id') if hasattr(element, 'attrs') else None, 460 | 'position': await element.get_position() if hasattr(element, 'get_position') else None, 461 | 'computed_style': {}, 462 | 'children_count': len(element.children) if hasattr(element, 'children') and element.children else 0, 463 | 'parent_tag': None 464 | } 465 | 466 | return state 467 | 468 | except Exception as e: 469 | raise Exception(f"Failed to get element state: {str(e)}") 470 | 471 | @staticmethod 472 | async def wait_for_element( 473 | tab: Tab, 474 | selector: str, 475 | timeout: int = 30000, 476 | visible: bool = True, 477 | text_content: Optional[str] = None 478 | ) -> bool: 479 | """ 480 | Wait for element to appear and match conditions. 481 | 482 | Args: 483 | tab (Tab): The browser tab object. 484 | selector (str): CSS selector for the element. 485 | timeout (int): Timeout in milliseconds. 486 | visible (bool): Wait for element to be visible. 487 | text_content (Optional[str]): Wait for element to contain text. 488 | 489 | Returns: 490 | bool: True if element matches conditions, False otherwise. 491 | """ 492 | start_time = time.time() 493 | timeout_seconds = timeout / 1000 494 | 495 | while time.time() - start_time < timeout_seconds: 496 | try: 497 | element = await tab.select(selector) 498 | 499 | if element: 500 | if visible: 501 | try: 502 | is_visible = await element.apply( 503 | """(elem) => { 504 | var style = window.getComputedStyle(elem); 505 | return style.display !== 'none' && 506 | style.visibility !== 'hidden' && 507 | style.opacity !== '0'; 508 | }""" 509 | ) 510 | if not is_visible: 511 | await asyncio.sleep(0.5) 512 | continue 513 | except: 514 | pass 515 | 516 | if text_content: 517 | text = element.text_all 518 | if text_content not in text: 519 | await asyncio.sleep(0.5) 520 | continue 521 | 522 | return True 523 | 524 | except Exception: 525 | pass 526 | 527 | await asyncio.sleep(0.5) 528 | 529 | return False 530 | 531 | @staticmethod 532 | async def execute_script( 533 | tab: Tab, 534 | script: str, 535 | args: Optional[List[Any]] = None 536 | ) -> Any: 537 | """ 538 | Execute JavaScript in page context. 539 | 540 | Args: 541 | tab (Tab): The browser tab object. 542 | script (str): JavaScript code to execute. 543 | args (Optional[List[Any]]): Arguments for the script. 544 | 545 | Returns: 546 | Any: Result of script execution. 
547 | """ 548 | try: 549 | if args: 550 | result = await tab.evaluate(f'(function() {{ {script} }})({",".join(map(str, args))})') 551 | else: 552 | result = await tab.evaluate(script) 553 | 554 | return result 555 | 556 | except Exception as e: 557 | raise Exception(f"Failed to execute script: {str(e)}") 558 | 559 | @staticmethod 560 | async def get_page_content( 561 | tab: Tab, 562 | include_frames: bool = False 563 | ) -> Dict[str, str]: 564 | """ 565 | Get page HTML and text content. 566 | 567 | Args: 568 | tab (Tab): The browser tab object. 569 | include_frames (bool): Include iframe contents. 570 | 571 | Returns: 572 | Dict[str, str]: Dictionary with page content. 573 | """ 574 | try: 575 | html = await tab.get_content() 576 | text = await tab.evaluate("document.body.innerText") 577 | 578 | content = { 579 | 'html': html, 580 | 'text': text, 581 | 'url': await tab.evaluate("window.location.href"), 582 | 'title': await tab.evaluate("document.title") 583 | } 584 | 585 | if include_frames: 586 | frames = [] 587 | iframe_elements = await tab.select_all('iframe') 588 | 589 | for i, iframe in enumerate(iframe_elements): 590 | try: 591 | src = iframe.attrs.get('src') if hasattr(iframe, 'attrs') else None 592 | if src: 593 | frames.append({ 594 | 'index': i, 595 | 'src': src, 596 | 'id': iframe.attrs.get('id') if hasattr(iframe, 'attrs') else None, 597 | 'name': iframe.attrs.get('name') if hasattr(iframe, 'attrs') else None 598 | }) 599 | except Exception: 600 | continue 601 | 602 | content['frames'] = frames 603 | 604 | return content 605 | 606 | except Exception as e: 607 | raise Exception(f"Failed to get page content: {str(e)}") 608 | 609 | @staticmethod 610 | async def scroll_page( 611 | tab: Tab, 612 | direction: str = "down", 613 | amount: int = 500, 614 | smooth: bool = True 615 | ) -> bool: 616 | """ 617 | Scroll the page in specified direction. 618 | 619 | Args: 620 | tab (Tab): The browser tab object. 621 | direction (str): Direction to scroll ('down', 'up', 'right', 'left', 'top', 'bottom'). 622 | amount (int): Amount to scroll in pixels. 623 | smooth (bool): Use smooth scrolling. 624 | 625 | Returns: 626 | bool: True if scroll succeeded, False otherwise. 627 | """ 628 | try: 629 | if direction == "down": 630 | script = f"window.scrollBy(0, {amount})" 631 | elif direction == "up": 632 | script = f"window.scrollBy(0, -{amount})" 633 | elif direction == "right": 634 | script = f"window.scrollBy({amount}, 0)" 635 | elif direction == "left": 636 | script = f"window.scrollBy(-{amount}, 0)" 637 | elif direction == "top": 638 | script = "window.scrollTo(0, 0)" 639 | elif direction == "bottom": 640 | script = "window.scrollTo(0, document.body.scrollHeight)" 641 | else: 642 | raise ValueError(f"Invalid scroll direction: {direction}") 643 | 644 | if smooth: 645 | script = script.replace("scrollBy", "scrollBy({behavior: 'smooth'}, ") 646 | script = script.replace("scrollTo", "scrollTo({behavior: 'smooth', top: ") 647 | if "scrollTo" in script: 648 | script = script.replace(")", "})") 649 | 650 | await tab.evaluate(script) 651 | await asyncio.sleep(0.5 if smooth else 0.1) 652 | 653 | return True 654 | 655 | except Exception as e: 656 | raise Exception(f"Failed to scroll page: {str(e)}") ```
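
The `DOMHandler` methods above are static coroutines that operate directly on a nodriver `Tab`, so a typical interaction is just a sequence of awaits. The sketch below is illustrative only and is not part of the repository: `submit_search`, the selectors, and the surrounding search page are assumptions chosen to show the call pattern (`wait_for_element` → `paste_text` → `click_element` → `query_elements`).

```python
"""Illustrative sketch only -- not part of the repository.

Assumptions: `tab` is a nodriver Tab already created and navigated by the
project's browser manager; the selectors below are hypothetical placeholders.
"""
from dom_handler import DOMHandler


async def submit_search(tab, query: str) -> str:
    # Wait (timeout is in milliseconds) until the search box exists and is visible.
    if not await DOMHandler.wait_for_element(tab, "input[name='q']", timeout=15000):
        raise RuntimeError("Search box never appeared")

    # paste_text inserts the whole string via CDP insert_text, which is much
    # faster than per-character typing with type_text.
    await DOMHandler.paste_text(tab, "input[name='q']", query, clear_first=True)
    await DOMHandler.click_element(tab, "button[type='submit']")

    # Collect up to five visible result containers and return the first one's text.
    results = await DOMHandler.query_elements(
        tab, "div.result", visible_only=True, limit=5
    )
    return (results[0].text or "") if results else ""
```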