This is page 2 of 3. Use http://codebase.md/ilikepizza2/qa-mcp?page={x} to view the full context.
# Directory Structure
```
├── .env.example
├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── main.py
├── mcp_server.py
├── README.md
├── requirements.txt
├── src
│   ├── __init__.py
│   ├── agents
│   │   ├── __init__.py
│   │   ├── auth_agent.py
│   │   ├── crawler_agent.py
│   │   ├── js_utils
│   │   │   └── xpathgenerator.js
│   │   └── recorder_agent.py
│   ├── browser
│   │   ├── __init__.py
│   │   ├── browser_controller.py
│   │   └── panel
│   │       └── panel.py
│   ├── core
│   │   ├── __init__.py
│   │   └── task_manager.py
│   ├── dom
│   │   ├── buildDomTree.js
│   │   ├── history
│   │   │   ├── service.py
│   │   │   └── view.py
│   │   ├── service.py
│   │   └── views.py
│   ├── execution
│   │   ├── __init__.py
│   │   └── executor.py
│   ├── llm
│   │   ├── __init__.py
│   │   ├── clients
│   │   │   ├── azure_openai_client.py
│   │   │   ├── gemini_client.py
│   │   │   └── openai_client.py
│   │   └── llm_client.py
│   ├── security
│   │   ├── __init__.py
│   │   ├── nuclei_scanner.py
│   │   ├── semgrep_scanner.py
│   │   ├── utils.py
│   │   └── zap_scanner.py
│   └── utils
│       ├── __init__.py
│       ├── image_utils.py
│       └── utils.py
└── test_schema.md
```
# Files
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
```python
# main.py
import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '.')))
import time
import json
import argparse
from src.agents.recorder_agent import WebAgent
from src.agents.crawler_agent import CrawlerAgent
from src.llm.llm_client import LLMClient
from src.execution.executor import TestExecutor
from src.utils.utils import load_api_key, load_api_version, load_api_base_url, load_llm_model
from src.agents.auth_agent import record_selectors_and_save_auth_state
from src.security.utils import save_report
from src.security.semgrep_scanner import run_semgrep
import logging
import warnings
if __name__ == "__main__":
# Configure logging (DEBUG for detailed logs, INFO for less)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Suppress noisy logs from specific libraries if needed
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("playwright").setLevel(logging.INFO) # Show Playwright info but not debug
logger = logging.getLogger(__name__) # Logger for main script
# --- Argument Parser ---
parser = argparse.ArgumentParser(description="AI Web Testing Agent - Recorder & Executor")
parser.add_argument(
'--mode',
choices=['record', 'execute','auth' ,'discover', 'security'],
required=True,
help="Mode to run the agent in: 'record' (interactive AI-assisted recording) or 'execute' (deterministic playback)."
)
parser.add_argument(
'--file',
type=str,
help="Path to the JSON test file (required for 'execute' mode)."
)
parser.add_argument(
'--headless',
action='store_true', # Makes it a flag, default False
help="Run executor in headless mode (only applies to 'execute'/discover mode)."
)
parser.add_argument(
'--url', # <<< Added URL argument for discover mode
type=str,
help="Starting URL for website crawling/security (required for 'discover' and 'security' mode)."
)
parser.add_argument(
'--max-pages', # <<< Added max pages argument for discover mode
type=int,
default=10,
help="Maximum number of pages to crawl in 'discover' mode (default: 10)."
)
parser.add_argument(
'--automated',
action='store_true', # Use action='store_true' for boolean flags
help="Run recorder in automated mode (AI makes decisions without user prompts). Only applies to 'record' mode." # Clarified help text
)
parser.add_argument(
'--enable-healing',
action='store_true',
help="Enable self-healing during execution ('execute' mode only)."
)
parser.add_argument(
'--healing-mode',
choices=['soft', 'hard'],
default='soft',
help="Self-healing mode: 'soft' (fix selector) or 'hard' (re-record) ('execute' mode only)."
)
parser.add_argument("--code-path", help="Path to the codebase directory for Semgrep scan (optional).")
parser.add_argument("--output-dir", default="results", help="Directory to save scan reports.")
parser.add_argument('--provider', choices=['gemini', 'openai', 'azure'], default='gemini', help="LLM provider (default: gemini). Choose openai for any OpenAI compatible LLMs.")
parser.add_argument("--semgrep-config", default="auto", help="Semgrep config/ruleset (e.g., 'p/ci', 'r/python'). Default is 'auto'.")
parser.add_argument("--semgrep-timeout", type=int, default=600, help="Semgrep scan timeout in seconds.")
args = parser.parse_args()
# Validate arguments based on mode
if args.mode == 'execute':
if not args.file:
parser.error("--file is required when --mode is 'execute'")
if not args.enable_healing and args.healing_mode != 'soft':
logger.warning("--healing-mode is ignored when --enable-healing is not set.")
elif args.mode == 'record':
if args.enable_healing:
logger.warning("--enable-healing and --healing-mode are ignored in 'record' mode.")
elif args.mode == 'discover':
if not args.url:
parser.error("--url is required when --mode is 'discover'")
if args.enable_healing:
logger.warning("--enable-healing and --healing-mode are ignored in 'discover' mode.")
# --- End Argument Parser ---
# --- Security Warning ---
if args.mode == 'record': # Show warning mainly for recording
warnings.warn(
"SECURITY WARNING: You are about to run an AI agent that interacts with the web based on "
"LLM instructions for recording test steps. Ensure the target environment is safe.",
UserWarning
)
print("\n" + "*"*70)
print("!!! AI WEB TESTING AGENT - RECORDER MODE !!!")
print("This agent interacts with websites to record automated tests.")
print(">> Ensure you target the correct environment (e.g., staging).")
print(">> Avoid recording actions involving highly sensitive production data.")
print(">> You will be prompted to confirm or override AI suggestions.")
print("Proceed with caution.")
print("*"*70 + "\n")
# --- End Security Warning ---
try:
# --- Configuration ---
api_key = load_api_key()
endpoint = load_api_base_url()
api_version = load_api_version()
model_name = load_llm_model()
if not os.path.exists("output"):
try:
os.makedirs("output")
logger.info("Created 'output' directory for screenshots and evidence.")
except OSError as e:
logger.warning(f"Could not create 'output' directory: {e}. Saving evidence/screenshots might fail.")
if args.mode == 'record':
logger.info("Starting in RECORD mode...")
HEADLESS_BROWSER = False # Recording MUST be non-headless
MAX_TEST_ITERATIONS = 50 # Allow more steps for recording complex flows
MAX_HISTORY_FOR_LLM = 10
MAX_STEP_RETRIES = 1 # Retries during recording are for AI suggestion refinement
print("Running in interactive RECORD mode (Browser window is required).")
# --- Initialize Components ---
llm_client = LLMClient(provider=args.provider)
automated = False
if args.automated == True:
automated = True
recorder_agent = WebAgent(
llm_client=llm_client,
headless=HEADLESS_BROWSER, # Must be False
max_iterations=MAX_TEST_ITERATIONS,
max_history_length=MAX_HISTORY_FOR_LLM,
max_retries_per_subtask=MAX_STEP_RETRIES,
is_recorder_mode=True, # Add a flag to agent
automated_mode=automated
)
# --- Get Feature Description ---
print("\nEnter the feature or user flow you want to test.")
print("Examples:")
print("- go to https://practicetestautomation.com/practice-test-login/ and login with username as student and password as Password123 and verify if the login was successful")
print("- Navigate to 'https://example-shop.com', search for 'blue widget', add the first result to the cart, and verify the cart item count increases to 1 (selector: 'span#cart-count').")
print("- On 'https://form-page.com', fill the 'email' field with '[email protected]', check the 'terms' checkbox (id='terms-cb'), click submit, and verify the success message 'Form submitted!' is shown in 'div.status'.")
feature_description = input("\nPlease enter the test case description: ")
# --- Run the Test ---
if feature_description:
# The run method now handles the recording loop
recording_result = recorder_agent.record(feature_description) # Changed method name
print("\n" + "="*20 + " Recording Result " + "="*20)
if recording_result.get("success"):
print(f"Status: SUCCESS")
print(f"Recording saved to: {recording_result.get('output_file')}")
print(f"Total steps recorded: {recording_result.get('steps_recorded')}")
else:
print(f"Status: FAILED or ABORTED")
print(f"Message: {recording_result.get('message')}")
print("="*58)
else:
print("No test case description entered. Exiting.")
elif args.mode == 'execute':
logger.info(f"Starting in EXECUTE mode for file: {args.file}")
HEADLESS_BROWSER = args.headless # Use flag for executor headless
PIXEL_MISMATCH_THRESHOLD = 0.01
heal_msg = f"Self-Healing: {'ENABLED (' + args.healing_mode + ' mode)' if args.enable_healing else 'DISABLED'}"
print(f"Running in EXECUTE mode ({'Headless' if args.headless else 'Visible Browser'}). {heal_msg}")
llm_client = LLMClient(provider=args.provider)
# Executor doesn't need LLM client directly
executor = TestExecutor(
llm_client=llm_client, # Pass the initialized client
headless=args.headless,
enable_healing=args.enable_healing,
healing_mode=args.healing_mode,
pixel_threshold=PIXEL_MISMATCH_THRESHOLD,
get_performance=True,
get_network_requests=True
# healing_retries can be added as arg if needed
)
test_result = executor.run_test(args.file)
# --- Display Test Execution Results ---
print("\n" + "="*20 + " Execution Result " + "="*20)
print(f"Test File: {test_result.get('test_file', 'N/A')}")
print(f"Status: {test_result.get('status', 'UNKNOWN')}")
print(f"Duration: {test_result.get('duration_seconds', 'N/A')} seconds")
print(f"Message: {test_result.get('message', 'N/A')}")
print(f"Healing: {'ENABLED ('+test_result.get('healing_mode','N/A')+' mode)' if test_result.get('healing_enabled') else 'DISABLED'}")
perf_timing = test_result.get("performance_timing")
if perf_timing:
try:
nav_start = perf_timing.get('navigationStart', 0)
load_end = perf_timing.get('loadEventEnd', 0)
dom_content_loaded = perf_timing.get('domContentLoadedEventEnd', 0)
dom_interactive = perf_timing.get('domInteractive', 0)
if nav_start > 0: # Ensure navigationStart is valid
print("\n--- Performance Metrics (Initial Load) ---")
if load_end > nav_start: print(f" Page Load Time (loadEventEnd): {(load_end - nav_start):,}ms")
if dom_content_loaded > nav_start: print(f" DOM Content Loaded (domContentLoadedEventEnd): {(dom_content_loaded - nav_start):,}ms")
if dom_interactive > nav_start: print(f" DOM Interactive: {(dom_interactive - nav_start):,}ms")
print("-" * 20)
else:
print("\n--- Performance Metrics (Initial Load): navigationStart not captured ---")
except Exception as perf_err:
logger.warning(f"Could not process performance timing: {perf_err}")
print("\n--- Performance Metrics: Error processing data ---")
# ------------------------------------
# --- Network Request Summary ---
network_reqs = test_result.get("network_requests", [])
if network_reqs:
print("\n--- Network Summary ---")
total_reqs = len(network_reqs)
http_error_reqs = len([r for r in network_reqs if (r.get('status', 0) or 0) >= 400])
error_reqs = len([r for r in network_reqs if (r.get('status', 0) or 0) >= 400])
slow_reqs = len([r for r in network_reqs if (r.get('duration_ms') or 0) > 1500]) # Example: > 1.5s
print(f" Total Requests: {total_reqs}")
if http_error_reqs > 0: print(f" Requests >= 400 Status: {http_error_reqs}")
if error_reqs > 0: print(f" Requests >= 400 Status: {error_reqs}")
if slow_reqs > 0: print(f" Requests > 1500ms: {slow_reqs}")
print("(See JSON report for full network details)")
print("-" * 20)
visual_results = test_result.get("visual_assertion_results", [])
if visual_results:
print("\n--- Visual Assertion Results ---")
for vr in visual_results:
status = vr.get('status', 'UNKNOWN')
override = " (LLM Override)" if vr.get('llm_override') else ""
diff_percent = vr.get('pixel_difference_ratio', 0) * 100
thresh_percent = vr.get('pixel_threshold', PIXEL_MISMATCH_THRESHOLD) * 100 # Use executor's default if needed
print(f"- Step {vr.get('step_id')}, Baseline '{vr.get('baseline_id')}': {status}{override}")
print(f" Pixel Difference: {diff_percent:.4f}% (Threshold: {thresh_percent:.2f}%)")
if status == 'FAIL':
if vr.get('diff_image_path'):
print(f" Diff Image: {vr.get('diff_image_path')}")
if vr.get('llm_reasoning'):
print(f" LLM Reasoning: {vr.get('llm_reasoning')}")
elif vr.get('llm_override'): # Passed due to LLM
if vr.get('llm_reasoning'):
print(f" LLM Reasoning: {vr.get('llm_reasoning')}")
print("-" * 20)
# Display Healing Attempts Log
healing_attempts = test_result.get("healing_attempts", [])
if healing_attempts:
print("\n--- Healing Attempts ---")
for attempt in healing_attempts:
outcome = "SUCCESS" if attempt.get('success') else "FAIL"
mode = attempt.get('mode', 'N/A')
print(f"- Step {attempt.get('step_id')}: Attempt {attempt.get('attempt')} ({mode} mode) - {outcome}")
if outcome == "SUCCESS" and mode == "soft":
print(f" Old Selector: {attempt.get('failed_selector')}")
print(f" New Selector: {attempt.get('new_selector')}")
print(f" Reasoning: {attempt.get('reasoning', 'N/A')[:100]}...")
elif outcome == "FAIL" and mode == "soft":
print(f" Failed Selector: {attempt.get('failed_selector')}")
print(f" Reasoning: {attempt.get('reasoning', 'N/A')[:100]}...")
elif mode == "hard":
print(f" Triggered re-recording due to error: {attempt.get('error', 'N/A')[:100]}...")
print("-" * 20)
if test_result.get('status') == 'FAIL':
print("-" * 15 + " Failure Details " + "-" * 15)
failed_step_info = test_result.get('failed_step', {})
print(f"Failed Step ID: {failed_step_info.get('step_id', 'N/A')}")
print(f"Failed Step Description: {failed_step_info.get('description', 'N/A')}")
print(f"Action: {failed_step_info.get('action', 'N/A')}")
# Show the *last* selector tried if healing was attempted
last_selector_tried = failed_step_info.get('selector') # Default to original
last_failed_healing_attempt = next((a for a in reversed(healing_attempts) if a.get('step_id') == failed_step_info.get('step_id') and not a.get('success')), None)
if last_failed_healing_attempt:
last_selector_tried = last_failed_healing_attempt.get('failed_selector')
print(f"Selector Used (Last Attempt): {last_selector_tried or 'N/A'}")
print(f"Error: {test_result.get('error_details', 'N/A')}")
if test_result.get('screenshot_on_failure'):
print(f"Failure Screenshot: {test_result.get('screenshot_on_failure')}")
# (Console message display remains the same)
console_msgs = test_result.get("console_messages_on_failure", [])
if console_msgs:
print("\n--- Console Errors/Warnings (Recent): ---")
for msg in console_msgs:
msg_text = str(msg.get('text',''))
print(f"- [{msg.get('type','UNKNOWN').upper()}] {msg_text[:250]}{'...' if len(msg_text) > 250 else ''}")
total_err_warn = len([m for m in test_result.get("all_console_messages", []) if m.get('type') in ['error', 'warning']])
if total_err_warn > len(console_msgs):
print(f"... (Showing last {len(console_msgs)} of {total_err_warn} total errors/warnings. See JSON report for full logs)")
else:
print("\n--- No relevant console errors/warnings captured on failure. ---")
elif test_result.get('status') == 'PASS':
print(f"Steps Executed: {test_result.get('steps_executed', 'N/A')}")
elif test_result.get('status') == 'HEALING_TRIGGERED':
print(f"\nNOTICE: Hard Healing (re-recording) was triggered.")
print(f"The original execution stopped at Step {test_result.get('failed_step', {}).get('step_id', 'N/A')}.")
print(f"Check logs for the status and output file of the re-recording process.")
print("="*58)
# --- Save Full Execution Results to JSON ---
try:
base_name = os.path.splitext(os.path.basename(args.file))[0]
result_filename = os.path.join("output", f"execution_result_{base_name}_{time.strftime('%Y%m%d_%H%M%S')}.json")
with open(result_filename, 'w', encoding='utf-8') as f:
json.dump(test_result, f, indent=2, ensure_ascii=False)
print(f"\nFull execution result details saved to: {result_filename}")
except Exception as save_err:
logger.error(f"Failed to save full execution result JSON: {save_err}")
elif args.mode == 'discover':
warnings.warn(
"SECURITY WARNING: You are about to run an AI agent that interacts with the web based on "
"LLM instructions or crawling logic. Ensure the target environment is safe.",
UserWarning
)
print("!!! AI WEB TESTING AGENT - DISCOVERY MODE !!!")
print("This agent will crawl the website starting from the provided URL.")
print(">> It will analyze pages and ask an LLM for test step ideas.")
print(">> Ensure you have permission to crawl the target website.")
print(f">> Crawling will be limited to the domain of '{args.url}' and max {args.max_pages} pages.")
print("Proceed with caution.")
print("*"*70 + "\n")
logger.info(f"Starting in DISCOVER mode for URL: {args.url}")
HEADLESS_BROWSER = args.headless # Use the general headless flag
print(f"Running in DISCOVER mode ({'Headless' if HEADLESS_BROWSER else 'Visible Browser'}).")
print(f"Starting URL: {args.url}")
print(f"Max pages to crawl: {args.max_pages}")
# Initialize Components
llm_client = LLMClient(provider=args.provider)
crawler = CrawlerAgent(
llm_client=llm_client,
headless=HEADLESS_BROWSER
)
# Run Discovery
discovery_result = crawler.crawl_and_suggest(args.url, args.max_pages)
# Display Discovery Results
print("\n" + "="*20 + " Discovery Result " + "="*20)
print(f"Status: {'SUCCESS' if discovery_result.get('success') else 'FAILED'}")
print(f"Message: {discovery_result.get('message', 'N/A')}")
print(f"Start URL: {discovery_result.get('start_url', 'N/A')}")
print(f"Base Domain: {discovery_result.get('base_domain', 'N/A')}")
print(f"Pages Visited: {discovery_result.get('pages_visited', 0)}")
discovered_steps_map = discovery_result.get('discovered_steps', {})
print(f"Pages with Suggested Steps: {len(discovered_steps_map)}")
print("-" * 58)
if discovered_steps_map:
print("\n--- Suggested Test Steps per Page ---")
for page_url, steps in discovered_steps_map.items():
print(f"\n[Page: {page_url}]")
if steps:
for i, step_desc in enumerate(steps):
print(f" {i+1}. {step_desc}")
else:
print(" (No specific steps suggested by LLM for this page)")
else:
print("\nNo test step suggestions were generated.")
print("="*58)
# Save Full Discovery Results to JSON
if discovery_result.get('success'): # Only save if crawl succeeded somewhat
try:
# Generate a filename based on the domain
domain = discovery_result.get('base_domain', 'unknown_domain')
# Sanitize domain for filename
safe_domain = "".join(c if c.isalnum() else "_" for c in domain)
result_filename = os.path.join("output", f"discovery_results_{safe_domain}_{time.strftime('%Y%m%d_%H%M%S')}.json")
with open(result_filename, 'w', encoding='utf-8') as f:
json.dump(discovery_result, f, indent=2, ensure_ascii=False)
print(f"\nFull discovery result details saved to: {result_filename}")
except Exception as save_err:
logger.error(f"Failed to save full discovery result JSON: {save_err}")
elif args.mode == 'auth':
# Ensure output directory exists
os.makedirs("output", exist_ok=True)
# --- IMPORTANT: Initialize your LLM Client here ---
# Replace with your actual LLM provider and initialization
try:
# Example using Gemini (replace with your actual setup)
# Ensure GOOGLE_API_KEY is set as an environment variable if using GeminiClient defaults
logger.info(f"Using LLM Provider: {args.provider}")
llm = LLMClient(provider=args.provider)
logger.info("LLM Client initialized.")
except ValueError as e:
logger.error(f"❌ Failed to initialize LLM Client: {e}. Cannot proceed.")
llm = None
except Exception as e:
logger.error(f"❌ An unexpected error occurred initializing LLM Client: {e}. Cannot proceed.", exc_info=True)
llm = None
# ------------------------------------------------
if llm:
success = record_selectors_and_save_auth_state(llm, args.url, args.file)
if success:
print(f"\n--- Authentication state generation completed successfully. ---")
else:
print(f"\n--- Authentication state generation failed. Check logs and screenshots in 'output/'. ---")
else:
print("\n--- Could not initialize LLM Client. Aborting authentication state generation. ---")
elif args.mode == 'security':
logging.info("--- Starting Phase 1: Security Scanning ---")
all_findings = []
# 1. Run ZAP Scan
# logging.info("--- Running ZAP Scan ---")
# if not args.zap_api_key:
# logging.warning("ZAP API key not provided. ZAP scan might fail if API key is required.")
# zap_findings = run_zap_scan(
# target_url=args.url,
# zap_address=args.zap_address,
# zap_api_key=args.zap_api_key,
# spider_timeout=args.zap_spider_timeout,
# scan_timeout=args.zap_scan_timeout
# )
# if zap_findings:
# logging.info(f"Completed ZAP Scan. Found {len(zap_findings)} alerts.")
# all_findings.extend(zap_findings)
# save_report(zap_findings, "zap", args.output_dir, "scan_results")
# else:
# logging.warning("ZAP scan completed with no findings or failed.")
# 2. Run Nuclei Scan
# logging.info("--- Running Nuclei Scan ---")
# nuclei_findings = run_nuclei(
# target_url=args.url,
# templates=args.nuclei_templates,
# output_dir=args.output_dir,
# timeout=args.nuclei_timeout
# )
# if nuclei_findings:
# logging.info(f"Completed Nuclei Scan. Found {len(nuclei_findings)} potential issues.")
# all_findings.extend(nuclei_findings)
# # Nuclei output was already saved by the function, but we can save the parsed list again if needed
# # save_report(nuclei_findings, "nuclei", args.output_dir, "scan_results_parsed")
# else:
# logging.warning("Nuclei scan completed with no findings or failed.")
# 3. Run Semgrep Scan (if code path provided)
# 3. Run Semgrep Scan (if code path provided)
if args.code_path:
logging.info("--- Running Semgrep Scan ---")
semgrep_findings = run_semgrep(
code_path=args.code_path,
config=args.semgrep_config,
output_dir=args.output_dir,
timeout=args.semgrep_timeout
)
if semgrep_findings:
logging.info(f"Completed Semgrep Scan. Found {len(semgrep_findings)} potential issues.")
all_findings.extend(semgrep_findings)
# Semgrep output was already saved, save parsed list if desired
# save_report(semgrep_findings, "semgrep", args.output_dir, "scan_results_parsed")
else:
logging.warning("Semgrep scan completed with no findings or failed.")
else:
logging.info("Skipping Semgrep scan as --code-path was not provided.")
logging.info("--- Phase 1: Security Scanning Complete ---")
logging.info("--- Starting Phase 2: Consolidating Results ---")
logging.info(f"Total findings aggregated from all tools (future): {len(all_findings)}")
# Save the consolidated report
consolidated_report_path = save_report(all_findings, "consolidated", args.output_dir, "consolidated_scan_results")
if consolidated_report_path:
logging.info(f"Consolidated report saved to: {consolidated_report_path}")
print(f"\nConsolidated report saved to: {consolidated_report_path}") # Also print to stdout
else:
logging.error("Failed to save the consolidated report.")
logging.info("--- Phase 2: Consolidation Complete ---")
logging.info("--- Security Automation Script Finished ---")
except ValueError as e:
logger.error(f"Configuration or Input error: {e}")
print(f"Error: {e}")
except ImportError as e:
logger.error(f"Import error: {e}. Make sure requirements are installed and paths correct.")
print(f"Import Error: {e}. Please check installation.")
except Exception as e:
logger.critical(f"An unexpected error occurred in main: {e}", exc_info=True)
print(f"An critical unexpected error occurred: {e}")
```
--------------------------------------------------------------------------------
/src/browser/panel/panel.py:
--------------------------------------------------------------------------------
```python
# /src/browser/panel/panel.py
import threading
import logging
from typing import Optional, Dict, Any, List
from patchright.sync_api import sync_playwright, Page, Browser, Playwright, TimeoutError as PlaywrightTimeoutError
logger = logging.getLogger(__name__)
RECORDER_PANEL_JS = """
() => {
const PANEL_ID = 'bw-recorder-panel';
const INPUT_ID = 'bw-recorder-param-input'; // Used for general param input now
const PARAM_BTN_ID = 'bw-recorder-param-button'; // Button next to general param input
const PARAM_CONT_ID = 'bw-recorder-param-container'; // Container for single param input
const ASSERT_PARAM_INPUT1_ID = 'bw-assert-param1';
const ASSERT_PARAM_INPUT2_ID = 'bw-assert-param2';
const ASSERT_PARAM_CONT_ID = 'bw-assert-param-container'; // Container for assertion-specific params
// --- Function to create or get the panel ---
function getOrCreatePanel() {
let panel = document.getElementById(PANEL_ID);
if (!panel) {
panel = document.createElement('div');
panel.id = PANEL_ID;
// Basic Styling (customize as needed)
Object.assign(panel.style, {
position: 'fixed',
bottom: '10px',
right: '10px',
padding: '10px',
background: 'rgba(40, 40, 40, 0.9)',
color: 'white',
border: '1px solid #ccc',
borderRadius: '5px',
zIndex: '2147483647', // Max z-index
fontFamily: 'sans-serif',
fontSize: '12px',
boxShadow: '0 2px 5px rgba(0,0,0,0.3)',
display: 'none', // Initially hidden
pointerEvents: 'none'
});
document.body.appendChild(panel);
}
return panel;
}
// --- Helper to Set Button Listeners ---
// (choiceValue is what window._recorder_user_choice will be set to)
function setChoiceOnClick(buttonId, choiceValue) {
const btn = document.getElementById(buttonId);
if (btn) {
btn.onclick = () => { window._recorder_user_choice = choiceValue; };
} else {
console.warn(`[Recorder Panel] Button with ID ${buttonId} not found for listener.`);
}
}
// State 1: Confirm/Override Assertion Target
window._recorder_showAssertionTargetPanel = (plannedDesc, suggestedSelector) => {
const panel = getOrCreatePanel();
const selectorDisplay = suggestedSelector ? `<code>${suggestedSelector.substring(0, 100)}...</code>` : '<i>AI could not suggest a target.</i>';
panel.innerHTML = `
<div style="margin-bottom: 5px; font-weight: bold; pointer-events: auto;">Define Assertion:</div>
<div style="margin-bottom: 8px; max-width: 300px; word-wrap: break-word; pointer-events: auto;">${plannedDesc}</div>
<div style="margin-bottom: 5px; font-style: italic; pointer-events: auto;">Suggested Target Selector: ${selectorDisplay}</div>
<button id="bw-assert-confirm-target" style="margin: 2px; padding: 3px 6px; pointer-events: auto;" ${!suggestedSelector ? 'disabled' : ''}>Use Suggested</button>
<button id="bw-assert-override-target" style="margin: 2px; padding: 3px 6px; pointer-events: auto;">Click New Target</button>
<button id="bw-assert-skip" style="margin: 2px; padding: 3px 6px; pointer-events: auto;">Skip Assertion</button>
<button id="bw-abort-btn" style="margin: 2px; padding: 3px 6px; background-color: #d9534f; color: white; border: none; pointer-events: auto;">Abort</button>
`;
window._recorder_user_choice = undefined; // Reset choice
setChoiceOnClick('bw-assert-confirm-target', 'confirm_target');
setChoiceOnClick('bw-assert-override-target', 'override_target');
setChoiceOnClick('bw-assert-skip', 'skip');
setChoiceOnClick('bw-abort-btn', 'abort');
panel.style.display = 'block';
console.log('[Recorder Panel] Assertion Target Panel Shown.');
};
// State 2: Select Assertion Type
window._recorder_showAssertionTypePanel = (targetSelector) => {
const panel = getOrCreatePanel();
panel.innerHTML = `
<div style="margin-bottom: 5px; font-weight: bold; pointer-events: auto;">Select Assertion Type:</div>
<div style="margin-bottom: 8px; font-size: 11px; pointer-events: auto;">Target: <code>${targetSelector.substring(0, 100)}...</code></div>
<div style="display: flex; flex-wrap: wrap; gap: 5px; pointer-events: auto;">
<button id="type-contains" style="padding: 3px 6px; pointer-events: auto;">Text Contains</button>
<button id="type-equals" style="padding: 3px 6px; pointer-events: auto;">Text Equals</button>
<button id="type-visible" style="padding: 3px 6px; pointer-events: auto;">Is Visible</button>
<button id="type-hidden" style="padding: 3px 6px; pointer-events: auto;">Is Hidden</button>
<button id="type-attr" style="padding: 3px 6px; pointer-events: auto;">Attribute Equals</button>
<button id="type-count" style="padding: 3px 6px; pointer-events: auto;">Element Count</button>
<button id="type-checked" style="padding: 3px 6px; pointer-events: auto;">Is Checked</button>
<button id="type-not-checked" style="padding: 3px 6px; pointer-events: auto;">Not Checked</button>
</div>
<hr style="margin: 8px 0; border-top: 1px solid #555;">
<button id="bw-assert-back-target" style="margin-right: 5px; padding: 3px 6px; pointer-events: auto;">< Back (Target)</button>
<button id="bw-assert-skip" style="margin-right: 5px; padding: 3px 6px; pointer-events: auto;">Skip Assertion</button>
<button id="bw-abort-btn" style="padding: 3px 6px; background-color: #d9534f; color: white; border: none; pointer-events: auto;">Abort</button>
`;
window._recorder_user_choice = undefined; // Reset choice
// Set listeners for type selection
setChoiceOnClick('type-contains', 'select_type_text_contains');
setChoiceOnClick('type-equals', 'select_type_text_equals');
setChoiceOnClick('type-visible', 'select_type_visible');
setChoiceOnClick('type-hidden', 'select_type_hidden');
setChoiceOnClick('type-attr', 'select_type_attribute_equals');
setChoiceOnClick('type-count', 'select_type_element_count');
setChoiceOnClick('type-checked', 'select_type_checked');
setChoiceOnClick('type-not-checked', 'select_type_not_checked');
// Other controls
setChoiceOnClick('bw-assert-back-target', 'back_to_target');
setChoiceOnClick('bw-assert-skip', 'skip');
setChoiceOnClick('bw-abort-btn', 'abort');
panel.style.display = 'block';
console.log('[Recorder Panel] Assertion Type Panel Shown.');
};
// State 3: Enter Assertion Parameters
window._recorder_showAssertionParamsPanel = (targetSelector, assertionType, paramLabels) => {
// paramLabels is an array like ['Expected Text'] or ['Attribute Name', 'Expected Value'] or ['Expected Count']
const panel = getOrCreatePanel();
let inputHTML = '';
if (paramLabels.length === 1) {
inputHTML = `<label for="${ASSERT_PARAM_INPUT1_ID}" style="margin-right: 5px; pointer-events: auto;">${paramLabels[0]}:</label>
<input type="text" id="${ASSERT_PARAM_INPUT1_ID}" style="padding: 2px 4px; width: 180px; pointer-events: auto;">`;
} else if (paramLabels.length === 2) {
inputHTML = `<div style="margin-bottom: 3px;">
<label for="${ASSERT_PARAM_INPUT1_ID}" style="display: inline-block; width: 100px; pointer-events: auto;">${paramLabels[0]}:</label>
<input type="text" id="${ASSERT_PARAM_INPUT1_ID}" style="padding: 2px 4px; width: 120px; pointer-events: auto;">
</div>
<div>
<label for="${ASSERT_PARAM_INPUT2_ID}" style="display: inline-block; width: 100px; pointer-events: auto;">${paramLabels[1]}:</label>
<input type="text" id="${ASSERT_PARAM_INPUT2_ID}" style="padding: 2px 4px; width: 120px; pointer-events: auto;">
</div>`;
}
panel.innerHTML = `
<div style="margin-bottom: 5px; font-weight: bold; pointer-events: auto;">Enter Parameters:</div>
<div style="margin-bottom: 3px; font-size: 11px; pointer-events: auto;">Target: <code>${targetSelector.substring(0, 60)}...</code></div>
<div style="margin-bottom: 8px; font-size: 11px; pointer-events: auto;">Assertion: ${assertionType}</div>
<div id="${ASSERT_PARAM_CONT_ID}" style="margin-bottom: 8px; pointer-events: auto;">
${inputHTML}
</div>
<button id="bw-assert-record" style="margin-right: 5px; padding: 3px 6px; pointer-events: auto;">Record Assertion</button>
<button id="bw-assert-back-type" style="margin-right: 5px; padding: 3px 6px; pointer-events: auto;">< Back (Type)</button>
<button id="bw-abort-btn" style="padding: 3px 6px; background-color: #d9534f; color: white; border: none; pointer-events: auto;">Abort</button>
`;
window._recorder_user_choice = undefined; // Reset choice
setChoiceOnClick('bw-assert-record', 'submit_params');
setChoiceOnClick('bw-assert-back-type', 'back_to_type');
setChoiceOnClick('bw-abort-btn', 'abort');
panel.style.display = 'block';
// Auto-focus the first input if possible
const firstInput = document.getElementById(ASSERT_PARAM_INPUT1_ID);
if (firstInput) {
setTimeout(() => firstInput.focus(), 50); // Short delay
}
console.log('[Recorder Panel] Assertion Params Panel Shown.');
};
// State 4: Verification Review
window._recorder_showVerificationReviewPanel = (args) => {
    // Renders the "AI Verification Review" panel.
    //   args.plannedDesc   - description of the planned step shown to the user
    //   args.aiVerified    - truthy when the AI reports the condition as verified
    //   args.aiReasoning   - free-form reasoning text ('N/A' when empty)
    //   args.assertionType - assertion proposed by the AI (may be missing)
    //   args.parameters    - object of assertion parameters (may be empty)
    //   args.selector      - selector the assertion targets (may be missing)
    // The user's answer is published through window._recorder_user_choice as
    // 'record_ai', 'define_manual', 'skip', 'abort' or 'parameterized'.
    const { plannedDesc, aiVerified, aiReasoning, assertionType, parameters, selector } = args;
    const panel = getOrCreatePanel();

    // Everything interpolated below is free text (plan descriptions, AI
    // reasoning, selectors). It is written through innerHTML, so it must be
    // escaped or text mentioning a tag would be parsed as real markup.
    const escapeHtml = (value) => (value === undefined || value === null ? '' : String(value))
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    // Only add the ellipsis when something was actually cut off.
    const truncate = (value, limit) => {
        const text = String(value);
        return text.length > limit ? text.substring(0, limit) + '...' : text;
    };

    let detailsHTML = '';
    let recordButtonDisabled = true; // Disable record button by default

    // --- Build Details Section based on AI Result ---
    if (aiVerified) {
        // Recording directly needs both an assertion type and a selector.
        const canRecord = Boolean(assertionType && selector);
        recordButtonDisabled = !canRecord;

        const selectorText = selector ? truncate(selector, 100) : 'MISSING!';

        // Safely format parameters (convert object to string)
        let paramsString = 'None';
        if (parameters && Object.keys(parameters).length > 0) {
            try { paramsString = JSON.stringify(parameters); } catch (e) { paramsString = '{...}'; }
        }

        detailsHTML += `<div style="margin-bottom: 3px; pointer-events: auto;">Assertion: <code>${escapeHtml(assertionType || 'N/A')}</code></div>`;
        detailsHTML += `<div style="margin-bottom: 3px; pointer-events: auto;">Selector: <code>${escapeHtml(selectorText)}</code></div>`;
        detailsHTML += `<div style="margin-bottom: 5px; pointer-events: auto;">Parameters: <code>${escapeHtml(paramsString)}</code></div>`;
        if (!canRecord) {
            detailsHTML += `<div style="color: #ffcc00; font-size: 11px; pointer-events: auto;">Warning: Cannot record assertion directly (missing type or selector from AI). Choose Manual or Skip.</div>`;
        }
    } else {
        // Verification failed
        detailsHTML += `<div style="color: #ffdddd; pointer-events: auto;">AI could not verify the condition.</div>`;
    }

    panel.innerHTML = `
        <div style="margin-bottom: 5px; font-weight: bold; pointer-events: auto;">AI Verification Review:</div>
        <div style="margin-bottom: 8px; max-width: 300px; word-wrap: break-word; pointer-events: auto;">${escapeHtml(plannedDesc)}</div>
        <div style="margin-bottom: 5px; font-style: italic; color: ${aiVerified ? '#ccffcc' : '#ffdddd'}; pointer-events: auto;">
            AI Result: ${aiVerified ? 'PASSED' : 'FAILED'}
        </div>
        <div style="margin-bottom: 8px; font-size: 11px; max-height: 60px; overflow-y: auto; border: 1px dashed #666; padding: 3px; pointer-events: auto;">
            AI Reasoning: ${escapeHtml(aiReasoning || 'N/A')}
        </div>
        ${detailsHTML}
        <hr style="margin: 8px 0; border-top: 1px solid #555;">
        <button id="bw-verify-record" style="margin: 2px; padding: 3px 6px; pointer-events: auto;" ${recordButtonDisabled ? 'disabled title="Cannot record directly, missing info from AI"' : ''}>Record AI Assertion</button>
        <button id="bw-verify-manual" style="margin: 2px; padding: 3px 6px; pointer-events: auto;">Define Manually</button>
        <button id="bw-verify-skip" style="margin: 2px; padding: 3px 6px; pointer-events: auto;">Skip Step</button>
        <button id="bw-abort-btn" style="margin: 2px; padding: 3px 6px; background-color: #d9534f; color: white; border: none; pointer-events: auto;">Abort</button>
        <!-- Re-use existing parameterization container, initially hidden -->
        <div id="${PARAM_CONT_ID}" style="margin-top: 8px; display: none; pointer-events: auto;">
            <input type="text" id="${INPUT_ID}" placeholder="Parameter Name (optional)" style="padding: 2px 4px; width: 150px; margin-right: 5px; pointer-events: auto;">
            <button id="${PARAM_BTN_ID}" style="padding: 3px 6px; pointer-events: auto;">Set Param & Record</button>
        </div>
    `;

    // Clear any answer left over from a previous panel before wiring buttons.
    window._recorder_user_choice = undefined; // Reset choice
    window._recorder_parameter_name = undefined; // Reset param name

    // Set listeners
    setChoiceOnClick('bw-verify-record', 'record_ai');
    setChoiceOnClick('bw-verify-manual', 'define_manual');
    setChoiceOnClick('bw-verify-skip', 'skip');
    setChoiceOnClick('bw-abort-btn', 'abort');

    // Listener for the parameterization button: stores the trimmed name
    // (null when left empty) and reports the special 'parameterized' choice.
    const paramBtn = document.getElementById(PARAM_BTN_ID);
    if (paramBtn) {
        paramBtn.onclick = () => {
            const inputVal = document.getElementById(INPUT_ID).value.trim();
            window._recorder_parameter_name = inputVal ? inputVal : null;
            window._recorder_user_choice = 'parameterized'; // Special choice
        };
    }

    panel.style.display = 'block';
    console.log('[Recorder Panel] Verification Review Panel Shown.');
};
// Function to retrieve assertion parameters
window._recorder_getAssertionParams = (count) => {
    // Collects the values typed into the assertion parameter inputs.
    // Returns an object with `param1` (when the first input exists) and,
    // only when count > 1, `param2` (when the second input exists).
    const collected = {};

    const firstField = document.getElementById(ASSERT_PARAM_INPUT1_ID);
    if (firstField) {
        collected.param1 = firstField.value;
    }

    // The second input is only looked up when the caller asked for two values.
    const secondField = count > 1 ? document.getElementById(ASSERT_PARAM_INPUT2_ID) : null;
    if (secondField) {
        collected.param2 = secondField.value;
    }

    console.log('[Recorder Panel] Retrieved assertion params:', collected);
    return collected;
};
// --- Function to update panel content ---
window._recorder_showPanel = (stepDescription, suggestionText) => {
    // Renders the main "Next Step" panel with the AI suggestion and the
    // Accept / Skip / Abort buttons plus the (initially hidden)
    // parameterization row. The answer is published through
    // window._recorder_user_choice ('accept', 'skip', 'abort', 'parameterized')
    // and window._recorder_parameter_name.
    const panel = getOrCreatePanel();

    // Both arguments are free text written through innerHTML; escape them so
    // text containing tag-like characters is shown literally instead of
    // being parsed as markup.
    const escapeHtml = (value) => (value === undefined || value === null ? '' : String(value))
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    panel.innerHTML = `
        <div style="margin-bottom: 5px; font-weight: bold; pointer-events: auto;">Next Step:</div> <!-- Re-enable for text selection if needed -->
        <div style="margin-bottom: 8px; max-width: 300px; word-wrap: break-word; pointer-events: auto;">${escapeHtml(stepDescription)}</div>
        <div style="margin-bottom: 5px; font-style: italic; pointer-events: auto;">AI Suggests: ${escapeHtml(suggestionText)}</div>
        <button id="bw-accept-btn" style="margin-right: 5px; padding: 3px 6px; pointer-events: auto;">Accept Suggestion</button> <!-- <<< Re-enable pointer events for buttons -->
        <button id="bw-skip-btn" style="margin-right: 5px; padding: 3px 6px; pointer-events: auto;">Skip Step</button> <!-- <<< Re-enable pointer events for buttons -->
        <button id="bw-abort-btn" style="padding: 3px 6px; background-color: #d9534f; color: white; border: none; pointer-events: auto;">Abort</button> <!-- <<< Re-enable pointer events for buttons -->
        <div id="${PARAM_CONT_ID}" style="margin-top: 8px; display: none; pointer-events: auto;"> <!-- <<< Re-enable pointer events for container -->
            <input type="text" id="${INPUT_ID}" placeholder="Parameter Name (optional)" style="padding: 2px 4px; width: 150px; margin-right: 5px; pointer-events: auto;"> <!-- <<< Re-enable pointer events for input -->
            <button id="${PARAM_BTN_ID}" style="padding: 3px 6px; pointer-events: auto;">Set Param & Record</button> <!-- <<< Re-enable pointer events for buttons -->
        </div>
    `;

    // --- Attach Button Listeners ---
    // Reset choice flags before showing so a stale answer is never read.
    window._recorder_user_choice = undefined;
    window._recorder_parameter_name = undefined;

    document.getElementById('bw-accept-btn').onclick = () => { window._recorder_user_choice = 'accept'; };
    document.getElementById('bw-skip-btn').onclick = () => { window._recorder_user_choice = 'skip'; };
    document.getElementById('bw-abort-btn').onclick = () => { window._recorder_user_choice = 'abort'; };
    document.getElementById(PARAM_BTN_ID).onclick = () => {
        const inputVal = document.getElementById(INPUT_ID).value.trim();
        window._recorder_parameter_name = inputVal ? inputVal : null; // Store null if empty
        window._recorder_user_choice = 'parameterized'; // Special choice for parameterization submit
        // The panel is intentionally left visible here; hiding is left to
        // whoever reads the value back.
    };

    panel.style.display = 'block'; // Make panel visible
    console.log('[Recorder Panel] Panel shown.');
};
// --- Function to hide the panel ---
window._recorder_hidePanel = () => {
    // Hides the panel element (when present) without removing it from the DOM.
    const existingPanel = document.getElementById(PANEL_ID);
    if (existingPanel) {
        existingPanel.style.display = 'none';
        console.log('[Recorder Panel] Panel hidden.');
    }

    // The answer flags are cleared whether or not a panel was found, so a
    // hidden panel can never leave a stale choice behind.
    ['_recorder_user_choice', '_recorder_parameter_name'].forEach((flag) => {
        window[flag] = undefined;
    });
};
// --- Function to show parameterization UI ---
window._recorder_showParamUI = (defaultValue) => {
    // Reveals the parameter-name input inside the currently shown panel and
    // swaps the "Accept" button for the "Set Param & Record" button.
    // Returns true when the UI was shown, false when the required elements
    // (param container, input, accept button) are not in the document.
    const paramContainer = document.getElementById(PARAM_CONT_ID);
    const inputField = document.getElementById(INPUT_ID);
    const acceptBtn = document.getElementById('bw-accept-btn');
    if (!paramContainer || !inputField || !acceptBtn) {
        console.error('[Recorder Panel] Could not find parameterization elements.');
        return false;
    }

    // defaultValue may arrive as null/undefined or a non-string; coerce it so
    // building the placeholder cannot throw, and only add the ellipsis when
    // the preview was actually shortened.
    const fullValue = defaultValue === undefined || defaultValue === null ? '' : String(defaultValue);
    const preview = fullValue.length > 20 ? fullValue.substring(0, 20) + '...' : fullValue;

    inputField.value = ''; // Clear previous value
    inputField.setAttribute('placeholder', `Param Name for '${preview}' (optional)`);
    paramContainer.style.display = 'block';

    // Hide the original "Accept" button, show param button
    acceptBtn.style.display = 'none';
    const paramBtn = document.getElementById(PARAM_BTN_ID);
    if (paramBtn) {
        paramBtn.style.display = 'inline-block'; // Ensure param button is visible
    }

    console.log('[Recorder Panel] Parameterization UI shown.');
    return true;
};
// --- Function to remove the panel ---
window._recorder_removePanel = () => {
    // Removes the panel element from the DOM (when present) and deletes the
    // recorder globals from window.
    const panel = document.getElementById(PANEL_ID);
    if (panel) {
        panel.remove();
        console.log('[Recorder Panel] Panel removed.');
    }

    // Clean up global flags and every panel function exposed on window,
    // including the assertion / verification helpers. Deleting a property
    // that does not exist is a no-op, so the list is safe to apply blindly.
    // This function's own entry comes last.
    const injectedGlobals = [
        '_recorder_user_choice',
        '_recorder_parameter_name',
        '_recorder_showPanel',
        '_recorder_hidePanel',
        '_recorder_showParamUI',
        '_recorder_showAssertionTargetPanel',
        '_recorder_showAssertionTypePanel',
        '_recorder_showAssertionParamsPanel',
        '_recorder_showVerificationReviewPanel',
        '_recorder_getAssertionParams',
        '_recorder_removePanel',
    ];
    injectedGlobals.forEach((name) => {
        delete window[name];
    });
};
return true; // Indicate script injection success
}
"""
class Panel:
    """Deals with panel injected into browser in manual mode.

    Thin wrapper around a Playwright-style ``page`` that injects the recorder
    overlay defined by ``RECORDER_PANEL_JS``, shows its different states and
    reads the user's answers back from ``window._recorder_*`` globals.

    Every public method does nothing (returning ``None``/``False``) when
    ``headless`` is true or no page is attached, because there is no visible
    UI to drive in that case.
    """

    def __init__(self, headless: bool = True, page: Any = None):
        self._recorder_ui_injected = False  # Track if UI script is injected
        # Prevent race conditions when several callers wait on the panel.
        self._panel_interaction_lock = threading.Lock()
        self.headless = headless
        self.page = page

    def _call_panel_function(
        self,
        function_name: str,
        call_arguments: str,
        payload: Dict[str, Any],
        missing_message: str,
    ) -> Any:
        """Re-run the panel definitions, then call one ``window`` function.

        Args:
            function_name: Name of the function on ``window`` to invoke.
            call_arguments: JavaScript argument list, written in terms of the
                ``args`` object (e.g. ``"args.plannedDesc, args.selector"``).
            payload: Value handed to the page as ``args``.
            missing_message: Text logged with ``console.error`` in the page
                when the function is not defined after injection.

        Returns:
            Whatever the page function returned, or ``False`` when the
            function was not defined. Exceptions from ``page.evaluate``
            propagate to the caller.
        """
        # The definitions are re-evaluated on every call so the functions
        # exist even after a navigation wiped the page's globals.
        js_script = f"""
        (args) => {{
            ({RECORDER_PANEL_JS})(); // Ensure functions are defined
            if (window.{function_name}) {{
                return window.{function_name}({call_arguments});
            }}
            console.error('{missing_message}');
            return false;
        }}"""
        return self.page.evaluate(js_script, payload)

    # inject ui panel onto the browser
    def inject_recorder_ui_scripts(self) -> Optional[bool]:
        """Injects the JS functions for the recorder UI panel.

        Returns:
            ``True`` when the script is (or already was) injected, ``False``
            when there is no page or injection failed, and ``None`` in
            headless mode where no UI is injected at all.
        """
        if self.headless:
            return None  # No UI in headless
        if not self.page:
            logger.error("Page not initialized. Cannot inject recorder UI.")
            return False
        if self._recorder_ui_injected:
            logger.debug("Recorder UI scripts already injected.")
            return True
        try:
            self.page.evaluate(RECORDER_PANEL_JS)
            self._recorder_ui_injected = True
            logger.info("Recorder UI panel JavaScript injected successfully.")
            return True
        except Exception as e:
            logger.error(f"Failed to inject recorder UI panel JS: {e}", exc_info=True)
            return False

    def show_verification_review_panel(self, planned_desc: str, verification_result: Dict[str, Any]):
        """Shows the panel for reviewing AI verification results.

        Args:
            planned_desc: Description of the planned step.
            verification_result: Mapping read for the keys ``verified``,
                ``reasoning``, ``assertion_type``, ``parameters`` and
                ``verification_selector``.
        """
        if self.headless or not self.page:
            return
        try:
            # Extract data needed by the JS function
            args = {
                "plannedDesc": planned_desc,
                "aiVerified": verification_result.get('verified', False),
                "aiReasoning": verification_result.get('reasoning', 'N/A'),
                "assertionType": verification_result.get('assertion_type'),
                "parameters": verification_result.get('parameters', {}),
                "selector": verification_result.get('verification_selector'),  # Use the final selector
            }
            self._call_panel_function(
                "_recorder_showVerificationReviewPanel",
                "args",
                args,
                "Verification review panel function not defined!",
            )
        except Exception as e:
            logger.error(f"Failed to show verification review panel: {e}", exc_info=True)

    def show_assertion_target_panel(self, planned_desc: str, suggested_selector: Optional[str]):
        """Shows the panel for confirming/overriding the assertion target."""
        if self.headless or not self.page:
            return
        try:
            self._call_panel_function(
                "_recorder_showAssertionTargetPanel",
                "args.plannedDesc, args.suggestedSelector",
                {"plannedDesc": planned_desc, "suggestedSelector": suggested_selector},
                "Assertion target panel function not defined!",
            )
        except Exception as e:
            logger.error(f"Failed to show assertion target panel: {e}", exc_info=True)

    def show_assertion_type_panel(self, target_selector: str):
        """Shows the panel for selecting the assertion type."""
        if self.headless or not self.page:
            return
        try:
            self._call_panel_function(
                "_recorder_showAssertionTypePanel",
                "args.targetSelector",
                {"targetSelector": target_selector},
                "Assertion type panel function not defined!",
            )
        except Exception as e:
            logger.error(f"Failed to show assertion type panel: {e}", exc_info=True)

    def show_assertion_params_panel(self, target_selector: str, assertion_type: str, param_labels: List[str]):
        """Shows the panel for entering assertion parameters."""
        if self.headless or not self.page:
            return
        try:
            self._call_panel_function(
                "_recorder_showAssertionParamsPanel",
                "args.targetSelector, args.assertionType, args.paramLabels",
                {
                    "targetSelector": target_selector,
                    "assertionType": assertion_type,
                    "paramLabels": param_labels,
                },
                "Assertion params panel function not defined!",
            )
        except Exception as e:
            logger.error(f"Failed to show assertion params panel: {e}", exc_info=True)

    def get_assertion_parameters_from_panel(self, count: int) -> Optional[Dict[str, str]]:
        """Retrieves the parameter values entered in the assertion panel.

        Args:
            count: Number of parameter inputs to read (the page function
                reads the second input only when this is greater than 1).

        Returns:
            The object returned by ``window._recorder_getAssertionParams``,
            or ``None`` in headless mode, without a page, when the page
            function is missing, or when evaluation fails.
        """
        if self.headless or not self.page:
            return None
        try:
            # The expression must be a *function* for the argument to be
            # delivered; with a bare expression the argument is dropped and
            # `count` would be an undefined identifier inside the page.
            return self.page.evaluate(
                "(count) => window._recorder_getAssertionParams"
                " ? window._recorder_getAssertionParams(count) : null",
                count,
            )
        except Exception as e:
            logger.error(f"Failed to get assertion parameters from panel: {e}")
            return None

    def show_recorder_panel(self, step_description: str, suggestion_text: str):
        """Shows the recorder UI panel with step info."""
        if self.headless or not self.page:
            logger.warning("Cannot show recorder panel (headless or no page).")
            return
        try:
            # Defines the panel functions first, then calls showPanel.
            self._call_panel_function(
                "_recorder_showPanel",
                "args.stepDescription, args.suggestionText",
                {"stepDescription": step_description, "suggestionText": suggestion_text},
                "[Recorder Panel] _recorder_showPanel function is still not defined after injection attempt!",
            )
        except Exception as e:
            logger.error(f"Failed to show recorder panel: {e}", exc_info=True)  # Log full trace for debugging

    def hide_recorder_panel(self):
        """Hides the recorder UI panel if it exists."""
        if self.headless or not self.page:
            return
        try:
            # Check if function exists before calling
            self.page.evaluate("if (window._recorder_hidePanel) window._recorder_hidePanel()")
        except Exception as e:
            logger.warning(f"Failed to hide recorder panel (might be removed or page navigated): {e}")

    def remove_recorder_panel(self):
        """Removes the recorder UI panel from the DOM if it exists."""
        if self.headless or not self.page:
            return
        try:
            # Check if function exists before calling
            self.page.evaluate("if (window._recorder_removePanel) window._recorder_removePanel()")
        except Exception as e:
            logger.warning(f"Failed to remove recorder panel (might be removed or page navigated): {e}")

    def prompt_parameterization_in_panel(self, default_value: str) -> bool:
        """Shows the parameterization input field, ensuring functions are defined.

        Returns:
            ``True`` only when the page reported that the input was shown.
        """
        if self.headless or not self.page:
            return False
        try:
            success = self._call_panel_function(
                "_recorder_showParamUI",
                "args.defaultValue",
                {"defaultValue": default_value},
                "[Recorder Panel] _recorder_showParamUI function is still not defined!",
            )
            return success is True  # Ensure boolean return
        except Exception as e:
            logger.error(f"Failed to show parameterization UI in panel: {e}")
            return False

    def wait_for_panel_interaction(self, timeout_seconds: float) -> Optional[str]:
        """
        Waits for the user to click a button on the recorder panel.

        Returns the choice ('accept', 'skip', 'abort', 'parameterized') or None on timeout.
        None is also returned in headless mode, without a page, before
        ``inject_recorder_ui_scripts`` succeeded, or when waiting fails.
        """
        if self.headless or not self.page or not self._recorder_ui_injected:
            return None

        with self._panel_interaction_lock:  # Prevent concurrent waits if called rapidly
            js_condition = "() => window._recorder_user_choice !== undefined"
            timeout_ms = timeout_seconds * 1000
            user_choice = None

            logger.info(f"Waiting up to {timeout_seconds}s for user interaction via UI panel...")

            try:
                # Ensure the flag is initially undefined before waiting
                self.page.evaluate("window._recorder_user_choice = undefined")
                self.page.wait_for_function(js_condition, timeout=timeout_ms)
                # If wait succeeds, get the choice
                user_choice = self.page.evaluate("window._recorder_user_choice")
                logger.info(f"User interaction detected via panel: '{user_choice}'")
            except PlaywrightTimeoutError:
                logger.warning("Timeout reached waiting for panel interaction.")
                user_choice = None  # Timeout occurred
            except Exception as e:
                logger.error(f"Error during page.wait_for_function for panel interaction: {e}", exc_info=True)
                user_choice = None  # Treat other errors as timeout/failure
            finally:
                # Reset the flag *immediately after reading or timeout* for the next wait
                try:
                    self.page.evaluate("window._recorder_user_choice = undefined")
                except Exception:
                    logger.warning("Could not reset panel choice flag after interaction/timeout.")

            return user_choice

    def get_parameterization_result(self) -> Optional[str]:
        """Retrieves the parameter name entered in the panel.

        Call after wait_for_panel_interaction returns 'parameterized'. The
        page-side value is cleared once it has been read.
        """
        if self.headless or not self.page or not self._recorder_ui_injected:
            return None
        try:
            param_name = self.page.evaluate("window._recorder_parameter_name")
            # Reset the flag after reading
            self.page.evaluate("window._recorder_parameter_name = undefined")
            logger.debug(f"Retrieved parameter name from panel: {param_name}")
            return param_name  # Can be string or null
        except Exception as e:
            logger.error(f"Failed to get parameter name from panel: {e}")
            return None
```
--------------------------------------------------------------------------------
/src/dom/buildDomTree.js:
--------------------------------------------------------------------------------
```javascript
(
args = {
doHighlightElements: true,
focusHighlightIndex: -1,
viewportExpansion: 0,
debugMode: false,
}
) => {
const { doHighlightElements, focusHighlightIndex, viewportExpansion, debugMode } = args;
let highlightIndex = 0; // Reset highlight index
// Add timing stack to handle recursion
// Each named entry is an array used as a stack of start timestamps by
// pushTiming/popTiming. `current` starts as null and is not touched by any
// code visible in this section.
const TIMING_STACK = {
nodeProcessing: [],
treeTraversal: [],
highlighting: [],
current: null
};
/**
 * Records the current time as the start of a measurement of kind `type`.
 * A stack is created for `type` on first use (or when its slot is falsy).
 */
function pushTiming(type) {
    if (!TIMING_STACK[type]) {
        TIMING_STACK[type] = [];
    }
    const stack = TIMING_STACK[type];
    stack.push(performance.now());
}
/**
 * Ends the most recent measurement of kind `type` and returns its duration
 * in milliseconds (the difference between now and the popped start time).
 */
function popTiming(type) {
    const startedAt = TIMING_STACK[type].pop();
    return performance.now() - startedAt;
}
// Only initialize performance tracking if in debug mode
// PERF_METRICS is null when debugMode is off, so every reader must check it
// before use. All counters and timings start at zero.
const PERF_METRICS = debugMode ? {
buildDomTreeCalls: 0,
// Accumulated time per helper function.
timings: {
buildDomTree: 0,
highlightElement: 0,
isInteractiveElement: 0,
isElementVisible: 0,
isTopElement: 0,
isInExpandedViewport: 0,
isTextNodeVisible: 0,
getEffectiveScroll: 0,
},
// Hit/miss counters for the bounding-rect and computed-style caches.
cacheMetrics: {
boundingRectCacheHits: 0,
boundingRectCacheMisses: 0,
computedStyleCacheHits: 0,
computedStyleCacheMisses: 0,
getBoundingClientRectTime: 0,
getComputedStyleTime: 0,
boundingRectHitRate: 0,
computedStyleHitRate: 0,
overallHitRate: 0,
},
nodeMetrics: {
totalNodes: 0,
processedNodes: 0,
skippedNodes: 0,
},
buildDomTreeBreakdown: {
totalTime: 0,
totalSelfTime: 0,
buildDomTreeCalls: 0,
// Time spent in each DOM call; keys here are the names accepted by
// measureDomOperation.
domOperations: {
getBoundingClientRect: 0,
getComputedStyle: 0,
},
// Number of times each DOM call was made.
domOperationCounts: {
getBoundingClientRect: 0,
getComputedStyle: 0,
}
}
} : null;
/**
 * Wraps `fn` for timing. Outside debug mode `fn` is returned untouched.
 * In debug mode the wrapper forwards `this` and all arguments to `fn` and
 * returns its result; the elapsed time is computed but not stored anywhere.
 */
function measureTime(fn) {
    if (debugMode) {
        return function (...callArgs) {
            const startedAt = performance.now();
            const outcome = fn.apply(this, callArgs);
            // Measured but currently unused: nothing records this value.
            const elapsed = performance.now() - startedAt;
            return outcome;
        };
    }
    return fn;
}
/**
 * Runs `operation()` and returns its result. In debug mode, when `name` is a
 * key of PERF_METRICS.buildDomTreeBreakdown.domOperations, the elapsed time
 * is added to that entry and the matching call counter is incremented.
 */
function measureDomOperation(operation, name) {
    if (debugMode) {
        const startedAt = performance.now();
        const outcome = operation();
        const elapsed = performance.now() - startedAt;

        const breakdown = PERF_METRICS ? PERF_METRICS.buildDomTreeBreakdown : null;
        if (breakdown && name in breakdown.domOperations) {
            breakdown.domOperations[name] += elapsed;
            breakdown.domOperationCounts[name]++;
        }
        return outcome;
    }
    return operation();
}
// Per-run caches keyed by element. WeakMaps are used, so entries do not keep
// their elements alive. clearCache() swaps in fresh, empty maps.
const DOM_CACHE = {
    boundingRects: new WeakMap(),
    computedStyles: new WeakMap(),
    clearCache() {
        DOM_CACHE.boundingRects = new WeakMap();
        DOM_CACHE.computedStyles = new WeakMap();
    },
};
// Cache helper functions
/**
 * Returns element.getBoundingClientRect(), served from DOM_CACHE when the
 * element was measured before. Returns null for a falsy element. In debug
 * mode cache hits/misses and the time of the DOM call are recorded in
 * PERF_METRICS.
 */
function getCachedBoundingRect(element) {
    if (!element) return null;

    const cache = DOM_CACHE.boundingRects;
    const trackMetrics = debugMode && PERF_METRICS;

    if (cache.has(element)) {
        if (trackMetrics) {
            PERF_METRICS.cacheMetrics.boundingRectCacheHits++;
        }
        return cache.get(element);
    }

    if (trackMetrics) {
        PERF_METRICS.cacheMetrics.boundingRectCacheMisses++;
    }

    // measureDomOperation performs the same debug-only timing bookkeeping
    // (time + call count under 'getBoundingClientRect') and is a plain call
    // outside debug mode.
    const rect = measureDomOperation(
        () => element.getBoundingClientRect(),
        'getBoundingClientRect'
    );

    if (rect) {
        cache.set(element, rect);
    }
    return rect;
}
/**
 * Returns window.getComputedStyle(element), served from DOM_CACHE when the
 * element was looked up before. Returns null for a falsy element. In debug
 * mode cache hits/misses and the time of the DOM call are recorded in
 * PERF_METRICS.
 */
function getCachedComputedStyle(element) {
    if (!element) return null;

    const cache = DOM_CACHE.computedStyles;
    const trackMetrics = debugMode && PERF_METRICS;

    if (cache.has(element)) {
        if (trackMetrics) {
            PERF_METRICS.cacheMetrics.computedStyleCacheHits++;
        }
        return cache.get(element);
    }

    if (trackMetrics) {
        PERF_METRICS.cacheMetrics.computedStyleCacheMisses++;
    }

    // measureDomOperation performs the same debug-only timing bookkeeping
    // (time + call count under 'getComputedStyle') and is a plain call
    // outside debug mode.
    const style = measureDomOperation(
        () => window.getComputedStyle(element),
        'getComputedStyle'
    );

    if (style) {
        cache.set(element, style);
    }
    return style;
}
/**
 * Hash map of DOM nodes indexed by their highlight index.
 *
 * NOTE(review): the code that fills this map is outside this section, so the
 * key scheme described above could not be confirmed here - verify it against
 * the writer.
 *
 * @type {Object<string, any>}
 */
const DOM_HASH_MAP = {};
// Mutable counter object; starts at 0. NOTE(review): presumably the source of
// node ids for DOM_HASH_MAP - confirm at the point of use.
const ID = { current: 0 };
// DOM id of the single overlay container created lazily by highlightElement.
const HIGHLIGHT_CONTAINER_ID = "playwright-highlight-container";
/**
 * Highlights an element in the DOM and returns the index of the next element.
 *
 * Draws one coloured overlay per non-empty client rect of `element` plus a
 * single numbered label, all inside a shared fixed-position container, and
 * registers scroll/resize listeners that keep them aligned with the element.
 *
 * @param {Element} element - Element to highlight.
 * @param {number} index - Number shown in the label; also selects the colour.
 * @param {Element|null} parentIframe - When given, its bounding rect's
 *   left/top are added to every position.
 * @returns {number} `index` unchanged when `element` is falsy or has no
 *   client rects, otherwise `index + 1`.
 */
function highlightElement(element, index, parentIframe = null) {
if (!element) return index;
// Store overlays and the single label for updating
const overlays = [];
let label = null;
let labelWidth = 20; // Approximate label width
let labelHeight = 16; // Approximate label height
try {
// Create or get highlight container
// The container covers the viewport, ignores pointer events and uses the
// maximum z-index so overlays sit above page content without blocking it.
let container = document.getElementById(HIGHLIGHT_CONTAINER_ID);
if (!container) {
container = document.createElement("div");
container.id = HIGHLIGHT_CONTAINER_ID;
container.style.position = "fixed";
container.style.pointerEvents = "none";
container.style.top = "0";
container.style.left = "0";
container.style.width = "100%";
container.style.height = "100%";
container.style.zIndex = "2147483647";
container.style.backgroundColor = 'transparent';
document.body.appendChild(container);
}
// Get element client rects
const rects = element.getClientRects(); // Use getClientRects()
if (!rects || rects.length === 0) return index; // Exit if no rects
// Generate a color based on the index
// The palette repeats every 12 indices.
const colors = [
"#FF0000",
"#00FF00",
"#0000FF",
"#FFA500",
"#800080",
"#008080",
"#FF69B4",
"#4B0082",
"#FF4500",
"#2E8B57",
"#DC143C",
"#4682B4",
];
const colorIndex = index % colors.length;
const baseColor = colors[colorIndex];
const backgroundColor = baseColor + "1A"; // 10% opacity version of the color
// Get iframe offset if necessary
let iframeOffset = { x: 0, y: 0 };
if (parentIframe) {
const iframeRect = parentIframe.getBoundingClientRect(); // Keep getBoundingClientRect for iframe offset
iframeOffset.x = iframeRect.left;
iframeOffset.y = iframeRect.top;
}
// Create highlight overlays for each client rect
// Zero-width or zero-height rects get no overlay, so `overlays` can be
// shorter than `rects`.
for (const rect of rects) {
if (rect.width === 0 || rect.height === 0) continue; // Skip empty rects
const overlay = document.createElement("div");
overlay.style.position = "fixed";
overlay.style.border = `2px solid ${baseColor}`;
overlay.style.backgroundColor = backgroundColor;
overlay.style.pointerEvents = "none";
overlay.style.boxSizing = "border-box";
const top = rect.top + iframeOffset.y;
const left = rect.left + iframeOffset.x;
overlay.style.top = `${top}px`;
overlay.style.left = `${left}px`;
overlay.style.width = `${rect.width}px`;
overlay.style.height = `${rect.height}px`;
container.appendChild(overlay);
overlays.push({ element: overlay, initialRect: rect }); // Store overlay and its rect
}
// Create and position a single label relative to the first rect
const firstRect = rects[0];
label = document.createElement("div");
label.className = "playwright-highlight-label";
label.style.position = "fixed";
label.style.background = baseColor;
label.style.color = "white";
label.style.padding = "1px 4px";
label.style.borderRadius = "4px";
label.style.fontSize = `${Math.min(12, Math.max(8, firstRect.height / 2))}px`;
label.textContent = index;
// NOTE(review): the label is measured here but only appended to the container
// further down. A detached element normally reports offsetWidth/offsetHeight
// of 0, so the 20x16 defaults are presumably always used - confirm before
// reordering, since moving the append changes label placement.
labelWidth = label.offsetWidth > 0 ? label.offsetWidth : labelWidth; // Update actual width if possible
labelHeight = label.offsetHeight > 0 ? label.offsetHeight : labelHeight; // Update actual height if possible
const firstRectTop = firstRect.top + iframeOffset.y;
const firstRectLeft = firstRect.left + iframeOffset.x;
// Default placement: inside the first rect, 2px from its top-right corner.
let labelTop = firstRectTop + 2;
let labelLeft = firstRectLeft + firstRect.width - labelWidth - 2;
// Adjust label position if first rect is too small
if (firstRect.width < labelWidth + 4 || firstRect.height < labelHeight + 4) {
labelTop = firstRectTop - labelHeight - 2;
labelLeft = firstRectLeft + firstRect.width - labelWidth; // Align with right edge
if (labelLeft < iframeOffset.x) labelLeft = firstRectLeft; // Prevent going off-left
}
// Ensure label stays within viewport bounds slightly better
labelTop = Math.max(0, Math.min(labelTop, window.innerHeight - labelHeight));
labelLeft = Math.max(0, Math.min(labelLeft, window.innerWidth - labelWidth));
label.style.top = `${labelTop}px`;
label.style.left = `${labelLeft}px`;
container.appendChild(label);
// Update positions on scroll/resize
// Re-reads the element's rects and the iframe offset, then repositions
// every overlay and the label using the same rules as above.
const updatePositions = () => {
const newRects = element.getClientRects(); // Get fresh rects
let newIframeOffset = { x: 0, y: 0 };
if (parentIframe) {
const iframeRect = parentIframe.getBoundingClientRect(); // Keep getBoundingClientRect for iframe
newIframeOffset.x = iframeRect.left;
newIframeOffset.y = iframeRect.top;
}
// Update each overlay
// Overlays are matched to the fresh rects by position in the list.
overlays.forEach((overlayData, i) => {
if (i < newRects.length) { // Check if rect still exists
const newRect = newRects[i];
const newTop = newRect.top + newIframeOffset.y;
const newLeft = newRect.left + newIframeOffset.x;
overlayData.element.style.top = `${newTop}px`;
overlayData.element.style.left = `${newLeft}px`;
overlayData.element.style.width = `${newRect.width}px`;
overlayData.element.style.height = `${newRect.height}px`;
overlayData.element.style.display = (newRect.width === 0 || newRect.height === 0) ? 'none' : 'block';
} else {
// If fewer rects now, hide extra overlays
overlayData.element.style.display = 'none';
}
});
// If there are fewer new rects than overlays, hide the extras
// (The forEach above already hides these; this loop repeats that work.)
if (newRects.length < overlays.length) {
for (let i = newRects.length; i < overlays.length; i++) {
overlays[i].element.style.display = 'none';
}
}
// Update label position based on the first new rect
if (label && newRects.length > 0) {
const firstNewRect = newRects[0];
const firstNewRectTop = firstNewRect.top + newIframeOffset.y;
const firstNewRectLeft = firstNewRect.left + newIframeOffset.x;
let newLabelTop = firstNewRectTop + 2;
let newLabelLeft = firstNewRectLeft + firstNewRect.width - labelWidth - 2;
if (firstNewRect.width < labelWidth + 4 || firstNewRect.height < labelHeight + 4) {
newLabelTop = firstNewRectTop - labelHeight - 2;
newLabelLeft = firstNewRectLeft + firstNewRect.width - labelWidth;
if (newLabelLeft < newIframeOffset.x) newLabelLeft = firstNewRectLeft;
}
// Ensure label stays within viewport bounds
newLabelTop = Math.max(0, Math.min(newLabelTop, window.innerHeight - labelHeight));
newLabelLeft = Math.max(0, Math.min(newLabelLeft, window.innerWidth - labelWidth));
label.style.top = `${newLabelTop}px`;
label.style.left = `${newLabelLeft}px`;
label.style.display = 'block';
} else if (label) {
// Hide label if element has no rects anymore
label.style.display = 'none';
}
};
// These listeners are never removed in this function, so each highlighted
// element keeps one scroll and one resize listener registered.
window.addEventListener('scroll', updatePositions, true); // Use capture phase
window.addEventListener('resize', updatePositions);
// TODO: Add cleanup logic to remove listeners and elements when done.
return index + 1;
} finally {
// Intentionally empty: there is no catch, so errors thrown above propagate.
// popTiming('highlighting'); // Assuming this was a typo and should be removed or corrected
}
}
/**
 * Returns the 1-based position of `currentElement` among the children of its
 * parent that share its tag name, or 0 when it has no parent element or is
 * the only child with that tag (no index needed in an XPath segment).
 */
function getElementPosition(currentElement) {
    const parent = currentElement.parentElement;
    if (!parent) {
        return 0; // No parent means no siblings
    }

    const wantedTag = currentElement.nodeName.toLowerCase();
    let sameTagCount = 0;
    let ownPosition = 0;
    for (const sibling of Array.from(parent.children)) {
        if (sibling.nodeName.toLowerCase() !== wantedTag) continue;
        sameTagCount += 1;
        // Keep the first match only, mirroring an indexOf lookup.
        if (ownPosition === 0 && sibling === currentElement) {
            ownPosition = sameTagCount;
        }
    }

    // A single same-tag child needs no positional index.
    return sameTagCount === 1 ? 0 : ownPosition;
}
/**
 * Builds a slash-separated XPath-like path for `element` by walking up its
 * ancestors. Each segment is the lower-cased tag name, followed by `[n]`
 * when getElementPosition reports a position greater than 0.
 *
 * With `stopAtBoundary` (default true) the walk stops, without emitting a
 * segment, at the first node whose parent is a ShadowRoot or an iframe
 * element.
 */
function getXPathTree(element, stopAtBoundary = true) {
    const collected = [];

    for (
        let node = element;
        node && node.nodeType === Node.ELEMENT_NODE;
        node = node.parentNode
    ) {
        const parent = node.parentNode;
        if (
            stopAtBoundary &&
            (parent instanceof ShadowRoot || parent instanceof HTMLIFrameElement)
        ) {
            break;
        }

        const position = getElementPosition(node);
        const suffix = position > 0 ? `[${position}]` : "";
        collected.push(`${node.nodeName.toLowerCase()}${suffix}`);
    }

    // Segments were gathered leaf-first; the path reads root-first.
    return collected.reverse().join("/");
}
/**
 * Checks if a text node is visible.
 *
 * The node counts as visible when at least one of its client rects has a
 * non-zero size and lies inside the viewport grown by `viewportExpansion`
 * (a value of -1 disables the viewport test), and its parent element passes
 * checkVisibility (or, where that is unavailable, a computed-style check of
 * display, visibility and opacity). Any error yields false.
 */
function isTextNodeVisible(textNode) {
    try {
        const range = document.createRange();
        range.selectNodeContents(textNode);
        const rects = range.getClientRects(); // Use getClientRects for Range
        if (!rects || rects.length === 0) {
            return false;
        }

        const ignoreViewport = viewportExpansion === -1;
        const hasUsableRect = Array.from(rects).some((rect) => {
            if (!(rect.width > 0 && rect.height > 0)) {
                return false; // Empty rects never count
            }
            const outsideViewport =
                rect.bottom < -viewportExpansion ||
                rect.top > window.innerHeight + viewportExpansion ||
                rect.right < -viewportExpansion ||
                rect.left > window.innerWidth + viewportExpansion;
            return !outsideViewport || ignoreViewport;
        });
        if (!hasUsableRect) {
            return false;
        }

        // Check parent visibility
        const parentElement = textNode.parentElement;
        if (!parentElement) {
            return false;
        }

        try {
            return parentElement.checkVisibility({
                checkOpacity: true,
                checkVisibilityCSS: true,
            });
        } catch (e) {
            // Fallback if checkVisibility is not supported
            const style = window.getComputedStyle(parentElement);
            return style.display !== 'none' &&
                style.visibility !== 'hidden' &&
                style.opacity !== '0';
        }
    } catch (e) {
        console.warn('Error checking text node visibility:', e);
        return false;
    }
}
/**
 * Decides whether an element should be considered at all.
 * Returns false for a missing element or one without a tag name, true for
 * body and common container tags, and otherwise true unless the tag is on
 * the deny list (svg, script, style, link, meta, noscript, template).
 */
function isElementAccepted(element) {
    const rawTag = element ? element.tagName : null;
    if (!rawTag) return false;

    const tag = rawTag.toLowerCase();

    // Always accept body and common container elements
    const containerTags = [
        "body", "div", "main", "article", "section", "nav", "header", "footer"
    ];
    if (containerTags.includes(tag)) return true;

    const deniedTags = [
        "svg",
        "script",
        "style",
        "link",
        "meta",
        "noscript",
        "template",
    ];
    return deniedTags.indexOf(tag) === -1;
}
/**
 * Reports whether an element is visible: it must have a positive
 * offsetWidth and offsetHeight, and its computed style must be neither
 * `visibility: hidden` nor `display: none`.
 */
function isElementVisible(element) {
  // Fetched up front, exactly once per call, via the shared style cache.
  const computed = getCachedComputedStyle(element);

  const hasLayoutBox = element.offsetWidth > 0 && element.offsetHeight > 0;
  if (!hasLayoutBox) {
    return false;
  }
  return computed.visibility !== "hidden" && computed.display !== "none";
}
/**
 * Checks if an element is interactive.
 *
 * Decision order (the first matching rule decides):
 *  1. Anything that is not an element node is not interactive.
 *  2. Any element other than <html> whose computed cursor is listed in `interactiveCursors` is interactive.
 *  3. A tag listed in `interactiveElements` is interactive unless its computed cursor is listed in
 *     `nonInteractiveCursors`, it carries a `disabled` or `readonly` attribute, or its
 *     `disabled` / `readOnly` / `inert` property is truthy.
 *  4. An element with a classList that has the class `button` or `dropdown-toggle`, a non-empty
 *     `data-index`, `data-toggle="dropdown"`, or `aria-haspopup="true"` is interactive.
 *  5. An element whose `role` or `aria-role` attribute is listed in `interactiveRoles` is interactive.
 *  6. An element with a mouse listener is interactive: listeners are read through a global
 *     `getEventListeners` function when one exists, otherwise inline on* attributes are checked.
 *  Everything else is not interactive.
 *
 * lots of comments, and uncommented code - to show the logic of what we already tried
 *
 * One of the things we tried at the beginning was also to use event listeners, and other fancy class, style stuff -> what actually worked best was just combining most things with computed cursor style :)
 */
function isInteractiveElement(element) {
if (!element || element.nodeType !== Node.ELEMENT_NODE) {
return false;
}
// Define interactive cursors
const interactiveCursors = new Set([
'pointer', // Link/clickable elements
'move', // Movable elements
'text', // Text selection
'grab', // Grabbable elements
'grabbing', // Currently grabbing
'cell', // Table cell selection
'copy', // Copy operation
'alias', // Alias creation
'all-scroll', // Scrollable content
'col-resize', // Column resize
'context-menu', // Context menu available
'crosshair', // Precise selection
'e-resize', // East resize
'ew-resize', // East-west resize
'help', // Help available
'n-resize', // North resize
'ne-resize', // Northeast resize
'nesw-resize', // Northeast-southwest resize
'ns-resize', // North-south resize
'nw-resize', // Northwest resize
'nwse-resize', // Northwest-southeast resize
'row-resize', // Row resize
's-resize', // South resize
'se-resize', // Southeast resize
'sw-resize', // Southwest resize
'vertical-text', // Vertical text selection
'w-resize', // West resize
'zoom-in', // Zoom in
'zoom-out' // Zoom out
]);
// Define non-interactive cursors
const nonInteractiveCursors = new Set([
'not-allowed', // Action not allowed
'no-drop', // Drop not allowed
'wait', // Processing
'progress', // In progress
'initial', // Initial value
'inherit' // Inherited value
//? Let's just include all potentially clickable elements that are not specifically blocked
// 'none', // No cursor
// 'default', // Default cursor
// 'auto', // Browser default
]);
// Returns true when the element's computed cursor is one of the interactive cursors.
// The <html> element is excluded explicitly.
function doesElementHaveInteractivePointer(element) {
if (element.tagName.toLowerCase() === "html") return false;
const style = getCachedComputedStyle(element);
if (interactiveCursors.has(style.cursor)) return true;
return false;
}
let isInteractiveCursor = doesElementHaveInteractivePointer(element);
// Genius fix for almost all interactive elements
// NOTE: this returns before the disabled/readonly checks below are reached.
if (isInteractiveCursor) {
return true;
}
const interactiveElements = new Set([
"a", // Links
"button", // Buttons
"input", // All input types (text, checkbox, radio, etc.)
"select", // Dropdown menus
"textarea", // Text areas
"details", // Expandable details
"summary", // Summary element (clickable part of details)
"label", // Form labels (often clickable)
"option", // Select options
"optgroup", // Option groups
"fieldset", // Form fieldsets (can be interactive with legend)
"legend", // Fieldset legends
]);
// Define explicit disable attributes and properties
const explicitDisableTags = new Set([
'disabled', // Standard disabled attribute
// 'aria-disabled', // ARIA disabled state
'readonly', // Read-only state
// 'aria-readonly', // ARIA read-only state
// 'aria-hidden', // Hidden from accessibility
// 'hidden', // Hidden attribute
// 'inert', // Inert attribute
// 'aria-inert', // ARIA inert state
// 'tabindex="-1"', // Removed from tab order
// 'aria-hidden="true"' // Hidden from screen readers
]);
// handle inputs, select, checkbox, radio, textarea, button and make sure they are not cursor style disabled/not-allowed
// Every path inside this branch returns, so tags in `interactiveElements` never reach the checks after it.
if (interactiveElements.has(element.tagName.toLowerCase())) {
const style = getCachedComputedStyle(element);
// Check for non-interactive cursor
if (nonInteractiveCursors.has(style.cursor)) {
return false;
}
// Check for explicit disable attributes
// (hasAttribute alone already covers the 'true' and '' value comparisons that follow it)
for (const disableTag of explicitDisableTags) {
if (element.hasAttribute(disableTag) ||
element.getAttribute(disableTag) === 'true' ||
element.getAttribute(disableTag) === '') {
return false;
}
}
// Check for disabled property on form elements
if (element.disabled) {
return false;
}
// Check for readonly property on form elements
if (element.readOnly) {
return false;
}
// Check for inert property
if (element.inert) {
return false;
}
return true;
}
const tagName = element.tagName.toLowerCase();
const role = element.getAttribute("role");
const ariaRole = element.getAttribute("aria-role");
// Added enhancement to capture dropdown interactive elements
// The attribute checks are only evaluated when element.classList is truthy.
if (element.classList && (
element.classList.contains("button") ||
element.classList.contains('dropdown-toggle') ||
element.getAttribute('data-index') ||
element.getAttribute('data-toggle') === 'dropdown' ||
element.getAttribute('aria-haspopup') === 'true'
)) {
return true;
}
const interactiveRoles = new Set([
'button', // Directly clickable element
// 'link', // Clickable link
// 'menuitem', // Clickable menu item
'menuitemradio', // Radio-style menu item (selectable)
'menuitemcheckbox', // Checkbox-style menu item (toggleable)
'radio', // Radio button (selectable)
'checkbox', // Checkbox (toggleable)
'tab', // Tab (clickable to switch content)
'switch', // Toggle switch (clickable to change state)
'slider', // Slider control (draggable)
'spinbutton', // Number input with up/down controls
'combobox', // Dropdown with text input
'searchbox', // Search input field
'textbox', // Text input field
// 'listbox', // Selectable list
'option', // Selectable option in a list
'scrollbar' // Scrollable control
]);
// Basic role/attribute checks
// The tag-name term cannot be true here: tags in `interactiveElements` already returned above.
const hasInteractiveRole =
interactiveElements.has(tagName) ||
interactiveRoles.has(role) ||
interactiveRoles.has(ariaRole);
if (hasInteractiveRole) return true;
// check whether element has event listeners
try {
if (typeof getEventListeners === 'function') {
const listeners = getEventListeners(element);
const mouseEvents = ['click', 'mousedown', 'mouseup', 'dblclick'];
for (const eventType of mouseEvents) {
if (listeners[eventType] && listeners[eventType].length > 0) {
return true; // Found a mouse interaction listener
}
}
} else {
// Fallback: Check common event attributes if getEventListeners is not available
const commonMouseAttrs = ['onclick', 'onmousedown', 'onmouseup', 'ondblclick'];
if (commonMouseAttrs.some(attr => element.hasAttribute(attr))) {
return true;
}
}
} catch (e) {
// console.warn(`Could not check event listeners for ${element.tagName}:`, e);
// If checking listeners fails, rely on other checks
}
return false
}
/**
 * Determines whether an element is the topmost element at its own position.
 *
 * Returns false when the element has no client rects or none of them is
 * inside the expanded viewport (unless `viewportExpansion` is -1).
 * Elements owned by another document (iframes) are treated as topmost.
 * Otherwise the element found at the centre of the middle client rect is
 * looked up - through the shadow root for shadow DOM content, through
 * `document` for everything else - and the element counts as topmost when
 * it is that hit element or one of its ancestors. If the lookup throws,
 * the element is assumed to be topmost.
 */
function isTopElement(element) {
  const boxes = element.getClientRects();
  if (!boxes || boxes.length === 0) {
    return false; // No geometry, cannot be top
  }

  // Same test as the expanded-viewport check: one sized box in range suffices.
  let anyBoxInViewport = false;
  for (const box of boxes) {
    const hasArea = box.width > 0 && box.height > 0;
    const isOutside =
      box.bottom < -viewportExpansion ||
      box.top > window.innerHeight + viewportExpansion ||
      box.right < -viewportExpansion ||
      box.left > window.innerWidth + viewportExpansion;
    if ((hasArea && !isOutside) || viewportExpansion === -1) {
      anyBoxInViewport = true;
      break;
    }
  }
  if (!anyBoxInViewport) {
    return false; // All rects are outside the viewport area
  }

  // Elements inside an iframe are considered top by default.
  if (element.ownerDocument !== window.document) {
    return true;
  }

  const rootNode = element.getRootNode();
  const livesInShadowDom = rootNode instanceof ShadowRoot;

  // Probe the centre of the middle client rect.
  const middleBox = boxes[Math.floor(boxes.length / 2)];
  const probeX = middleBox.left + middleBox.width / 2;
  const probeY = middleBox.top + middleBox.height / 2;

  // Walks from the hit element upwards until `boundary`, looking for `element`.
  const hitBelongsToElement = (hit, boundary) => {
    for (let cursor = hit; cursor && cursor !== boundary; cursor = cursor.parentElement) {
      if (cursor === element) {
        return true;
      }
    }
    return false;
  };

  try {
    if (livesInShadowDom) {
      // Shadow DOM content must be hit-tested within its own root.
      const shadowHit = measureDomOperation(
        () => rootNode.elementFromPoint(probeX, probeY),
        'elementFromPoint'
      );
      if (!shadowHit) {
        return false;
      }
      return hitBelongsToElement(shadowHit, rootNode);
    }

    const documentHit = document.elementFromPoint(probeX, probeY);
    if (!documentHit) {
      return false;
    }
    return hitBelongsToElement(documentHit, document.documentElement);
  } catch (e) {
    return true;
  }
}
/**
 * Checks if an element is within the expanded viewport.
 *
 * NOTE(review): the first statement returns true unconditionally, so every statement after it
 * is unreachable and every element is currently reported as being inside the viewport.
 * Nothing in this file shows whether that is a deliberate override or a leftover debugging
 * shortcut - confirm before removing it, because handleHighlighting only assigns a
 * highlightIndex to an element when this function returns true.
 *
 * The unreachable logic below would return true when viewportExpansion is -1, or when any
 * non-empty client rect (or, if there are no client rects, the cached bounding rect) is not
 * outside the viewport grown by viewportExpansion on every side.
 */
function isInExpandedViewport(element, viewportExpansion) {
// NOTE(review): unconditional early return - everything below is dead code (see header).
return true
if (viewportExpansion === -1) {
return true;
}
const rects = element.getClientRects(); // Use getClientRects
if (!rects || rects.length === 0) {
// Fallback to getBoundingClientRect if getClientRects is empty,
// useful for elements like <svg> that might not have client rects but have a bounding box.
const boundingRect = getCachedBoundingRect(element);
if (!boundingRect || boundingRect.width === 0 || boundingRect.height === 0) {
return false;
}
return !(
boundingRect.bottom < -viewportExpansion ||
boundingRect.top > window.innerHeight + viewportExpansion ||
boundingRect.right < -viewportExpansion ||
boundingRect.left > window.innerWidth + viewportExpansion
);
}
// Check if *any* client rect is within the viewport
for (const rect of rects) {
if (rect.width === 0 || rect.height === 0) continue; // Skip empty rects
if (!(
rect.bottom < -viewportExpansion ||
rect.top > window.innerHeight + viewportExpansion ||
rect.right < -viewportExpansion ||
rect.left > window.innerWidth + viewportExpansion
)) {
return true; // Found at least one rect in the viewport
}
}
return false; // No rects were found in the viewport
}
/**
 * Sums the scroll offsets of an element and its ancestors (stopping before
 * the document element) and adds the window's own scroll position.
 *
 * The work runs through measureDomOperation under the 'scrollOperations'
 * label. Returns an object of the form { scrollX, scrollY }.
 */
function getEffectiveScroll(element) {
  let ancestor = element;
  const totals = { x: 0, y: 0 };

  const accumulateScroll = () => {
    for (; ancestor && ancestor !== document.documentElement; ancestor = ancestor.parentElement) {
      // Only nodes that are actually scrolled contribute.
      if (ancestor.scrollLeft || ancestor.scrollTop) {
        totals.x += ancestor.scrollLeft;
        totals.y += ancestor.scrollTop;
      }
    }
    totals.x += window.scrollX;
    totals.y += window.scrollY;
    return { scrollX: totals.x, scrollY: totals.y };
  };

  return measureDomOperation(accumulateScroll, 'scrollOperations');
}
/**
 * Cheap pre-filter: reports whether an element might be interactive.
 *
 * Returns false for anything that is not an element node. Returns true for
 * the common interactive tags, and for elements that carry an `onclick`,
 * `role`, `tabindex` or `data-action` attribute, `contenteditable="true"`,
 * or any attribute whose name starts with `aria-`.
 *
 * The ARIA check inspects the attribute names for the `aria-` prefix.
 * `hasAttribute("aria-")` would only match an attribute literally named
 * "aria-", so it could never detect `aria-label`, `aria-expanded`, etc.
 */
function isInteractiveCandidate(element) {
  if (!element || element.nodeType !== Node.ELEMENT_NODE) return false;

  const tagName = element.tagName.toLowerCase();

  // Fast-path for common interactive elements
  const interactiveElements = new Set([
    "a", "button", "input", "select", "textarea", "details", "summary"
  ]);
  if (interactiveElements.has(tagName)) return true;

  // Quick exact-name attribute checks
  if (
    element.hasAttribute("onclick") ||
    element.hasAttribute("role") ||
    element.hasAttribute("tabindex") ||
    element.hasAttribute("data-action") ||
    element.getAttribute("contenteditable") === "true"
  ) {
    return true;
  }

  // Any ARIA attribute marks the element as a candidate.
  const attributeNames = element.getAttributeNames?.() || [];
  return attributeNames.some((name) => name.startsWith("aria-"));
}
// --- Constants used by the distinct-interaction check ---
// Tags that always count as their own interaction target.
const DISTINCT_INTERACTIVE_TAGS = new Set([
  'a', 'button', 'input', 'select', 'textarea', 'summary', 'details', 'label', 'option'
]);
// `role` attribute values that always count as their own interaction target.
const INTERACTIVE_ROLES = new Set([
  'button', 'link', 'menuitem', 'menuitemradio', 'menuitemcheckbox',
  'radio', 'checkbox', 'tab', 'switch', 'slider', 'spinbutton',
  'combobox', 'searchbox', 'textbox', 'listbox', 'option', 'scrollbar'
]);

/**
 * Reports whether an element likely represents an interaction of its own,
 * separate from an interactive parent.
 *
 * True for iframes, tags in DISTINCT_INTERACTIVE_TAGS, roles in
 * INTERACTIVE_ROLES, contenteditable elements, elements with a
 * data-testid / data-cy / data-test attribute, elements with an onclick
 * attribute or handler, and elements with other common interaction
 * listeners (read through a global getEventListeners function when one
 * exists, otherwise through inline on* attributes). False otherwise,
 * including for anything that is not an element node.
 */
function isElementDistinctInteraction(element) {
  if (!element || element.nodeType !== Node.ELEMENT_NODE) {
    return false;
  }

  const tag = element.tagName.toLowerCase();
  const declaredRole = element.getAttribute('role');

  // An iframe is always a distinct boundary; known tags and roles are distinct too.
  if (
    tag === 'iframe' ||
    DISTINCT_INTERACTIVE_TAGS.has(tag) ||
    (declaredRole && INTERACTIVE_ROLES.has(declaredRole))
  ) {
    return true;
  }

  // Editable regions
  if (element.isContentEditable || element.getAttribute('contenteditable') === 'true') {
    return true;
  }

  // Common testing/automation hooks
  const automationAttrs = ['data-testid', 'data-cy', 'data-test'];
  if (automationAttrs.some((name) => element.hasAttribute(name))) {
    return true;
  }

  // Explicit click handler (attribute or property)
  if (element.hasAttribute('onclick') || typeof element.onclick === 'function') {
    return true;
  }

  // Other common interaction listeners
  try {
    if (typeof getEventListeners === 'function') {
      const registered = getEventListeners(element);
      const watchedEvents = ['mousedown', 'mouseup', 'keydown', 'keyup', 'submit', 'change', 'input', 'focus', 'blur'];
      if (watchedEvents.some((type) => registered[type] && registered[type].length > 0)) {
        return true;
      }
    } else {
      // getEventListeners is unavailable: fall back to inline handler attributes.
      const inlineHandlerAttrs = ['onmousedown', 'onmouseup', 'onkeydown', 'onkeyup', 'onsubmit', 'onchange', 'oninput', 'onfocus', 'onblur'];
      if (inlineHandlerAttrs.some((name) => element.hasAttribute(name))) {
        return true;
      }
    }
  } catch (e) {
    // Listener inspection failed; the earlier checks are all we have.
  }

  // Interactive but unmatched: assume it triggers the same action as its parent.
  return false;
}
// --- End distinct interaction check ---
/**
 * Decides whether an interactive node receives a highlight index and,
 * when highlighting is enabled, draws the highlight.
 *
 * A node is eligible when it is interactive and either its parent was not
 * highlighted or it is a distinct interaction of its own. Eligible nodes
 * get `isInViewport` recorded; those in the viewport get the next
 * `highlightIndex`. The highlight is drawn only when `doHighlightElements`
 * is set, and, if `focusHighlightIndex` is >= 0, only for that one index.
 *
 * Returns true only when an index was assigned AND `doHighlightElements`
 * is set; false in every other case.
 */
function handleHighlighting(nodeData, node, parentIframe, isParentHighlighted) {
  // Not interactive, definitely don't highlight
  if (!nodeData.isInteractive) {
    return false;
  }

  // Under a highlighted parent, only distinct interactions qualify.
  const eligible = !isParentHighlighted || isElementDistinctInteraction(node);
  if (!eligible) {
    return false;
  }

  // Check viewport status before assigning index and highlighting
  nodeData.isInViewport = isInExpandedViewport(node, viewportExpansion);
  if (!nodeData.isInViewport) {
    return false;
  }

  nodeData.highlightIndex = highlightIndex++;
  if (!doHighlightElements) {
    return false;
  }

  // With a focus index set, draw only that element; otherwise draw all.
  const focusRequested = focusHighlightIndex >= 0;
  if (!focusRequested || focusHighlightIndex === nodeData.highlightIndex) {
    highlightElement(node, nodeData.highlightIndex, parentIframe);
  }
  return true; // Successfully highlighted
}
/**
 * Creates a node data object for a given node and its descendants.
 *
 * Recursively walks the DOM starting at `node`, stores one entry per kept node in DOM_HASH_MAP
 * and returns the id (a string taken from ID.current) under which the node was stored, or null
 * when the node is skipped. Children are processed before their parent's id is allocated, so a
 * parent always receives a higher id than its descendants.
 *
 * @param node - DOM node to process.
 * @param parentIframe - iframe element the node lives in, or null; forwarded to highlightElement.
 * @param isParentHighlighted - whether an ancestor was highlighted; used by handleHighlighting to
 * avoid highlighting children that only repeat the parent's interaction.
 * @returns id string of the stored node, or null if the node was skipped.
 */
function buildDomTree(node, parentIframe = null, isParentHighlighted = false) {
if (debugMode) PERF_METRICS.nodeMetrics.totalNodes++;
// Skip missing nodes and the overlay container that holds the highlights themselves.
if (!node || node.id === HIGHLIGHT_CONTAINER_ID) {
if (debugMode) PERF_METRICS.nodeMetrics.skippedNodes++;
return null;
}
// Special handling for root node (body)
// The body gets no visibility/interactivity checks and no attributes; only its children are walked.
if (node === document.body) {
const nodeData = {
tagName: 'body',
attributes: {},
xpath: '/body',
children: [],
};
// Process children of body
for (const child of node.childNodes) {
const domElement = buildDomTree(child, parentIframe, false); // Body's children have no highlighted parent initially
if (domElement) nodeData.children.push(domElement);
}
const id = `${ID.current++}`;
DOM_HASH_MAP[id] = nodeData;
if (debugMode) PERF_METRICS.nodeMetrics.processedNodes++;
return id;
}
// Early bailout for non-element nodes except text
if (node.nodeType !== Node.ELEMENT_NODE && node.nodeType !== Node.TEXT_NODE) {
if (debugMode) PERF_METRICS.nodeMetrics.skippedNodes++;
return null;
}
// Process text nodes
if (node.nodeType === Node.TEXT_NODE) {
const textContent = node.textContent.trim();
// Whitespace-only text is dropped.
if (!textContent) {
if (debugMode) PERF_METRICS.nodeMetrics.skippedNodes++;
return null;
}
// Only check visibility for text nodes that might be visible
const parentElement = node.parentElement;
if (!parentElement || parentElement.tagName.toLowerCase() === 'script') {
if (debugMode) PERF_METRICS.nodeMetrics.skippedNodes++;
return null;
}
const id = `${ID.current++}`;
DOM_HASH_MAP[id] = {
type: "TEXT_NODE",
text: textContent,
isVisible: isTextNodeVisible(node),
};
if (debugMode) PERF_METRICS.nodeMetrics.processedNodes++;
return id;
}
// Quick checks for element nodes
if (node.nodeType === Node.ELEMENT_NODE && !isElementAccepted(node)) {
if (debugMode) PERF_METRICS.nodeMetrics.skippedNodes++;
return null;
}
// Early viewport check - only filter out elements clearly outside viewport
if (viewportExpansion !== -1) {
const rect = getCachedBoundingRect(node); // Keep for initial quick check
const style = getCachedComputedStyle(node);
// Skip viewport check for fixed/sticky elements as they may appear anywhere
const isFixedOrSticky = style && (style.position === 'fixed' || style.position === 'sticky');
// Check if element has actual dimensions using offsetWidth/Height (quick check)
const hasSize = node.offsetWidth > 0 || node.offsetHeight > 0;
// Use getBoundingClientRect for the quick OUTSIDE check.
// isInExpandedViewport will do the more accurate check later if needed.
// Because of the !hasSize term, only elements with neither an offsetWidth nor an offsetHeight
// are dropped here; a sized element outside the viewport is still processed.
if (!rect || (!isFixedOrSticky && !hasSize && (
rect.bottom < -viewportExpansion ||
rect.top > window.innerHeight + viewportExpansion ||
rect.right < -viewportExpansion ||
rect.left > window.innerWidth + viewportExpansion
))) {
// console.log("Skipping node outside viewport (quick check):", node.tagName, rect);
if (debugMode) PERF_METRICS.nodeMetrics.skippedNodes++;
return null;
}
}
// Process element node
const nodeData = {
tagName: node.tagName.toLowerCase(),
attributes: {},
xpath: getXPathTree(node, true),
children: [],
};
// Get attributes for interactive elements or potential text containers
// (document.body returned earlier, so the 'body' case here can only be a body in another document, e.g. inside an iframe)
if (isInteractiveCandidate(node) || node.tagName.toLowerCase() === 'iframe' || node.tagName.toLowerCase() === 'body') {
const attributeNames = node.getAttributeNames?.() || [];
for (const name of attributeNames) {
nodeData.attributes[name] = node.getAttribute(name);
}
}
let nodeWasHighlighted = false;
// Perform visibility, interactivity, and highlighting checks
if (node.nodeType === Node.ELEMENT_NODE) {
nodeData.isVisible = isElementVisible(node); // isElementVisible uses offsetWidth/Height, which is fine
if (nodeData.isVisible) {
nodeData.isTopElement = isTopElement(node);
let shouldCheckInteractivity = false;
if (viewportExpansion === -1) {
// *** CHANGE: If including all, always check interactivity for visible elements ***
shouldCheckInteractivity = true;
} else {
// Original logic: Only check interactivity if it's the top element within the viewport/expansion zone
shouldCheckInteractivity = nodeData.isTopElement;
}
if (shouldCheckInteractivity) {
nodeData.isInteractive = isInteractiveElement(node);
nodeWasHighlighted = handleHighlighting(nodeData, node, parentIframe, isParentHighlighted);
}
}
}
// Process children, with special handling for iframes and rich text editors
if (node.tagName) {
const tagName = node.tagName.toLowerCase();
// Handle iframes
if (tagName === "iframe") {
try {
const iframeDoc = node.contentDocument || node.contentWindow?.document;
if (iframeDoc) {
// Children of the iframe document get this iframe as parentIframe and start with no highlighted parent.
for (const child of iframeDoc.childNodes) {
const domElement = buildDomTree(child, node, false);
if (domElement) nodeData.children.push(domElement);
}
}
} catch (e) {
console.warn("Unable to access iframe:", e);
}
}
// Handle rich text editors and contenteditable elements
else if (
node.isContentEditable ||
node.getAttribute("contenteditable") === "true" ||
node.id === "tinymce" ||
node.classList.contains("mce-content-body") ||
(tagName === "body" && node.getAttribute("data-id")?.startsWith("mce_"))
) {
// Process all child nodes to capture formatted text
for (const child of node.childNodes) {
const domElement = buildDomTree(child, parentIframe, nodeWasHighlighted);
if (domElement) nodeData.children.push(domElement);
}
}
else {
// Handle shadow DOM
// Shadow children are appended first; the regular childNodes loop below still runs afterwards.
// NOTE(review): this loop passes only nodeWasHighlighted, while the regular-children loop passes
// nodeWasHighlighted || isParentHighlighted - confirm whether the difference is intended.
if (node.shadowRoot) {
nodeData.shadowRoot = true;
for (const child of node.shadowRoot.childNodes) {
const domElement = buildDomTree(child, parentIframe, nodeWasHighlighted);
if (domElement) nodeData.children.push(domElement);
}
}
// Handle regular elements
for (const child of node.childNodes) {
// Pass the highlighted status of the *current* node to its children
const passHighlightStatusToChild = nodeWasHighlighted || isParentHighlighted;
const domElement = buildDomTree(child, parentIframe, passHighlightStatusToChild);
if (domElement) nodeData.children.push(domElement);
}
}
}
// Skip empty anchor tags
// (an anchor with no kept children and no href attribute is dropped)
if (nodeData.tagName === 'a' && nodeData.children.length === 0 && !nodeData.attributes.href) {
if (debugMode) PERF_METRICS.nodeMetrics.skippedNodes++;
return null;
}
const id = `${ID.current++}`;
DOM_HASH_MAP[id] = nodeData;
if (debugMode) PERF_METRICS.nodeMetrics.processedNodes++;
return id;
}
// After all functions are defined, wrap them with performance measurement
// Remove buildDomTree from here as we measure it separately
highlightElement = measureTime(highlightElement);
isInteractiveElement = measureTime(isInteractiveElement);
isElementVisible = measureTime(isElementVisible);
isTopElement = measureTime(isTopElement);
isInExpandedViewport = measureTime(isInExpandedViewport);
isTextNodeVisible = measureTime(isTextNodeVisible);
getEffectiveScroll = measureTime(getEffectiveScroll);
// Build the tree for the whole page; rootId is the DOM_HASH_MAP key of the body entry.
const rootId = buildDomTree(document.body);
// Clear the cache now that the tree has been built (this runs after buildDomTree, not before it)
DOM_CACHE.clearCache();
// Only process metrics in debug mode
if (debugMode && PERF_METRICS) {
// Convert timings to seconds and add useful derived metrics
Object.keys(PERF_METRICS.timings).forEach(key => {
PERF_METRICS.timings[key] = PERF_METRICS.timings[key] / 1000;
});
// Only numeric entries are scaled; nested objects in the breakdown are left untouched here.
Object.keys(PERF_METRICS.buildDomTreeBreakdown).forEach(key => {
if (typeof PERF_METRICS.buildDomTreeBreakdown[key] === 'number') {
PERF_METRICS.buildDomTreeBreakdown[key] = PERF_METRICS.buildDomTreeBreakdown[key] / 1000;
}
});
// Add some useful derived metrics
if (PERF_METRICS.buildDomTreeBreakdown.buildDomTreeCalls > 0) {
PERF_METRICS.buildDomTreeBreakdown.averageTimePerNode =
PERF_METRICS.buildDomTreeBreakdown.totalTime / PERF_METRICS.buildDomTreeBreakdown.buildDomTreeCalls;
}
PERF_METRICS.buildDomTreeBreakdown.timeInChildCalls =
PERF_METRICS.buildDomTreeBreakdown.totalTime - PERF_METRICS.buildDomTreeBreakdown.totalSelfTime;
// Add average time per operation to the metrics
Object.keys(PERF_METRICS.buildDomTreeBreakdown.domOperations).forEach(op => {
const time = PERF_METRICS.buildDomTreeBreakdown.domOperations[op];
const count = PERF_METRICS.buildDomTreeBreakdown.domOperationCounts[op];
if (count > 0) {
PERF_METRICS.buildDomTreeBreakdown.domOperations[`${op}Average`] = time / count;
}
});
// Calculate cache hit rates
const boundingRectTotal = PERF_METRICS.cacheMetrics.boundingRectCacheHits + PERF_METRICS.cacheMetrics.boundingRectCacheMisses;
const computedStyleTotal = PERF_METRICS.cacheMetrics.computedStyleCacheHits + PERF_METRICS.cacheMetrics.computedStyleCacheMisses;
if (boundingRectTotal > 0) {
PERF_METRICS.cacheMetrics.boundingRectHitRate = PERF_METRICS.cacheMetrics.boundingRectCacheHits / boundingRectTotal;
}
if (computedStyleTotal > 0) {
PERF_METRICS.cacheMetrics.computedStyleHitRate = PERF_METRICS.cacheMetrics.computedStyleCacheHits / computedStyleTotal;
}
if ((boundingRectTotal + computedStyleTotal) > 0) {
PERF_METRICS.cacheMetrics.overallHitRate =
(PERF_METRICS.cacheMetrics.boundingRectCacheHits + PERF_METRICS.cacheMetrics.computedStyleCacheHits) /
(boundingRectTotal + computedStyleTotal);
}
}
// Result handed back to the caller: the root id and the id -> node map, plus metrics in debug mode.
return debugMode ?
{ rootId, map: DOM_HASH_MAP, perfMetrics: PERF_METRICS } :
{ rootId, map: DOM_HASH_MAP };
}
```
--------------------------------------------------------------------------------
/src/browser/browser_controller.py:
--------------------------------------------------------------------------------
```python
# /src/browser/browser_controller.py
from patchright.sync_api import sync_playwright, Page, Browser, Playwright, TimeoutError as PlaywrightTimeoutError, Error as PlaywrightError, Response, Request, Locator, ConsoleMessage, expect
import logging
import time
import random
import json
import os
from typing import Optional, Any, Dict, List, Callable, Tuple
import threading
import platform
from ..dom.service import DomService
from ..dom.views import DOMState, DOMElementNode, SelectorMap
from .panel.panel import Panel
logger = logging.getLogger(__name__)
# Browser-like HTTP request headers for a top-level document navigation.
# Presumably applied to the browser context or its requests elsewhere in this
# module - confirm at the point of use.
COMMON_HEADERS = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-US,en;q=0.9',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
}
# JavaScript snippet that redefines `navigator.webdriver` so that reading it
# returns undefined.
HIDE_WEBDRIVER_SCRIPT = """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
});
"""
# --- JavaScript for click listener and selector generation ---
# Installs a capture-phase click listener on document.body. Clicks inside the
# recorder panel (#bw-recorder-panel) pass through untouched. The first click
# outside the panel is cancelled, a CSS selector for its target is generated
# (id, then data-testid, then name, then a structural "body > ..." path) and
# stored in window._recorder_override_selector, and the listener removes
# itself. The handler is also kept in window._recorderClickListener so it can
# be removed explicitly later.
#
# NOTE: this is a normal (non-raw) Python string, so every backslash meant for
# the JavaScript source must be doubled here. The escapeCSS replacement is
# written as '\\\\$1' so that JavaScript receives '\\$1' (a literal backslash
# followed by the matched character). With only '\\$1' JavaScript would see
# '\$1', which is just '$1', and nothing would be escaped.
CLICK_LISTENER_JS = """
async () => {
// Reset/Initialize the global flag/variable
window._recorder_override_selector = undefined;
console.log('[Recorder Listener] Attaching click listener...');
const PANEL_ID = 'bw-recorder-panel';
const clickHandler = async (event) => {
const targetElement = event.target;
let isPanelClick = false; // Flag to track if click is inside the panel
// Check if the click target is the panel or inside the panel
// using element.closest()
if (targetElement && targetElement.closest && targetElement.closest(`#${PANEL_ID}`)) {
console.log('[Recorder Listener] Click inside panel detected. Allowing event to proceed normally.');
isPanelClick = true;
// DO ABSOLUTELY NOTHING HERE - let the event continue to the button's own listener
}
// --- Only process as an override attempt if it was NOT a panel click ---
if (!isPanelClick) {
console.log('[Recorder Listener] Click detected (Outside panel)! Processing as override.');
event.preventDefault(); // Prevent default action (like navigation) ONLY for override clicks
event.stopPropagation(); // Stop propagation ONLY for override clicks
if (!targetElement) {
console.warn('[Recorder Listener] Override click event has no target.');
// Remove listener even if target is null to avoid getting stuck
document.body.removeEventListener('click', clickHandler, { capture: true });
console.log('[Recorder Listener] Listener removed due to null target.');
return;
}
// --- Simple Selector Generation (enhance as needed) ---
let selector = '';
function escapeCSS(value) {
if (!value) return '';
// Prefer the browser's standard CSS.escape() when it is available.
if (window.CSS && typeof window.CSS.escape === 'function') {
return window.CSS.escape(value);
}
// Fallback: prefix common CSS special chars with a backslash.
return value.replace(/([!"#$%&'()*+,./:;<=>?@\\[\\]^`{|}~])/g, '\\\\$1');
}
if (targetElement.id && targetElement.id !== PANEL_ID && targetElement.id !== 'playwright-highlight-container') {
selector = `#${escapeCSS(targetElement.id.trim())}`;
} else if (targetElement.getAttribute('data-testid')) {
selector = `[data-testid="${escapeCSS(targetElement.getAttribute('data-testid').trim())}"]`;
} else if (targetElement.name) {
selector = `${targetElement.tagName.toLowerCase()}[name="${escapeCSS(targetElement.name.trim())}"]`;
} else {
// Fallback: Basic XPath -> CSS approximation (needs improvement)
let path = '';
let current = targetElement;
while (current && current.tagName && current.tagName.toLowerCase() !== 'body' && current.parentNode) {
let segment = current.tagName.toLowerCase();
const parent = current.parentElement;
if (parent) {
const siblings = Array.from(parent.children);
const sameTagSiblings = siblings.filter(sib => sib.tagName === current.tagName);
if (sameTagSiblings.length > 1) {
let index = 1;
for(let i=0; i < sameTagSiblings.length; i++) { // Find index correctly
if(sameTagSiblings[i] === current) {
index = i + 1;
break;
}
}
// Prefer nth-child if possible, might be slightly more stable
try {
const siblingIndex = Array.prototype.indexOf.call(parent.children, current) + 1;
segment += `:nth-child(${siblingIndex})`;
} catch(e) { // Fallback if indexOf fails
segment += `:nth-of-type(${index})`;
}
}
}
path = segment + (path ? ' > ' + path : '');
current = parent;
}
selector = path ? `body > ${path}` : targetElement.tagName.toLowerCase();
console.log(`[Recorder Listener] Generated fallback selector: ${selector}`);
}
// --- End Selector Generation ---
console.log(`[Recorder Listener] Override Target: ${targetElement.tagName}, Generated selector: ${selector}`);
// Only set override if a non-empty selector was generated
if (selector) {
window._recorder_override_selector = selector;
console.log('[Recorder Listener] Override selector variable set.');
} else {
console.warn('[Recorder Listener] Could not generate a valid selector for the override click.');
}
// ---- IMPORTANT: Remove the listener AFTER processing an override click ----
// This prevents it interfering further and ensures it's gone before panel interaction waits
document.body.removeEventListener('click', clickHandler, { capture: true });
console.log('[Recorder Listener] Listener removed after processing override click.');
};
// If it WAS a panel click (isPanelClick = true), we did nothing in this handler.
// The event continues to the button's specific onclick handler.
// The listener remains attached to the body for subsequent clicks outside the panel.
};
// --- Add listener ---
// Ensure no previous listener exists before adding a new one
if (window._recorderClickListener) {
console.warn('[Recorder Listener] Removing potentially lingering listener before attaching new one.');
document.body.removeEventListener('click', window._recorderClickListener, { capture: true });
}
// Add listener in capture phase to catch clicks first
document.body.addEventListener('click', clickHandler, { capture: true });
window._recorderClickListener = clickHandler; // Store reference to remove later
}
"""
# JavaScript function source that undoes the click-listener installation:
# it removes the handler stored in window._recorderClickListener (if any),
# deletes window._recorder_override_selector when it is defined, and returns
# true only if a stored listener was found and removed.
REMOVE_CLICK_LISTENER_JS = """
() => {
let removed = false;
// Remove listener
if (window._recorderClickListener) {
document.body.removeEventListener('click', window._recorderClickListener, { capture: true });
delete window._recorderClickListener;
console.log('[Recorder Listener] Listener explicitly removed.');
removed = true;
} else {
console.log('[Recorder Listener] No active listener found to remove.');
}
// Clean up global variable
if (window._recorder_override_selector !== undefined) {
delete window._recorder_override_selector;
console.log('[Recorder Listener] Override selector variable cleaned up.');
}
return removed;
}
"""
class BrowserController:
"""Handles Playwright browser automation tasks, including console message capture."""
def __init__(self, headless=True, viewport_size=None, auth_state_path: Optional[str] = None):
    """Store the configuration and reset all runtime state.

    Nothing is launched here: the Playwright, browser, context and page
    handles all start out as ``None``.

    Args:
        headless: Stored on the instance and forwarded to the ``Panel``.
        viewport_size: Stored as given.
        auth_state_path: Optional path, stored as given.
    """
    # Caller-supplied configuration.
    self.headless = headless
    self.viewport_size = viewport_size
    self.auth_state_path = auth_state_path

    # Default timeouts (presumably milliseconds - confirm where they are applied).
    self.default_navigation_timeout = 9000
    self.default_action_timeout = 9000

    # Browser handles; none of them exists yet.
    self.playwright: Optional[Playwright] = None
    self.browser: Optional[Browser] = None
    self.context: Optional[Any] = None  # Keep context reference
    self.page: Optional[Page] = None
    self._dom_service: Optional[DomService] = None

    # Diagnostics collected while the browser runs.
    self.console_messages: List[Dict[str, Any]] = []
    self.network_requests: List[Dict[str, Any]] = []
    self.page_performance_timing: Optional[Dict[str, Any]] = None

    # self.page is still None at this point, so the panel starts without a page.
    self.panel = Panel(headless=headless, page=self.page)
    logger.info(f"BrowserController initialized (headless={headless}).")
def _handle_response(self, response: Response):
    """Network-response callback: append one summary record to ``self.network_requests``.

    The duration is ``responseEnd - requestStart`` (rounded) and is only filled in
    when both timing values are non-negative and correctly ordered; negative
    timing values are stored as ``None``.
    """
    request = response.request
    timing = request.timing

    started = timing.get('requestStart', -1)
    finished = timing.get('responseEnd', -1)

    # 0 <= started <= finished also guarantees finished >= 0.
    elapsed = round(finished - started) if 0 <= started <= finished else None

    record = {
        "url": response.url,
        "method": request.method,
        "status": response.status,
        "status_text": response.status_text,
        "start_time_ms": None if started < 0 else started,
        "end_time_ms": None if finished < 0 else finished,
        "duration_ms": elapsed,
        "resource_type": request.resource_type,
        "headers": dict(response.headers),  # response headers
        "request_headers": dict(request.headers),  # request headers
    }
    self.network_requests.append(record)
def _handle_request_failed(self, request: Request):
    """Failed-request callback: log the failure and record it in ``self.network_requests``.

    Any exception raised while building the record is logged and swallowed so the
    callback never propagates an error.
    """
    try:
        reason = request.failure
        logger.warning(f"[NETWORK.FAILED] {request.method} {request.url} - Error: {reason}")

        timing = request.timing
        # A start time may still exist even though the request never completed.
        started = timing.get('requestStart', -1) if timing else None

        self.network_requests.append({
            "url": request.url,
            "method": request.method,
            "status": None,  # no status code for a failed request
            "status_text": "Request Failed",
            "start_time_ms": started,
            "end_time_ms": None,  # failed before the response ended
            "duration_ms": None,
            "resource_type": request.resource_type,
            "headers": None,  # no response headers
            "request_headers": dict(request.headers),
            "error_text": reason,
        })
    except Exception as e:
        logger.error(f"Error within _handle_request_failed for URL {request.url}: {e}", exc_info=True)
def _handle_console_message(self, message: ConsoleMessage):
    """Console callback: store the message and mirror it to the logger.

    Messages of type 'error' or 'warning' are logged at WARNING level, everything
    else at DEBUG.
    """
    kind = message.type
    body = message.text

    self.console_messages.append({
        "timestamp": time.time(),
        "type": kind,
        "text": body,
    })

    # Mirror to the log so console output can be followed while the run is in progress.
    if kind in ['error', 'warning']:
        level = logging.WARNING
    else:
        level = logging.DEBUG
    logger.log(level, f"[CONSOLE.{kind.upper()}] {body}")
def _get_random_user_agent(self):
    """Return one randomly chosen entry from a fixed pool of desktop user-agent strings."""
    chrome_on_windows = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
    ]
    chrome_on_mac = [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
    ]
    firefox_on_windows = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0',
    ]
    # Concatenation keeps the original ordering of the pool.
    pool = chrome_on_windows + chrome_on_mac + firefox_on_windows
    return random.choice(pool)
def _get_random_viewport(self):
    """Return the viewport to use.

    When ``self.viewport_size`` is set it is returned unchanged; otherwise an
    800x600 base is returned with a small random offset on each dimension.
    """
    # Only one base size is currently enabled.
    candidates = [
        {'width': 800, 'height': 600},
    ]
    chosen = random.choice(candidates)

    if self.viewport_size:
        return self.viewport_size

    # Jitter the base size slightly.
    chosen['width'] += random.randint(-10, 10)
    chosen['height'] += random.randint(-5, 5)
    return chosen
def _human_like_delay(self, min_secs: float, max_secs: float):
    """Block for a uniformly random number of seconds between ``min_secs`` and ``max_secs``."""
    pause = random.uniform(min_secs, max_secs)
    logger.debug(f"Applying human-like delay: {pause:.2f} seconds")
    time.sleep(pause)
def _get_locator(self, selector: str):
    """
    Build a Playwright locator for the first element matching ``selector``.

    Selectors that already carry a ``css=`` or ``xpath=`` prefix are passed
    through untouched. Plain XPath expressions are detected heuristically and
    given an explicit ``xpath=`` prefix; everything else is left for Playwright
    to treat as CSS.

    Args:
        selector: CSS selector, XPath expression, or engine-prefixed selector.

    Returns:
        ``page.locator(...).first`` for the processed selector.

    Raises:
        PlaywrightError: If the page is not initialized or locator creation fails.
        ValueError: If ``selector`` is empty.
    """
    if not self.page:
        raise PlaywrightError("Page is not initialized.")
    if not selector:
        raise ValueError("Selector cannot be empty.")

    has_engine_prefix = selector.startswith(('css=', 'xpath='))

    # A bare leading '.' is a CSS class selector (".btn"), not XPath. Only "./",
    # ".." or a lone "." are XPath steps, so a single leading dot must not
    # trigger the xpath= prefix.
    starts_like_xpath = selector.startswith(('/', '(', './', '..')) or selector == '.'

    # A '/' with none of the characters typical of CSS also suggests a path expression.
    css_chars = ('#', '.', '[', '>', '+', '~', '=')
    looks_like_path = '/' in selector and not any(c in selector for c in css_chars)

    processed_selector = selector
    if (starts_like_xpath or looks_like_path) and not has_engine_prefix:
        logger.debug(f"Selector '{selector}' looks like XPath. Using explicit 'xpath=' prefix.")
        processed_selector = f"xpath={selector}"

    try:
        logger.debug(f"Attempting to create locator using: '{processed_selector}'")
        # .first keeps every action targeted at a single element.
        return self.page.locator(processed_selector).first
    except Exception as e:
        # Errors raised while creating the locator itself (e.g. invalid selector syntax).
        logger.error(f"Failed to create locator for processed selector: '{processed_selector}'. Original: '{selector}'. Error: {e}")
        raise PlaywrightError(f"Invalid selector syntax or error creating locator: '{processed_selector}'. Error: {e}") from e
# Recorder Methods =============
def setup_click_listener(self) -> bool:
    """Inject the recorder click-listener script into the page.

    Returns:
        True when the script was evaluated without error; False in headless
        mode, when no page exists, or when evaluation raised.
    """
    if self.headless:
        logger.error("Cannot set up click listener in headless mode.")
        return False

    if not self.page:
        logger.error("Page not initialized. Cannot set up click listener.")
        return False

    try:
        self.page.evaluate(CLICK_LISTENER_JS)
    except Exception as e:
        logger.error(f"Failed to set up recorder click listener: {e}", exc_info=True)
        return False
    else:
        logger.info("JavaScript click listener attached (using pre-exposed callback).")
        return True
def remove_click_listener(self) -> bool:
    """Run the listener-removal script in the page.

    Returns:
        True immediately in headless mode (nothing was attached); otherwise the
        value returned by the script, or False when no page exists or the
        evaluation raised.
    """
    if self.headless:
        return True  # Nothing to remove

    page = self.page
    if not page:
        logger.warning("Page not initialized. Cannot remove click listener.")
        return False

    try:
        return page.evaluate(REMOVE_CLICK_LISTENER_JS)
    except Exception as e:
        logger.error(f"Failed to remove recorder click listener: {e}", exc_info=True)
        return False
def wait_for_user_click_or_timeout(self, timeout_seconds: float) -> Optional[str]:
    """
    Block until the page sets ``window._recorder_override_selector`` or the timeout expires.

    Must be called after ``setup_click_listener``. The click listener is removed
    before returning, whatever the outcome.

    Returns:
        The value stored in ``window._recorder_override_selector`` when it was
        set in time; None in headless mode, without a page, on timeout, or on
        any other error.
    """
    if self.headless:
        return None
    if not self.page:
        logger.error("Page not initialized. Cannot wait for click function.")
        return None

    flag_is_set = "() => window._recorder_override_selector !== undefined"
    wait_ms = timeout_seconds * 1000
    clicked_selector = None

    logger.info(f"Waiting up to {timeout_seconds}s for user click (checking JS flag)...")
    try:
        self.page.wait_for_function(flag_is_set, timeout=wait_ms)
        # Reaching this line means the flag was set before the timeout.
        logger.info("User click detected (JS flag set)!")
        clicked_selector = self.page.evaluate("window._recorder_override_selector")
        logger.debug(f"Retrieved selector from JS flag: {clicked_selector}")
    except PlaywrightTimeoutError:
        logger.info("Timeout reached waiting for user click (JS flag not set).")
        clicked_selector = None
    except Exception as e:
        # Any other failure is reported the same way as a timeout.
        logger.error(f"Error during page.wait_for_function: {e}", exc_info=True)
        clicked_selector = None
    finally:
        # Always detach the listener and clear the flag.
        self.remove_click_listener()

    return clicked_selector
# Highlighting elements
def highlight_element(self, selector: str, index: int, color: str = "#FF0000", text: Optional[str] = None, node_xpath: Optional[str] = None):
"""Draw a highlight box and an index label over one element in the page.

Does nothing when the controller is headless or no page is open. The
injected script first tries ``document.querySelector(selector)``; if that
throws or finds nothing and ``node_xpath`` is given, it falls back to
evaluating that XPath. Elements that cannot be found, or whose bounding
rect has zero width or height, are skipped without highlighting.

Args:
selector: Selector handed to ``document.querySelector``.
index: Number shown in the label.
color: CSS colour for the border and label background; the script appends
'1A' to it for the translucent fill.
text: Optional text; when given the label reads "index: text".
node_xpath: Optional XPath used when the selector lookup fails.

Errors raised by ``page.evaluate`` are logged as warnings and not re-raised.
"""
# Highlighting is skipped when headless or when no page is open.
if self.headless or not self.page: return
try:
# The whole overlay is built by the script below; the arguments are passed
# as a single object and destructured inside the script.
self.page.evaluate("""
(args) => {
const { selector, index, color, text, node_xpath } = args;
const HIGHLIGHT_CONTAINER_ID = "bw-highlight-container"; // Unique ID
let container = document.getElementById(HIGHLIGHT_CONTAINER_ID);
if (!container) {
container = document.createElement("div");
container.id = HIGHLIGHT_CONTAINER_ID;
container.style.position = "fixed";
container.style.pointerEvents = "none";
container.style.top = "0";
container.style.left = "0";
container.style.width = "0"; // Occupy no space
container.style.height = "0";
container.style.zIndex = "2147483646"; // Below listener potentially
document.body.appendChild(container);
}
let element = null;
try {
element = document.querySelector(selector);
} catch (e) {
console.warn(`[Highlighter] querySelector failed for '${selector}': ${e.message}.`);
element = null; // Ensure element is null if querySelector fails
}
// --- Fallback to XPath if CSS failed AND xpath is available ---
if (!element && node_xpath) {
console.log(`[Highlighter] Falling back to XPath: ${node_xpath}`);
try {
element = document.evaluate(
node_xpath,
document,
null,
XPathResult.FIRST_ORDERED_NODE_TYPE,
null
).singleNodeValue;
} catch (e) {
console.error(`[Highlighter] XPath evaluation failed for '${node_xpath}': ${e.message}`);
element = null;
}
}
// ------------------------------------------------------------
if (!element) {
console.warn(`[Highlighter] Element not found using selector '${selector}' or XPath '${node_xpath}'. Cannot highlight.`);
return;
}
const rect = element.getBoundingClientRect();
if (!rect || rect.width === 0 || rect.height === 0) return; // Don't highlight non-rendered
const overlay = document.createElement("div");
overlay.style.position = "fixed";
overlay.style.border = `2px solid ${color}`;
overlay.style.backgroundColor = color + '1A'; // 10% opacity
overlay.style.pointerEvents = "none";
overlay.style.boxSizing = "border-box";
overlay.style.top = `${rect.top}px`;
overlay.style.left = `${rect.left}px`;
overlay.style.width = `${rect.width}px`;
overlay.style.height = `${rect.height}px`;
overlay.style.zIndex = "2147483646";
overlay.setAttribute('data-highlight-selector', selector); // Mark for cleanup
container.appendChild(overlay);
const label = document.createElement("div");
const labelText = text ? `${index}: ${text}` : `${index}`;
label.style.position = "fixed";
label.style.background = color;
label.style.color = "white";
label.style.padding = "1px 4px";
label.style.borderRadius = "4px";
label.style.fontSize = "10px";
label.style.fontWeight = "bold";
label.style.zIndex = "2147483647";
label.textContent = labelText;
label.setAttribute('data-highlight-selector', selector); // Mark for cleanup
// Position label top-left, slightly offset
let labelTop = rect.top - 18;
let labelLeft = rect.left;
// Adjust if label would go off-screen top
if (labelTop < 0) labelTop = rect.top + 2;
label.style.top = `${labelTop}px`;
label.style.left = `${labelLeft}px`;
container.appendChild(label);
}
""", {"selector": selector, "index": index, "color": color, "text": text, "node_xpath": node_xpath})
except Exception as e:
# Highlighting is best-effort: a failure is only logged.
logger.warning(f"Failed to highlight element '{selector}': {e}")
def clear_highlights(self):
    """Empty the highlight container that ``highlight_element`` fills.

    Does nothing when headless or when no page is open. Evaluation errors are
    logged as warnings and not re-raised.
    """
    if self.headless:
        return
    if not self.page:
        return

    # The script text is kept exactly as it is sent to the page.
    wipe_script = """
() => {
const container = document.getElementById("bw-highlight-container");
if (container) {
container.innerHTML = ''; // Clear contents efficiently
}
}
"""
    try:
        self.page.evaluate(wipe_script)
    except Exception as e:
        logger.warning(f"Could not clear highlights: {e}")
# Getters
def get_structured_dom(self, highlight_all_clickable_elements: bool = True, viewport_expansion: int = 0) -> Optional[DOMState]:
    """
    Ask DomService for a structured view of the page's interactive elements.

    Args:
        highlight_all_clickable_elements: Accepted for compatibility; the value is
            overridden to False below, so DOM-build highlighting is never requested.
        viewport_expansion: Passed through to ``DomService.get_clickable_elements``.

    Returns:
        The DOMState from DomService, with ``css_selector`` filled in for every
        node in its selector map that lacked one, or None on any error.
    """
    # Forced off to avoid confusion with the separate next-action highlight.
    highlight_all_clickable_elements = False

    if not self.page:
        logger.error("Browser/Page not initialized or DomService unavailable.")
        return None

    # Create the DOM service on first use.
    if not self._dom_service:
        self._dom_service = DomService(self.page)

    # Non-headless is assumed to be recorder mode, where highlighting is done
    # separately through highlight_element.
    if self.headless == False:
        highlight_all_clickable_elements = False

    if not self._dom_service:
        logger.error("DomService unavailable.")
        return None

    try:
        logger.info(f"Requesting structured DOM (highlight={highlight_all_clickable_elements}, expansion={viewport_expansion})...")
        began = time.time()
        dom_state = self._dom_service.get_clickable_elements(
            highlight_elements=highlight_all_clickable_elements,
            focus_element=-1,  # no specific element is focused
            viewport_expansion=viewport_expansion,
        )
        ended = time.time()
        logger.info(f"Structured DOM retrieved in {ended - began:.2f}s. Found {len(dom_state.selector_map)} interactive elements.")

        # Fill in missing selectors right away so the recorder can use them.
        selector_map = dom_state.selector_map if dom_state else None
        if selector_map:
            for element in selector_map.values():
                if element.css_selector:
                    continue
                element.css_selector = self.get_selector_for_node(element)
        return dom_state
    except Exception as e:
        logger.error(f"Error getting structured DOM: {type(e).__name__}: {e}", exc_info=True)
        return None
def get_selector_for_node(self, node: DOMElementNode) -> Optional[str]:
    """Return a CSS selector for ``node``, or its XPath when selector generation fails.

    Returns None when ``node`` is falsy.
    """
    if not node:
        return None

    try:
        selector = DomService._enhanced_css_selector_for_element(node)
    except Exception as e:
        logger.error(f"Error generating selector for node {node.xpath}: {e}", exc_info=True)
        return node.xpath  # fall back to the XPath
    return selector
def get_performance_timing(self) -> Optional[Dict[str, Any]]:
    """Read ``window.performance.timing`` from the page and cache it on the instance.

    Returns:
        The parsed timing dict (also stored in ``self.page_performance_timing``),
        or None when no page exists, the page returns nothing, or an error occurs.
    """
    if not self.page:
        logger.error("Cannot get performance timing, page not initialized.")
        return None

    try:
        # The page serialises the object to JSON; it is parsed back here.
        raw = self.page.evaluate("() => JSON.stringify(window.performance.timing)")
        if not raw:
            logger.warning("window.performance.timing unavailable or empty.")
            return None
        self.page_performance_timing = json.loads(raw)
        logger.debug("Retrieved window.performance.timing.")
        return self.page_performance_timing
    except Exception as e:
        logger.error(f"Error getting performance timing: {e}", exc_info=True)
        return None
def get_current_url(self) -> str:
    """Return the page's current URL, or an error string when it cannot be read."""
    page = self.page
    if not page:
        return "Error: Browser not started."

    try:
        current = page.url
    except Exception as e:
        logger.error(f"Error getting current URL: {e}", exc_info=True)
        return f"Error retrieving URL: {e}"
    return current
def get_browser_version(self) -> str:
    """Return "<browser type name> <version>", or "Unknown" when no browser is attached.

    If reading the version raises, a warning is logged and only the browser type
    name is returned.
    """
    if not self.browser:
        return "Unknown"

    try:
        engine = self.browser.browser_type.name
        version = self.browser.version
        return f"{engine} {version}"
    except Exception:
        logger.warning("Could not retrieve exact browser version.")
        return self.browser.browser_type.name if self.browser else "Unknown"
def get_os_info(self) -> str:
    """Return "<platform.system()> <platform.release()>", or "Unknown" if that lookup raises."""
    try:
        system_name = platform.system()
        release = platform.release()
        return f"{system_name} {release}"
    except Exception:
        logger.warning("Could not retrieve OS information.")
        return "Unknown"
def get_viewport_size(self) -> Optional[Dict[str, int]]:
    """Return the page's ``viewport_size`` value, or None when there is no page or the read fails."""
    page = self.page
    if not page:
        return None

    try:
        size = page.viewport_size
    except Exception:
        logger.warning("Could not retrieve viewport size.")
        return None
    return size
def get_console_messages(self) -> List[Dict[str, Any]]:
    """Return a new list holding the console messages captured so far."""
    snapshot = [*self.console_messages]
    return snapshot
def clear_console_messages(self):
    """Drop all captured console messages by rebinding the attribute to a new empty list."""
    logger.debug("Clearing captured console messages.")
    # Rebind rather than clear in place, so lists handed out earlier are untouched.
    self.console_messages = list()
def get_network_requests(self) -> List[Dict[str, Any]]:
    """Return a new list holding the network request records captured so far."""
    snapshot = [*self.network_requests]
    return snapshot
def clear_network_requests(self):
    """Drop all captured network records by rebinding the attribute to a new empty list."""
    logger.debug("Clearing captured network requests.")
    # Rebind rather than clear in place, so lists handed out earlier are untouched.
    self.network_requests = list()
def validate_assertion(self, assertion_type: str, selector: str, params: Dict[str, Any], timeout_ms: int = 3000) -> Tuple[bool, Optional[str]]:
    """
    Quickly check with Playwright whether a proposed assertion currently holds.

    Args:
        assertion_type: The type of assertion (e.g., 'assert_visible').
        selector: Selector for the target element (CSS or XPath, resolved via
            ``_get_locator``).
        params: Parameters for the assertion (e.g. ``expected_text``). ``None`` is
            treated as an empty dict.
        timeout_ms: Short timeout for the validation check.

    Returns:
        ``(True, None)`` if validation passes, ``(False, error_message)`` if it
        fails or cannot be performed. No exception is propagated to the caller.
    """
    def _first_line(exc: BaseException) -> str:
        # An exception with an empty message would make ``splitlines()[0]`` raise
        # IndexError inside the handler; fall back to the class name instead.
        lines = str(exc).splitlines()
        return lines[0] if lines else type(exc).__name__

    if not self.page:
        return False, "Page not initialized."
    if not selector:
        # 'assert_llm_verification' is the one assertion that may have no selector.
        if assertion_type == 'assert_llm_verification':
            logger.info("Skipping validation for 'assert_llm_verification' as it relies on external LLM check.")
            return True, None
        return False, "Selector is required for validation."
    if not assertion_type:
        return False, "Assertion type is required for validation."

    logger.info(f"Validating assertion: {assertion_type} on '{selector}' with params {params} (timeout: {timeout_ms}ms)")
    # Tolerate a missing params mapping so the "Missing ... parameter" messages
    # are returned instead of an AttributeError report.
    params = params or {}
    try:
        locator = self._get_locator(selector)  # handles XPath vs CSS

        if assertion_type == 'assert_visible':
            expect(locator).to_be_visible(timeout=timeout_ms)
        elif assertion_type == 'assert_hidden':
            expect(locator).to_be_hidden(timeout=timeout_ms)
        elif assertion_type == 'assert_text_equals':
            expected_text = params.get('expected_text')
            if expected_text is None:
                return False, "Missing 'expected_text' parameter for assert_text_equals"
            expect(locator).to_have_text(expected_text, timeout=timeout_ms)
        elif assertion_type == 'assert_text_contains':
            expected_text = params.get('expected_text')
            if expected_text is None:
                return False, "Missing 'expected_text' parameter for assert_text_contains"
            expect(locator).to_contain_text(expected_text, timeout=timeout_ms)
        elif assertion_type == 'assert_attribute_equals':
            attr_name = params.get('attribute_name')
            expected_value = params.get('expected_value')
            if not attr_name:
                return False, "Missing 'attribute_name' parameter"
            # A missing expected value is compared against the empty string.
            expect(locator).to_have_attribute(attr_name, expected_value if expected_value is not None else "", timeout=timeout_ms)
        elif assertion_type == 'assert_element_count':
            expected_count = params.get('expected_count')
            if expected_count is None:
                return False, "Missing 'expected_count' parameter"
            # Counting needs every match, not just the first one.
            all_matches_locator = self.page.locator(selector)
            expect(all_matches_locator).to_have_count(expected_count, timeout=timeout_ms)
        elif assertion_type == 'assert_checked':
            expect(locator).to_be_checked(timeout=timeout_ms)
        elif assertion_type == 'assert_not_checked':
            expect(locator).not_to_be_checked(timeout=timeout_ms)
        elif assertion_type == 'assert_enabled':
            expect(locator).to_be_enabled(timeout=timeout_ms)
        elif assertion_type == 'assert_disabled':
            expect(locator).to_be_disabled(timeout=timeout_ms)
        elif assertion_type == 'assert_llm_verification':
            # Validated externally by the LLM during execution; treated as passed here.
            logger.info("Skipping Playwright validation for 'assert_llm_verification'.")
        else:
            return False, f"Unsupported assertion type for validation: {assertion_type}"

        # No exception from expect() means the condition held.
        logger.info(f"Validation successful for {assertion_type} on '{selector}'.")
        return True, None

    except PlaywrightTimeoutError as e:
        err_msg = f"Validation failed for {assertion_type} on '{selector}': Timeout ({timeout_ms}ms) - {_first_line(e)}"
        logger.warning(err_msg)
        return False, err_msg
    except AssertionError as e:  # expect() assertion failures
        err_msg = f"Validation failed for {assertion_type} on '{selector}': Condition not met - {_first_line(e)}"
        logger.warning(err_msg)
        return False, err_msg
    except PlaywrightError as e:
        err_msg = f"Validation failed for {assertion_type} on '{selector}': PlaywrightError - {_first_line(e)}"
        logger.warning(err_msg)
        return False, err_msg
    except Exception as e:
        err_msg = f"Unexpected error during validation for {assertion_type} on '{selector}': {type(e).__name__} - {e}"
        logger.error(err_msg, exc_info=True)
        return False, err_msg
def goto(self, url: str):
    """Navigate the page to ``url`` and capture performance timing afterwards.

    Waits for the 'load' event, pauses one second, then refreshes the cached
    performance timing. A non-OK response is only logged as a warning.

    Raises:
        PlaywrightError: When the browser is not started or navigation fails.
        PlaywrightTimeoutError: When navigation times out.
    """
    if not self.page:
        raise PlaywrightError("Browser not started. Call start() first.")

    try:
        logger.info(f"Navigating to URL: {url}")
        nav_response = self.page.goto(url, wait_until='load', timeout=self.default_navigation_timeout)

        # Short settling pause after the load event.
        time.sleep(1)

        if nav_response:
            status = nav_response.status
        else:
            status = 'unknown'

        # Capture performance timing after navigation.
        self.get_performance_timing()
        logger.info(f"Navigation to {url} finished with status: {status}.")

        if nav_response and not nav_response.ok:
            logger.warning(f"Navigation to {url} resulted in non-OK status: {status}")
    except PlaywrightTimeoutError as e:
        logger.error(f"Timeout navigating to {url}: {e}")
        # Re-raise with a clearer message for the agent.
        raise PlaywrightTimeoutError(f"Timeout loading page {url}. The page might be too slow or unresponsive.") from e
    except PlaywrightError as e:
        logger.error(f"Playwright error navigating to {url}: {e}")
        raise PlaywrightError(f"Error navigating to {url}: {e}") from e
    except Exception as e:
        logger.error(f"Unexpected error navigating to {url}: {e}", exc_info=True)
        raise
def check(self, selector: str):
    """Check the first checkbox or radio button matching ``selector``.

    On timeout a screenshot is saved under ``output/`` and its path is included
    in the raised error.

    Raises:
        PlaywrightError: Browser not started, or any non-timeout failure.
        PlaywrightTimeoutError: The element did not become actionable in time.
    """
    if not self.page:
        raise PlaywrightError("Browser not started.")

    try:
        logger.info(f"Attempting to check element: {selector}")
        target = self.page.locator(selector).first
        # check() performs Playwright's own actionability checks.
        target.check(timeout=self.default_action_timeout)
        logger.info(f"Checked element: {selector}")
        self._human_like_delay(0.2, 0.5)
    except PlaywrightTimeoutError as e:
        logger.error(f"Timeout ({self.default_action_timeout}ms) waiting for element '{selector}' to be actionable for check.")
        # Spaces, ':' and '>' become '_' and the result is cut to 30 characters.
        name_part = selector.translate(str.maketrans(' :>', '___'))[:30]
        stamp = int(time.time())
        screenshot_path = f"output/check_timeout_{name_part}_{stamp}.png"
        self.save_screenshot(screenshot_path)
        logger.error(f"Saved screenshot on check timeout to: {screenshot_path}")
        raise PlaywrightTimeoutError(f"Timeout trying to check element: '{selector}'. Check visibility and enabled state. Screenshot: {screenshot_path}") from e
    except PlaywrightError as e:
        logger.error(f"PlaywrightError checking element '{selector}': {e}")
        raise PlaywrightError(f"Failed to check element '{selector}': {e}") from e
    except Exception as e:
        logger.error(f"Unexpected error checking '{selector}': {e}", exc_info=True)
        raise PlaywrightError(f"Unexpected error checking element '{selector}': {e}") from e
def uncheck(self, selector: str):
    """Uncheck the first checkbox matching ``selector``.

    On timeout a screenshot is saved under ``output/`` and its path is included
    in the raised error.

    Raises:
        PlaywrightError: Browser not started, or any non-timeout failure.
        PlaywrightTimeoutError: The element did not become actionable in time.
    """
    if not self.page:
        raise PlaywrightError("Browser not started.")

    try:
        logger.info(f"Attempting to uncheck element: {selector}")
        target = self.page.locator(selector).first
        # uncheck() performs Playwright's own actionability checks.
        target.uncheck(timeout=self.default_action_timeout)
        logger.info(f"Unchecked element: {selector}")
        self._human_like_delay(0.2, 0.5)
    except PlaywrightTimeoutError as e:
        logger.error(f"Timeout ({self.default_action_timeout}ms) waiting for element '{selector}' to be actionable for uncheck.")
        # Spaces, ':' and '>' become '_' and the result is cut to 30 characters.
        name_part = selector.translate(str.maketrans(' :>', '___'))[:30]
        stamp = int(time.time())
        screenshot_path = f"output/uncheck_timeout_{name_part}_{stamp}.png"
        self.save_screenshot(screenshot_path)
        logger.error(f"Saved screenshot on uncheck timeout to: {screenshot_path}")
        raise PlaywrightTimeoutError(f"Timeout trying to uncheck element: '{selector}'. Screenshot: {screenshot_path}") from e
    except PlaywrightError as e:
        logger.error(f"PlaywrightError unchecking element '{selector}': {e}")
        raise PlaywrightError(f"Failed to uncheck element '{selector}': {e}") from e
    except Exception as e:
        logger.error(f"Unexpected error unchecking '{selector}': {e}", exc_info=True)
        raise PlaywrightError(f"Unexpected error unchecking element '{selector}': {e}") from e
def take_screenshot(self) -> bytes | None:
    """Capture the current page and return the image bytes, or None on failure."""
    page = self.page
    if not page:
        logger.error("Cannot take screenshot, browser not started.")
        return None

    try:
        image = page.screenshot()
        logger.info("Screenshot taken (bytes).")
        return image
    except Exception as e:
        logger.error(f"Error taking screenshot: {e}", exc_info=True)
        return None
def save_screenshot(self, file_path: str) -> bool:
    """Capture the current page and write it to ``file_path``.

    The parent directory is created if needed.

    Returns:
        True when the file was written; False when no page exists or anything
        raised (the error is logged, not propagated).
    """
    if not self.page:
        logger.error(f"Cannot save screenshot to {file_path}, browser not started.")
        return False

    try:
        target = os.path.abspath(file_path)
        parent_dir = os.path.dirname(target)
        os.makedirs(parent_dir, exist_ok=True)
        self.page.screenshot(path=target)
        logger.info(f"Screenshot saved to: {target}")
        return True
    except Exception as e:
        logger.error(f"Error saving screenshot to {file_path}: {e}", exc_info=True)
        return False
def click(self, selector: str):
    """Click the first element matching ``selector``.

    A short hover is attempted first; if it fails the click proceeds anyway. On
    a click timeout a screenshot is saved under ``output/`` and its path is
    included in the raised error.

    Raises:
        PlaywrightError: Browser not started, or any non-timeout failure.
        PlaywrightTimeoutError: The element did not become actionable in time.
    """
    if not self.page:
        raise PlaywrightError("Browser not started.")

    try:
        logger.info(f"Attempting to click element: {selector}")
        target = self.page.locator(selector).first
        logger.debug(f"Executing click on locator for '{selector}' (with built-in checks)...")
        press_delay = random.uniform(50, 150)

        # Best-effort hover before the click.
        try:
            target.hover(timeout=3000)
            self._human_like_delay(0.1, 0.3)
        except Exception:
            logger.debug(f"Hover failed or timed out for {selector}, proceeding with click.")

        # The click carries its own timeout.
        target.click(delay=press_delay, timeout=self.default_action_timeout)
        logger.info(f"Clicked element: {selector}")
        self._human_like_delay(0.5, 1.5)
    except PlaywrightTimeoutError as e:
        # The timeout happened during the click's internal waits.
        logger.error(f"Timeout ({self.default_action_timeout}ms) waiting for element '{selector}' to be actionable for click. Element might be obscured, disabled, unstable, or not found.")
        # Spaces, ':' and '>' become '_' and the result is cut to 30 characters.
        name_part = selector.translate(str.maketrans(' :>', '___'))[:30]
        stamp = int(time.time())
        screenshot_path = f"output/click_timeout_{name_part}_{stamp}.png"
        self.save_screenshot(screenshot_path)
        logger.error(f"Saved screenshot on click timeout to: {screenshot_path}")
        raise PlaywrightTimeoutError(f"Timeout trying to click element: '{selector}'. Check visibility, interactability, and selector correctness. Screenshot saved to {screenshot_path}") from e
    except PlaywrightError as e:
        logger.error(f"PlaywrightError clicking element '{selector}': {e}")
        raise PlaywrightError(f"Failed to click element '{selector}': {e}") from e
    except Exception as e:
        logger.error(f"Unexpected error clicking '{selector}': {e}", exc_info=True)
        raise PlaywrightError(f"Unexpected error clicking element '{selector}': {e}") from e
def type(self, selector: str, text: str):
    """
    Input text into the first element matching ``selector``.

    ``fill`` is tried first; if it fails with a Playwright error, the field is
    cleared and the text is typed key by key as a fallback. When both strategies
    fail, a screenshot is saved under ``output/`` and one ``PlaywrightError``
    describing the failure is raised.

    Args:
        selector: Selector handed to ``page.locator``.
        text: Text to enter.

    Raises:
        PlaywrightError: Browser not started, both strategies failed, or any
            other failure while preparing the input.
        PlaywrightTimeoutError: A timeout outside the two guarded strategies.
    """
    if not self.page:
        raise PlaywrightError("Browser not started.")

    # Holds the fallback failure. The combined error is raised after the outer
    # ``try`` so the outer ``except PlaywrightError`` cannot catch and re-wrap
    # it (which used to log the failure twice and nest the message).
    last_error = None
    try:
        logger.info(f"Attempting to input text '{text[:30]}...' into element: {selector}")
        locator = self.page.locator(selector).first

        # --- Strategy 1: fill() clears the field and sets the text ---
        logger.debug(f"Trying to 'fill' locator for '{selector}' (includes actionability checks)...")
        try:
            if not self.headless:
                time.sleep(0.2)
            locator.fill(text, timeout=self.default_action_timeout)
            logger.info(f"'fill' successful for element: {selector}")
            self._human_like_delay(0.3, 0.8)
            return
        except (PlaywrightTimeoutError, PlaywrightError) as fill_error:
            logger.warning(f"'fill' action failed for '{selector}': {fill_error}. Attempting fallback to 'type'.")

        # --- Strategy 2: clear, then type key by key ---
        logger.debug(f"Trying fallback 'type' for locator '{selector}'...")
        try:
            # Quick clear attempt so the typed text does not append to old content.
            locator.clear(timeout=self.default_action_timeout * 0.5)
            self._human_like_delay(0.1, 0.3)
            typing_delay_ms = random.uniform(90, 180)
            locator.type(text, delay=typing_delay_ms, timeout=self.default_action_timeout)
            logger.info(f"Fallback 'type' successful for element: {selector}")
            self._human_like_delay(0.3, 0.8)
            return
        except (PlaywrightTimeoutError, PlaywrightError) as type_error:
            # The ``as`` name is unbound when the handler ends, so keep a reference.
            last_error = type_error
    except PlaywrightTimeoutError as e:
        logger.error(f"Timeout ({self.default_action_timeout}ms) during input operation stages for selector: '{selector}'. Element might not become actionable.")
        screenshot_path = f"output/input_timeout_{selector.replace(' ','_').replace(':','_').replace('>','_')[:30]}_{int(time.time())}.png"
        self.save_screenshot(screenshot_path)
        logger.error(f"Saved screenshot on input timeout to: {screenshot_path}")
        raise PlaywrightTimeoutError(f"Timeout trying to input text into element: '{selector}'. Check interactability. Screenshot: {screenshot_path}") from e
    except PlaywrightError as e:
        logger.error(f"PlaywrightError inputting text into element '{selector}': {e}")
        raise PlaywrightError(f"Failed to input text into element '{selector}': {e}") from e
    except Exception as e:
        logger.error(f"Unexpected error inputting text into '{selector}': {e}", exc_info=True)
        raise PlaywrightError(f"Unexpected error inputting text into element '{selector}': {e}") from e

    # Reached only when both strategies failed.
    logger.error(f"Both 'fill' and fallback 'type' failed for '{selector}'. Last error ('type'): {last_error}")
    screenshot_path = f"output/type_fail_{selector.replace(' ','_').replace(':','_').replace('>','_')[:30]}_{int(time.time())}.png"
    self.save_screenshot(screenshot_path)
    logger.error(f"Saved screenshot on type failure to: {screenshot_path}")
    raise PlaywrightError(f"Failed to input text into element '{selector}' using both fill and type. Last error: {last_error}. Screenshot: {screenshot_path}") from last_error
def scroll(self, direction: str):
    """Scroll the page by one viewport height and pause briefly afterwards.

    Args:
        direction: "down" or "up". Any other value is logged as a warning
            and ignored (no scroll, no delay).

    Raises:
        PlaywrightError: If no page is available (browser not started).

    Errors raised while scrolling are logged and not re-raised.
    """
    if not self.page:
        raise PlaywrightError("Browser not started.")
    try:
        if direction not in ("down", "up"):
            logger.warning(f"Invalid scroll direction: {direction}")
            return  # Nothing was scrolled, so skip the post-scroll pause
        going_down = direction == "down"
        # A negative vertical offset moves towards the top of the page.
        offset = "window.innerHeight" if going_down else "-window.innerHeight"
        self.page.evaluate(f"window.scrollBy(0, {offset})")
        logger.info("Scrolled down." if going_down else "Scrolled up.")
        self._human_like_delay(0.4, 0.8)
    except Exception as scroll_error:
        logger.error(f"Error scrolling {direction}: {scroll_error}", exc_info=True)
def press(self, selector: str, keys: str):
    """Send a key press to the element matched by ``selector``.

    The element is first required to be enabled and visible (each check is
    given half of the default action timeout), then ``keys`` is pressed on it
    and a short pause follows.

    Args:
        selector: Selector of the element that receives the key press.
        keys: Key or key combination forwarded to ``locator.press``.

    Raises:
        PlaywrightError: If the browser is not started, or if locating,
            checking or pressing fails. For Playwright/assertion failures a
            screenshot is saved and its path is included in the message.
    """
    if not self.page:
        raise PlaywrightError("Browser not started.")
    try:
        logger.info(f"Attempting to press '{keys}' on element: {selector}")
        element = self._get_locator(selector)
        # Actionability pre-checks before sending the key(s).
        expect(element).to_be_enabled(timeout=self.default_action_timeout / 2)
        expect(element).to_be_visible(timeout=self.default_action_timeout / 2)
        element.press(keys, timeout=self.default_action_timeout)
        logger.info(f"Pressed '{keys}' on element: {selector}")
        self._human_like_delay(0.2, 0.6)
    except (PlaywrightTimeoutError, PlaywrightError, AssertionError) as press_error:
        # AssertionError is what a failed expect(...) pre-check raises.
        failure = f"Timeout or error pressing '{keys}' on element '{selector}': {type(press_error).__name__} - {press_error}"
        logger.error(failure)
        # Replace ' ', ':' and '>' with '_' and cap the length for the file name.
        safe_selector = selector.translate(str.maketrans(" :>", "___"))[:30]
        shot_path = f"output/press_fail_{safe_selector}_{int(time.time())}.png"
        self.save_screenshot(shot_path)
        logger.error(f"Saved screenshot on press failure to: {shot_path}")
        raise PlaywrightError(f"{failure}. Screenshot: {shot_path}") from press_error
    except Exception as unexpected:
        logger.error(f"Unexpected error pressing '{keys}' on '{selector}': {unexpected}", exc_info=True)
        raise PlaywrightError(f"Unexpected error pressing '{keys}' on element '{selector}': {unexpected}") from unexpected
def drag_and_drop(self, source_selector: str, target_selector: str):
    """Drag the element matched by ``source_selector`` onto ``target_selector``.

    Both elements must become visible (each check gets half of the default
    action timeout) before ``drag_to`` is attempted. A short pause follows a
    successful drag.

    Args:
        source_selector: Selector of the element to drag.
        target_selector: Selector of the element to drop onto.

    Raises:
        PlaywrightError: If the browser is not started, or if locating,
            checking or dragging fails. For Playwright/assertion failures a
            screenshot is saved and its path is included in the message.
    """
    if not self.page:
        raise PlaywrightError("Browser not started.")
    try:
        logger.info(f"Attempting to drag '{source_selector}' to '{target_selector}'")
        source_locator = self._get_locator(source_selector)
        target_locator = self._get_locator(target_selector)
        # Optional: Check visibility/existence before drag attempt
        expect(source_locator).to_be_visible(timeout=self.default_action_timeout / 2)
        expect(target_locator).to_be_visible(timeout=self.default_action_timeout / 2)
        # Perform drag_to with default timeout
        source_locator.drag_to(target_locator, timeout=self.default_action_timeout)
        logger.info(f"Successfully dragged '{source_selector}' to '{target_selector}'")
        self._human_like_delay(0.5, 1.2)  # Delay after drag/drop
    except (PlaywrightTimeoutError, PlaywrightError, AssertionError) as e:
        error_msg = f"Timeout or error dragging '{source_selector}' to '{target_selector}': {type(e).__name__} - {e}"
        logger.error(error_msg)
        # Sanitize the same characters (' ', ':', '>') as the other action
        # methods do, so the screenshot name stays a single path component
        # free of characters such as ':' and '>'.
        safe_chars = str.maketrans(" :>", "___")
        source_tag = source_selector.translate(safe_chars)[:20]
        target_tag = target_selector.translate(safe_chars)[:20]
        screenshot_path = f"output/drag_fail_{source_tag}_{target_tag}_{int(time.time())}.png"
        self.save_screenshot(screenshot_path)
        logger.error(f"Saved screenshot on drag failure to: {screenshot_path}")
        raise PlaywrightError(f"{error_msg}. Screenshot: {screenshot_path}") from e
    except Exception as e:
        logger.error(f"Unexpected error dragging '{source_selector}' to '{target_selector}': {e}", exc_info=True)
        raise PlaywrightError(f"Unexpected error dragging '{source_selector}' to '{target_selector}': {e}") from e
def wait(self,
         timeout_seconds: Optional[float] = None,
         selector: Optional[str] = None,
         state: Optional[str] = None,  # 'visible', 'hidden', 'enabled', 'disabled', 'attached', 'detached'
         url: Optional[str] = None,  # String, regex, or function
         ):
    """Wait for a fixed time, an element state, or a URL.

    The wait mode is picked from the arguments, in this order:

    1. Only ``timeout_seconds`` given: sleep for that many seconds.
    2. ``selector`` and ``state`` given: wait for the element to reach the
       state, bounded by the default action timeout.
    3. ``url`` given: wait for the page URL to match, bounded by the default
       navigation timeout.
    4. Anything else: sleep for 5 seconds.

    ``timeout_seconds`` is only used by mode 1.

    Args:
        timeout_seconds: Duration of the plain time wait, in seconds.
        selector: Selector of the element to wait on.
        state: Target element state. 'enabled' and 'disabled' are checked
            with ``expect`` assertions; every other value is passed to
            ``locator.wait_for``.
        url: URL pattern passed to ``page.wait_for_url``.

    Returns:
        dict: ``{"success": bool, "message": str}``. Wait failures are
        reported through this dict rather than raised.

    Raises:
        PlaywrightError: If the browser has not been started.
    """
    if not self.page:
        raise PlaywrightError("Browser not started.")
    try:
        if timeout_seconds is not None and selector is None and state is None and url is None:
            # Simple time wait
            logger.info(f"Waiting for {timeout_seconds:.2f} seconds...")
            self.page.wait_for_timeout(timeout_seconds * 1000)
            logger.info(f"Wait finished after {timeout_seconds:.2f} seconds.")
        elif selector and state:
            # Wait for element state
            wait_timeout = self.default_action_timeout  # Use default action timeout for element waits
            logger.info(f"Waiting for element '{selector}' to be '{state}' (max {wait_timeout}ms)...")
            locator = self._get_locator(selector)  # Handles potential errors
            # locator.wait_for only knows attached/detached/visible/hidden;
            # the enabled/disabled states need the assertion API instead.
            if state == "enabled":
                expect(locator).to_be_enabled(timeout=wait_timeout)
            elif state == "disabled":
                expect(locator).to_be_disabled(timeout=wait_timeout)
            else:
                locator.wait_for(state=state, timeout=wait_timeout)
            logger.info(f"Wait finished: Element '{selector}' is now '{state}'.")
        elif url:
            # Wait for URL
            wait_timeout = self.default_navigation_timeout  # Use navigation timeout for URL waits
            logger.info(f"Waiting for URL matching '{url}' (max {wait_timeout}ms)...")
            self.page.wait_for_url(url, timeout=wait_timeout)
            logger.info(f"Wait finished: URL now matches '{url}'.")
        else:
            logger.info(f"Waiting for 5 seconds...")
            self.page.wait_for_timeout(5 * 1000)
            logger.info(f"Wait finished after {5:.2f} seconds.")
        # Optional small delay after successful condition wait
        if selector or url:
            self._human_like_delay(0.1, 0.3)
        return {"success": True, "message": "Wait condition met successfully."}
    except (PlaywrightTimeoutError, AssertionError) as e:
        # AssertionError is how a timed-out expect(...) check surfaces.
        error_msg = f"Timeout waiting for condition: {e}"
        logger.error(error_msg)
        # Don't save screenshot for wait timeouts usually, unless specifically needed
        return {"success": False, "message": error_msg}
    except (PlaywrightError, ValueError) as e:
        error_msg = f"Error during wait: {type(e).__name__}: {e}"
        logger.error(error_msg)
        return {"success": False, "message": error_msg}
    except Exception as e:
        error_msg = f"Unexpected error during wait: {e}"
        logger.error(error_msg, exc_info=True)
        return {"success": False, "message": error_msg}
def start(self):
    """Start Playwright, launch Chromium and prepare a ready-to-use page.

    Steps: start Playwright, launch Chromium with the
    ``--disable-blink-features=AutomationControlled`` argument, create one
    browser context (optionally seeded with the saved authentication state),
    apply the default timeouts and init script, open a page, create the
    ``DomService`` and attach the console/response/requestfailed listeners,
    then inject the recorder UI scripts.

    If the context cannot be created with the saved authentication state,
    the error is logged and the context is created again without it.

    Raises:
        Exception: Any failure during startup is logged, ``close()`` is
            called for cleanup, and the original exception is re-raised.
    """
    try:
        logger.info("Starting Playwright...")
        self.playwright = sync_playwright().start()
        # Consider adding args for anti-detection if needed:
        browser_args = ['--disable-blink-features=AutomationControlled']
        self.browser = self.playwright.chromium.launch(headless=self.headless, args=browser_args)
        # Options are collected in a dict first so that storage_state can be
        # added conditionally; exactly one context is created from them.
        context_options = {
            "user_agent": self._get_random_user_agent(),
            "viewport": self._get_random_viewport(),
            "ignore_https_errors": True,
            "java_script_enabled": True,
            "extra_http_headers": COMMON_HEADERS,
        }
        if self.auth_state_path and os.path.exists(self.auth_state_path):
            logger.info(f"Attempting to load authentication state from: {self.auth_state_path}")
            context_options["storage_state"] = self.auth_state_path
        elif self.auth_state_path:
            logger.warning(f"Authentication state file not found at '{self.auth_state_path}'. Proceeding without saved state. Run generation script if needed.")
        else:
            logger.info("No authentication state path provided. Proceeding without saved state.")
        try:
            self.context = self.browser.new_context(**context_options)
        except Exception as e:
            # The state file is only read here, so this is where a bad file
            # can actually fail. Without a state file there is nothing to
            # fall back to, so let the outer handler deal with it.
            if "storage_state" not in context_options:
                raise
            logger.error(f"Failed to load storage state from '{self.auth_state_path}': {e}. Proceeding without saved state.", exc_info=True)
            del context_options["storage_state"]
            self.context = self.browser.new_context(**context_options)
        self.context.set_default_navigation_timeout(self.default_navigation_timeout)
        self.context.set_default_timeout(self.default_action_timeout)
        self.context.add_init_script(HIDE_WEBDRIVER_SCRIPT)
        self.page = self.context.new_page()
        # Initialize DomService with the created page
        self._dom_service = DomService(self.page)
        # --- Attach page listeners ---
        self.page.on('console', self._handle_console_message)
        logger.info("Attached console message listener.")
        self.page.on('response', self._handle_response)
        logger.info("Attached network response listener.")
        self.page.on('requestfailed', self._handle_request_failed)
        logger.info("Attached network failed listener.")
        self.panel.inject_recorder_ui_scripts()  # inject recorder ui
        # -----------------------------
        logger.info("Browser context and page created.")
    except Exception as e:
        logger.error(f"Failed to start Playwright or launch browser: {e}", exc_info=True)
        self.close()  # Ensure cleanup on failure
        raise
def close(self):
    """Close the page, context and browser, stop Playwright and reset state.

    Every teardown step is attempted independently: a failure in one step is
    logged and the remaining steps still run, so a broken page or panel
    cannot leave the browser or the Playwright driver running. The instance
    fields are always reset at the end. This method does not raise for
    cleanup failures.
    """
    # UI/listener teardown is best-effort and must not block the shutdown below.
    try:
        self.panel.remove_recorder_panel()
    except Exception as e:
        logger.warning(f"Could not remove recorder panel: {e}")
    try:
        self.remove_click_listener()
    except Exception as e:
        logger.warning(f"Could not remove click listener: {e}")

    def _close_page():
        """Detach the page listeners, then close the page if it is still open."""
        if not self.page or self.page.is_closed():
            logger.debug("Page already closed or not initialized.")
            return
        # (event, handler, label used on success, label used on failure)
        listeners = (
            ('response', self._handle_response, "network response", "response"),
            ('console', self._handle_console_message, "console message", "console"),
            ('requestfailed', self._handle_request_failed, "network requestfailed", "requestfailed"),
        )
        for event, handler, removed_label, failed_label in listeners:
            try:
                self.page.remove_listener(event, handler)
                logger.debug(f"Removed {removed_label} listener.")
            except Exception as e:
                logger.warning(f"Could not remove {failed_label} listener: {e}")
        self.page.close()

    def _close_context():
        """Close the browser context if one was created."""
        if self.context:
            self.context.close()
            logger.info("Browser context closed.")

    def _close_browser():
        """Close the browser if one was launched."""
        if self.browser:
            self.browser.close()
            logger.info("Browser closed.")

    def _stop_playwright():
        """Stop the Playwright driver if it was started."""
        if self.playwright:
            self.playwright.stop()
            logger.info("Playwright stopped.")

    self._dom_service = None
    try:
        for step in (_close_page, _close_context, _close_browser, _stop_playwright):
            try:
                step()
            except Exception as e:
                logger.error(f"Error during browser/Playwright cleanup: {e}", exc_info=True)
    finally:
        self.page = None
        self.context = None
        self.browser = None
        self.playwright = None
        self.console_messages = []  # Clear messages on final close
        self.network_requests = []  # Clear network data on final close
        self._recorder_ui_injected = False
```
--------------------------------------------------------------------------------
/src/execution/executor.py:
--------------------------------------------------------------------------------
```python
# /src/execution/executor.py
import json
import logging
import time
import os
from patchright.sync_api import sync_playwright, Page, TimeoutError as PlaywrightTimeoutError, Error as PlaywrightError, expect
from typing import Optional, Dict, Any, Tuple, List
from pydantic import BaseModel, Field
import re
from PIL import Image
from pixelmatch.contrib.PIL import pixelmatch
import io
from ..browser.browser_controller import BrowserController # Re-use for browser setup/teardown
from ..llm.llm_client import LLMClient
from ..agents.recorder_agent import WebAgent
from ..utils.image_utils import compare_images
# Short timeout (in milliseconds) intended for validating an LLM-suggested
# selector during healing.
# NOTE(review): in the soft-healing code this value is only interpolated into
# log/reason text; it is not passed to a Playwright call there — confirm
# whether the validation should actually wait up to this long.
HEALING_SELECTOR_VALIDATION_TIMEOUT_MS = 2000
class HealingSelectorSuggestion(BaseModel):
    """Schema for the LLM's suggested replacement selector during healing."""

    # Replacement selector proposed by the LLM; defaults to None when it has
    # no suitable alternative to offer.
    new_selector: Optional[str] = Field(
        default=None,
        description="The best suggested alternative CSS selector based on visual and DOM context, or null if no suitable alternative is found.",
    )
    # Required free-text justification for the suggestion (or for its absence).
    reasoning: str = Field(
        default=...,
        description="Explanation for the suggested selector choice or the reason why healing could not determine a better selector.",
    )
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class TestExecutor:
"""
Executes a recorded test case from a JSON file deterministically using Playwright.
"""
def __init__(self,
             llm_client: Optional[LLMClient],
             headless: bool = True,
             default_timeout: int = 5000,  # Default timeout for actions/assertions
             enable_healing: bool = False,  # Flag for healing
             healing_mode: str = 'soft',  # Healing mode ('soft' or 'hard')
             healing_retries: int = 1,  # Max soft healing attempts per step
             baseline_dir: str = "./visual_baselines",  # Add baseline dir
             pixel_threshold: float = 0.01,  # Default 1% pixel difference threshold
             get_performance: bool = False,
             get_network_requests: bool = False
             ):
    """Store the executor configuration and make sure the baseline directory exists.

    Args:
        llm_client: LLM client used for healing; may be None.
        headless: Whether the browser should run headless.
        default_timeout: Default timeout for actions/assertions, in milliseconds.
        enable_healing: Turns self-healing on.
        healing_mode: Healing mode, 'soft' or 'hard'.
        healing_retries: Maximum soft-healing attempts per step.
        baseline_dir: Directory holding visual baselines; stored as an
            absolute path and created if missing.
        pixel_threshold: Pixel-difference threshold as a fraction (0.01 = 1%).
        get_performance: Stored as-is on the instance.
        get_network_requests: Stored as-is on the instance.
    """
    self.headless = headless
    self.default_timeout = default_timeout  # Milliseconds
    self.llm_client = llm_client
    self.browser_controller: Optional[BrowserController] = None
    self.page: Optional[Page] = None
    self.enable_healing = enable_healing
    self.healing_mode = healing_mode
    self.healing_retries_per_step = healing_retries
    self.healing_attempts_log: List[Dict] = []  # To store healing attempts info
    self.get_performance = get_performance
    self.get_network_requests = get_network_requests

    # Build the initialisation line and its healing suffix as ONE message, so
    # the suffix is never emitted as a separate, context-free log line.
    init_message = f"TestExecutor initialized (headless={headless}, timeout={default_timeout}ms)"
    if self.enable_healing:
        init_message += f" with Healing ENABLED (mode={self.healing_mode}, retries={self.healing_retries_per_step})"
        if self.llm_client:
            init_message += f" using LLM provider '{self.llm_client.provider}'."
    else:
        init_message += "."
    logger.info(init_message)
    if self.enable_healing and not self.llm_client:
        logger.warning("Self-healing enabled, but LLMClient not provided. Healing will not function.")

    if not self.llm_client and not headless:  # Vision verification needs LLM
        logger.warning("TestExecutor initialized without LLMClient. Vision-based assertions ('assert_passed_verification') will fail.")
    elif self.llm_client:
        logger.info(f"TestExecutor initialized (headless={headless}, timeout={default_timeout}ms) with LLMClient for provider '{self.llm_client.provider}'.")
    else:
        logger.info(f"TestExecutor initialized (headless={headless}, timeout={default_timeout}ms). LLMClient not provided (headless mode or vision assertions not needed).")

    self.baseline_dir = os.path.abspath(baseline_dir)
    self.pixel_threshold = pixel_threshold  # Store threshold
    logger.info(f"TestExecutor initialized (visual baseline dir: {self.baseline_dir}, pixel threshold: {self.pixel_threshold*100:.2f}%)")
    os.makedirs(self.baseline_dir, exist_ok=True)  # Ensure baseline dir exists
def _get_locator(self, selector: str):
    """Return a Playwright locator for the first element matching ``selector``.

    A selector that looks like XPath but carries no explicit ``css=`` or
    ``xpath=`` prefix is given an ``xpath=`` prefix before being handed to
    ``page.locator``.

    Args:
        selector: CSS or XPath selector.

    Returns:
        The ``.first`` locator for the (possibly prefixed) selector.

    Raises:
        PlaywrightError: If the page is not initialized, or if creating the
            locator fails.
        ValueError: If ``selector`` is empty.
    """
    if not self.page:
        raise PlaywrightError("Page is not initialized.")
    if not selector:
        raise ValueError("Selector cannot be empty.")
    # Heuristic: treat as XPath when it starts with '/' or '(', or contains a
    # '/' while using none of the characters typical of CSS selectors.
    is_likely_xpath = selector.startswith(('/', '(')) or \
        ('/' in selector and not any(c in selector for c in ['#', '.', '[', '>', '+', '~']))
    # If it looks like XPath but has no engine prefix, add 'xpath=' so the
    # selector engine is explicit.
    processed_selector = selector
    if is_likely_xpath and not selector.startswith(('css=', 'xpath=')):
        logger.warning(f"Selector '{selector}' looks like XPath but lacks prefix. Assuming XPath and adding 'xpath=' prefix.")
        processed_selector = f"xpath={selector}"
    try:
        logger.debug(f"Attempting to locate using: '{processed_selector}'")
        return self.page.locator(processed_selector).first
    except Exception as e:
        # Catch errors during locator creation itself (e.g., invalid selector syntax)
        logger.error(f"Failed to create locator for processed selector: '{processed_selector}'. Original: '{selector}'. Error: {e}")
        # Re-raise using the processed selector in the message for clarity
        raise PlaywrightError(f"Invalid selector syntax or error creating locator: '{processed_selector}'. Error: {e}") from e
def _load_baseline(self, baseline_id: str) -> Tuple[Optional[Image.Image], Optional[Dict]]:
    """Read a visual baseline (PNG image plus JSON metadata) from the baseline directory.

    Args:
        baseline_id: Base file name shared by ``<id>.png`` and ``<id>.json``.

    Returns:
        ``(image, metadata)`` with the image converted to RGBA, or
        ``(None, None)`` when either file is missing or cannot be loaded.
    """
    metadata_path, image_path = (
        os.path.join(self.baseline_dir, f"{baseline_id}{extension}")
        for extension in (".json", ".png")  # image is assumed to be PNG
    )
    if not (os.path.exists(metadata_path) and os.path.exists(image_path)):
        logger.error(f"Baseline files not found for ID '{baseline_id}' in {self.baseline_dir}")
        return None, None
    try:
        with open(metadata_path, 'r', encoding='utf-8') as metadata_file:
            metadata = json.load(metadata_file)
        # Normalise to RGBA regardless of how the file was stored.
        baseline_img = Image.open(image_path).convert("RGBA")
        logger.info(f"Loaded baseline '{baseline_id}' (Image: {image_path}, Metadata: {metadata_path})")
    except Exception as load_error:
        logger.error(f"Error loading baseline files for ID '{baseline_id}': {load_error}", exc_info=True)
        return None, None
    return baseline_img, metadata
def _attempt_soft_healing(
    self,
    failed_step: Dict[str, Any],
    failed_selector: Optional[str],
    error_message: str
) -> Tuple[bool, Optional[str], str]:
    """
    Ask the LLM for a replacement selector for a failed step and validate it on the current page.

    The method gathers the current URL, a screenshot and a textual DOM
    context, sends them to the LLM together with the failed step's
    description, action, selector and error, and then checks that the
    suggested selector matches at least one element on the page.

    Args:
        failed_step: The failed step; 'step_id', 'description' and 'action' are read from it.
        failed_selector: The selector that failed, if any.
        error_message: The error text produced by the failed step.

    Returns:
        Tuple[bool, Optional[str], str]: (healing_success, new_selector, reasoning).
        On any failure the tuple is (False, None, <reason text>); this method
        reports failures through the return value rather than raising.
    """
    # Healing needs both an LLM and a live browser page.
    if not self.llm_client:
        logger.error("Soft Healing: LLMClient not available.")
        return False, None, "LLMClient not configured for healing."
    if not self.browser_controller or not self.page:
        logger.error("Soft Healing: BrowserController or Page not available.")
        return False, None, "Browser state unavailable for healing."
    logger.info(f"Soft Healing: Gathering context for step {failed_step.get('step_id')}")
    # --- Gather page context (URL, screenshot, DOM text) ---
    try:
        current_url = self.browser_controller.get_current_url()
        screenshot_bytes = self.browser_controller.take_screenshot()
        dom_state = self.browser_controller.get_structured_dom(highlight_all_clickable_elements=False, viewport_expansion=-1)
        # Placeholder text used in the prompt when no DOM tree is available.
        dom_context_str = "DOM context could not be retrieved."
        if dom_state and dom_state.element_tree:
            dom_context_str, _ = dom_state.element_tree.generate_llm_context_string(context_purpose='verification')
        else:
            logger.warning("Soft Healing: Failed to get valid DOM state.")
        # A missing DOM is tolerated (warning above); a missing screenshot is not.
        if not screenshot_bytes:
            logger.error("Soft Healing: Failed to capture screenshot.")
            return False, None, "Failed to capture screenshot for context."
    except Exception as e:
        logger.error(f"Soft Healing: Error gathering context: {e}", exc_info=True)
        return False, None, f"Error gathering context: {e}"
    # Construct the prompt
    prompt = f"""You are an AI Test Self-Healing Assistant. A step in an automated test failed, likely due to an incorrect or outdated CSS selector. Your goal is to analyze the current page state and suggest a more robust replacement selector for the intended element.
**Failed Test Step Information:**
- Step Description: "{failed_step.get('description', 'N/A')}"
- Original Action: "{failed_step.get('action', 'N/A')}"
- Failed Selector: `{failed_selector or 'N/A'}`
- Error Message: "{error_message}"
**Current Page State:**
- URL: {current_url}
- Attached Screenshot: Analyze the visual layout to identify the target element corresponding to the step description.
- HTML Context (Visible elements, interactive `[index]`, static `(Static)`):
```html
{dom_context_str}
```
**Your Task:**
1. Based on the step description, the original action, the visual screenshot, AND the HTML context, identify the element the test likely intended to interact with.
2. Suggest a **single, robust CSS selector** for this element using **NATIVE attributes** (like `id`, `name`, `data-testid`, `data-cy`, `aria-label`, `placeholder`, unique visible text combined with tag, stable class combinations).
3. **CRITICAL: Do NOT suggest selectors based on `data-ai-id` or unstable attributes (e.g., dynamic classes, complex positional selectors like :nth-child unless absolutely necessary and combined with other stable attributes).**
4. Prioritize standard, semantic, and test-specific attributes (`id`, `data-testid`, `name`).
5. If you cannot confidently identify the intended element or find a robust selector, return `null` for `new_selector`.
**Output Format:** Respond ONLY with a JSON object matching the following schema:
```json
{{
"new_selector": "YOUR_SUGGESTED_CSS_SELECTOR_OR_NULL",
"reasoning": "Explain your choice of selector, referencing visual cues, HTML attributes, and the original step description. If returning null, explain why."
}}
```
"""
    # --- Ask the LLM, then validate whatever it suggests ---
    try:
        logger.info("Soft Healing: Requesting selector suggestion from LLM...")
        response_obj = self.llm_client.generate_json(
            HealingSelectorSuggestion,
            prompt,
            image_bytes=screenshot_bytes
        )
        # The branches below handle three response shapes: a parsed
        # HealingSelectorSuggestion, a plain string (treated as an error
        # message), or anything else (treated as unexpected).
        if isinstance(response_obj, HealingSelectorSuggestion):
            if response_obj.new_selector:
                suggested_selector = response_obj.new_selector
                logger.info(f"Soft Healing: LLM suggested new selector: '{response_obj.new_selector}'. Reasoning: {response_obj.reasoning}")
                logger.info(f"Soft Healing: Validating suggested selector '{suggested_selector}'...")
                validation_passed = False
                # Appended to the LLM's reasoning when validation does not pass.
                validation_reasoning_suffix = ""
                try:
                    # Existence check: count the elements matching the suggestion.
                    # NOTE(review): count() is called without a timeout argument, and
                    # HEALING_SELECTOR_VALIDATION_TIMEOUT_MS only appears in the
                    # messages below — confirm whether a bounded wait was intended.
                    # NOTE(review): the suggestion goes to page.locator() directly,
                    # not through self._get_locator — confirm this is intentional.
                    count = self.page.locator(suggested_selector).count()
                    if count > 0:
                        validation_passed = True
                        logger.info(f"Soft Healing: Validation PASSED. Selector '{suggested_selector}' found {count} element(s).")
                        if count > 1:
                            logger.warning(f"Soft Healing: Suggested selector '{suggested_selector}' found {count} elements (expected 1). Will target the first.")
                    else:  # count == 0
                        logger.warning(f"Soft Healing: Validation FAILED. Selector '{suggested_selector}' found 0 elements within {HEALING_SELECTOR_VALIDATION_TIMEOUT_MS}ms.")
                        validation_reasoning_suffix = " [Validation Failed: Selector found 0 elements]"
                except PlaywrightTimeoutError:
                    logger.warning(f"Soft Healing: Validation TIMEOUT ({HEALING_SELECTOR_VALIDATION_TIMEOUT_MS}ms) checking selector '{suggested_selector}'.")
                    validation_reasoning_suffix = f" [Validation Failed: Timeout after {HEALING_SELECTOR_VALIDATION_TIMEOUT_MS}ms]"
                except PlaywrightError as e:  # Catch invalid selector syntax errors
                    logger.warning(f"Soft Healing: Validation FAILED. Invalid selector syntax for '{suggested_selector}'. Error: {e}")
                    validation_reasoning_suffix = f" [Validation Failed: Invalid selector syntax - {e}]"
                except Exception as e:
                    logger.error(f"Soft Healing: Unexpected error during selector validation for '{suggested_selector}': {e}", exc_info=True)
                    validation_reasoning_suffix = f" [Validation Error: {type(e).__name__}]"
                # --- End Validation Step ---
                # Return success only if validation passed
                if validation_passed:
                    return True, suggested_selector, response_obj.reasoning
                else:
                    # Update reasoning with validation failure details
                    return False, None, response_obj.reasoning + validation_reasoning_suffix
            else:
                # The LLM answered but declined to propose a selector.
                logger.warning(f"Soft Healing: LLM could not suggest a new selector. Reasoning: {response_obj.reasoning}")
                return False, None, response_obj.reasoning
        elif isinstance(response_obj, str):  # LLM returned an error string
            logger.error(f"Soft Healing: LLM returned an error: {response_obj}")
            return False, None, f"LLM Error: {response_obj}"
        else:  # Unexpected response type
            logger.error(f"Soft Healing: Unexpected response type from LLM: {type(response_obj)}")
            return False, None, f"Unexpected LLM response type: {type(response_obj)}"
    except Exception as llm_e:
        logger.error(f"Soft Healing: Error during LLM communication: {llm_e}", exc_info=True)
        return False, None, f"LLM communication error: {llm_e}"
def _trigger_hard_healing(self, feature_description: str, original_file_path: str) -> None:
    """
    Close the executor's browser and have a WebAgent re-record the test.

    Nothing is done (apart from an error log) when ``feature_description``
    is empty or no LLM client is configured. The outcome of the re-recording
    is only logged; this method returns None and does not raise for
    recording failures.

    Args:
        feature_description: Feature description passed to the recorder.
        original_file_path: Path of the original test file; its base name
            (without extension) plus "_healed_" is passed to the recorder
            as ``filename``.
    """
    logger.warning("--- Triggering Hard Healing (Re-Recording) ---")
    if not feature_description:
        logger.error("Hard Healing: Cannot re-record without the original feature description.")
        return
    if not self.llm_client:
        logger.error("Hard Healing: Cannot re-record without an LLMClient.")
        return
    # 1. Close current browser
    try:
        if self.browser_controller:
            self.browser_controller.close()
        logger.info("Hard Healing: Closed executor browser.")
    except Exception as close_err:
        logger.error(f"Hard Healing: Error closing executor browser: {close_err}")
        # Continue anyway, try to re-record
    finally:
        # Drop the references even when close() failed, so no later code
        # keeps using a controller/page that is being torn down.
        self.browser_controller = None
        self.page = None
    # 2. Instantiate Recorder Agent
    # NOTE: Assume re-recording is automated. Add flag if interactive needed.
    try:
        logger.info("Hard Healing: Initializing WebAgent for automated re-recording...")
        # Use the existing LLM client
        recorder_agent = WebAgent(
            llm_client=self.llm_client,
            headless=False,  # Re-recording needs visible browser initially
            is_recorder_mode=True,
            automated_mode=True,  # Run re-recording automatically
            # Pass original filename stem to maybe overwrite or create variant
            filename=os.path.splitext(os.path.basename(original_file_path))[0] + "_healed_"
        )
        # 3. Run Recorder
        logger.info(f"Hard Healing: Starting re-recording for feature: '{feature_description}'")
        recording_result = recorder_agent.record(feature_description)
        # 4. Log Outcome
        if recording_result.get("success"):
            logger.info(f"✅ Hard Healing: Re-recording successful. New test file saved to: {recording_result.get('output_file')}")
        else:
            logger.error(f"❌ Hard Healing: Re-recording FAILED. Message: {recording_result.get('message')}")
    except Exception as record_err:
        logger.critical(f"❌ Hard Healing: Critical error during re-recording setup or execution: {record_err}", exc_info=True)
def run_test(self, json_file_path: str) -> Dict[str, Any]:
"""Loads and executes the test steps from the JSON file."""
start_time = time.time()
self.healing_attempts_log = [] # Reset log for this run
any_step_successfully_healed = False
run_status = {
"test_file": json_file_path,
"status": "FAIL", # Default to fail
"message": "Execution initiated.",
"steps_executed": 0,
"failed_step": None,
"error_details": None,
"screenshot_on_failure": None,
"console_messages_on_failure": [],
"all_console_messages": [],
"performance_timing": None,
"network_requests": [],
"duration_seconds": 0.0,
"healing_enabled": self.enable_healing,
"healing_mode": self.healing_mode if self.enable_healing else "disabled",
"healing_attempts": self.healing_attempts_log, # Reference the list
"healed_file_saved": False,
"healed_steps_count": 0,
"visual_assertion_results": []
}
try:
# --- Load Test Data ---
logger.info(f"Loading test case from: {json_file_path}")
if not os.path.exists(json_file_path):
raise FileNotFoundError(f"Test file not found: {json_file_path}")
with open(json_file_path, 'r', encoding='utf-8') as f:
test_data = json.load(f)
modified_test_data = test_data.copy()
steps = modified_test_data.get("steps", [])
viewport = next((json.load(open(os.path.join(self.baseline_dir, f"{step.get('parameters', {}).get('baseline_id')}.json"))).get("viewport_size") for step in steps if step.get("action") == "assert_visual_match" and step.get('parameters', {}).get('baseline_id') and os.path.exists(os.path.join(self.baseline_dir, f"{step.get('parameters', {}).get('baseline_id')}.json"))), None)
test_name = modified_test_data.get("test_name", "Unnamed Test")
feature_description = modified_test_data.get("feature_description", "")
first_navigation_done = False
run_status["test_name"] = test_name
logger.info(f"Executing test: '{test_name}' with {len(steps)} steps.")
if not steps:
raise ValueError("No steps found in the test file.")
# --- Setup Browser ---
self.browser_controller = BrowserController(headless=self.headless, viewport_size=viewport)
# Set default timeout before starting the page
self.browser_controller.default_action_timeout = self.default_timeout
self.browser_controller.default_navigation_timeout = max(self.default_timeout, 30000) # Ensure navigation timeout is reasonable
self.browser_controller.start()
self.page = self.browser_controller.page
if not self.page:
raise PlaywrightError("Failed to initialize browser page.")
# Re-apply default timeout to the page context AFTER it's created
self.page.set_default_timeout(self.default_timeout)
logger.info(f"Browser page initialized with default action timeout: {self.default_timeout}ms")
self.browser_controller.clear_console_messages()
self.browser_controller.clear_network_requests()
# --- Execute Steps ---
for i, step in enumerate(steps):
step_id = step.get("step_id", i + 1)
action = step.get("action")
original_selector = step.get("selector")
params = step.get("parameters", {})
description = step.get("description", f"Step {step_id}")
wait_after = step.get("wait_after_secs", 0) # Get wait time
run_status["steps_executed"] = i + 1 # Track steps attempted
logger.info(f"--- Executing Step {step_id}: {action} - {description} ---")
if original_selector: logger.info(f"Original Selector: {original_selector}")
if params: logger.info(f"Parameters: {params}")
# --- Healing Loop ---
step_healed = False
current_healing_attempts = 0
current_selector = original_selector # Start with the recorded selector
last_error = None # Store the last error encountered
successful_healed_selector_for_step = None
run_status["visual_assertion_results"] = []
while not step_healed and current_healing_attempts <= self.healing_retries_per_step:
try:
if action == "navigate":
url = params.get("url")
if not url: raise ValueError("Missing 'url' parameter for navigate.")
self.browser_controller.goto(url)# Uses default navigation timeout from context
if not first_navigation_done:
if self.get_performance:
run_status["performance_timing"] = self.browser_controller.page_performance_timing
first_navigation_done = True
elif action == "click":
if not current_selector: raise ValueError("Missing 'current_selector' for click.")
locator = self._get_locator(current_selector)
locator.click(timeout=self.default_timeout) # Explicit timeout for action
elif action == "type":
text = params.get("text")
if not current_selector: raise ValueError("Missing 'current_selector' for type.")
if text is None: raise ValueError("Missing 'text' parameter for type.")
locator = self._get_locator(current_selector)
locator.fill(text, timeout=self.default_timeout) # Use fill for robustness
elif action == "scroll": # Less common, but support if recorded
direction = params.get("direction")
if direction not in ["up", "down"]: raise ValueError("Invalid 'direction'.")
amount = "window.innerHeight" if direction=="down" else "-window.innerHeight"
self.page.evaluate(f"window.scrollBy(0, {amount})")
elif action == "check":
if not current_selector: raise ValueError("Missing 'current_selector' for check action.")
# Use the browser_controller method which handles locator/timeout
self.browser_controller.check(current_selector)
elif action == "uncheck":
if not current_selector: raise ValueError("Missing 'current_selector' for uncheck action.")
# Use the browser_controller method
self.browser_controller.uncheck(current_selector)
elif action == "select":
option_label = params.get("option_label")
option_value = params.get("option_value") # Support value too if recorded
option_index_str = params.get("option_index") # Support index if recorded
option_param = None
param_type = None
if option_label is not None:
option_param = {"label": option_label}
param_type = f"label '{option_label}'"
elif option_value is not None:
option_param = {"value": option_value}
param_type = f"value '{option_value}'"
elif option_index_str is not None and option_index_str.isdigit():
option_param = {"index": int(option_index_str)}
param_type = f"index {option_index_str}"
else:
raise ValueError("Missing 'option_label', 'option_value', or 'option_index' parameter for select action.")
if not current_selector: raise ValueError("Missing 'current_selector' for select action.")
logger.info(f"Selecting option by {param_type} in element: {current_selector}")
locator = self._get_locator(current_selector)
locator.select_option(**option_param, timeout=self.default_timeout)
elif action == "wait": # Generic wait action
timeout_s = params.get("timeout_seconds")
target_url = params.get("url")
element_state = params.get("state") # e.g., 'visible', 'hidden'
wait_selector = current_selector # Use current (potentially healed) selector if waiting for element
if timeout_s is not None and not target_url and not element_state:
# Simple time wait
logger.info(f"Waiting for {timeout_s} seconds...")
self.page.wait_for_timeout(timeout_s * 1000)
elif wait_selector and element_state:
# Wait for element state
logger.info(f"Waiting for element '{wait_selector}' to be '{element_state}' (max {self.default_timeout}ms)...")
locator = self._get_locator(wait_selector)
locator.wait_for(state=element_state, timeout=self.default_timeout)
elif target_url:
# Wait for URL
logger.info(f"Waiting for URL matching '{target_url}' (max {self.browser_controller.default_navigation_timeout}ms)...")
self.page.wait_for_url(target_url, timeout=self.browser_controller.default_navigation_timeout)
else:
raise ValueError("Invalid parameters for 'wait' action. Need timeout_seconds OR (selector and state) OR url.")
elif action == "wait_for_load_state":
state = params.get("state", "load")
self.page.wait_for_load_state(state, timeout=self.browser_controller.default_navigation_timeout) # Use navigation timeout
elif action == "wait_for_selector": # Explicit wait
wait_state = params.get("state", "visible")
timeout = params.get("timeout_ms", self.default_timeout)
if not current_selector: raise ValueError("Missing 'current_selector' for wait_for_selector.")
locator = self._get_locator(current_selector)
locator.wait_for(state=wait_state, timeout=timeout)
elif action == "key_press":
keys = params.get("keys")
if not current_selector: raise ValueError("Missing 'selector' for key_press.")
if not keys: raise ValueError("Missing 'keys' parameter for key_press.")
# Use controller method or locator directly
locator = self._get_locator(current_selector)
locator.press(keys, timeout=self.default_timeout)
# self.browser_controller.press(current_selector, keys) # Alt: if using controller method
elif action == "drag_and_drop":
target_selector = params.get("target_selector")
source_selector = current_selector # Source is in the main 'selector' field
if not source_selector: raise ValueError("Missing source 'selector' for drag_and_drop.")
if not target_selector: raise ValueError("Missing 'target_selector' in parameters for drag_and_drop.")
# Use controller method or locators directly
source_locator = self._get_locator(source_selector)
target_locator = self._get_locator(target_selector)
source_locator.drag_to(target_locator, timeout=self.default_timeout)
# self.browser_controller.drag_and_drop(source_selector, target_selector) # Alt: if using controller
# --- Assertions ---
elif action == "assert_text_contains":
expected_text = params.get("expected_text")
if not current_selector: raise ValueError("Missing 'current_selector' for assertion.")
if expected_text is None: raise ValueError("Missing 'expected_text'.")
locator = self._get_locator(current_selector)
expect(locator).to_contain_text(expected_text, timeout=self.default_timeout)
elif action == "assert_text_equals":
expected_text = params.get("expected_text")
if not current_selector: raise ValueError("Missing 'current_selector' for assertion.")
if expected_text is None: raise ValueError("Missing 'expected_text'.")
locator = self._get_locator(current_selector)
expect(locator).to_have_text(expected_text, timeout=self.default_timeout)
elif action == "assert_visible":
if not current_selector: raise ValueError("Missing 'current_selector' for assertion.")
locator = self._get_locator(current_selector)
expect(locator).to_be_visible(timeout=self.default_timeout)
elif action == "assert_hidden":
if not current_selector: raise ValueError("Missing 'current_selector' for assertion.")
locator = self._get_locator(current_selector)
expect(locator).to_be_hidden(timeout=self.default_timeout)
elif action == "assert_attribute_equals":
attr_name = params.get("attribute_name")
expected_value = params.get("expected_value")
if not current_selector: raise ValueError("Missing 'current_selector' for assertion.")
if not attr_name: raise ValueError("Missing 'attribute_name'.")
if expected_value is None: raise ValueError("Missing 'expected_value'.")
locator = self._get_locator(current_selector)
expect(locator).to_have_attribute(attr_name, expected_value, timeout=self.default_timeout)
elif action == "assert_element_count":
expected_count = params.get("expected_count")
if not current_selector: raise ValueError("Missing 'current_selector' for assertion.")
if expected_count is None: raise ValueError("Missing 'expected_count'.")
if not isinstance(expected_count, int): raise ValueError("'expected_count' must be an integer.") # Add type check
# --- FIX: Get locator for count without using .first ---
# Apply the same current_selector processing as in _get_locator if needed
is_likely_xpath = current_selector.startswith(('/', '(', '//')) or \
('/' in current_selector and not any(c in current_selector for c in ['#', '.', '[', '>', '+', '~']))
processed_selector = current_selector
if is_likely_xpath and not current_selector.startswith(('css=', 'xpath=')):
processed_selector = f"xpath={current_selector}"
# Get the locator for potentially MULTIPLE elements
count_locator = self.page.locator(processed_selector)
# --- End FIX ---
logger.info(f"Asserting count of elements matching '{processed_selector}' to be {expected_count}")
expect(count_locator).to_have_count(expected_count, timeout=self.default_timeout)
elif action == "assert_checked":
if not current_selector: raise ValueError("Missing 'current_selector' for assert_checked.")
locator = self._get_locator(current_selector)
# Use Playwright's dedicated assertion for checked state
expect(locator).to_be_checked(timeout=self.default_timeout)
elif action == "assert_not_checked":
if not current_selector: raise ValueError("Missing 'current_selector' for assert_not_checked.")
locator = self._get_locator(current_selector)
# Use .not modifier with the checked assertion
expect(locator).not_to_be_checked(timeout=self.default_timeout)
elif action == "assert_disabled":
if not current_selector: raise ValueError("Missing 'current_selector' for assert_disabled.")
locator = self._get_locator(current_selector)
# Use Playwright's dedicated assertion for disabled state
expect(locator).to_be_disabled(timeout=self.default_timeout)
elif action == "assert_enabled":
if not current_selector: raise ValueError("Missing 'current_selector' for assert_enabled.")
locator = self._get_locator(current_selector)
expect(locator).to_be_enabled(timeout=self.default_timeout)
elif action == "task_replanned":
pass
elif action == "assert_visual_match":
baseline_id = params.get("baseline_id")
element_selector = step.get("selector") # Use step's selector if available
use_llm = params.get("use_llm_fallback", True)
# Allow overriding threshold per step
step_threshold = params.get("pixel_threshold", self.pixel_threshold)
if not baseline_id:
raise ValueError("Missing 'baseline_id' parameter for assert_visual_match.")
logger.info(f"--- Performing Visual Assertion: '{baseline_id}' (Selector: {element_selector}, Threshold: {step_threshold*100:.2f}%, LLM: {use_llm}) ---")
# 1. Load Baseline
baseline_img, baseline_meta = self._load_baseline(baseline_id)
if not baseline_img or not baseline_meta:
raise FileNotFoundError(f"Baseline '{baseline_id}' not found or failed to load.")
# 2. Capture Current State
current_screenshot_bytes = None
if element_selector:
current_screenshot_bytes = self.browser_controller.take_screenshot_element(element_selector)
else:
current_screenshot_bytes = self.browser_controller.take_screenshot() # Full page
if not current_screenshot_bytes:
raise PlaywrightError("Failed to capture current screenshot for visual comparison.")
try:
# Create a BytesIO buffer to treat the bytes like a file
buffer = io.BytesIO(current_screenshot_bytes)
# Open the image from the buffer using Pillow
img = Image.open(buffer)
# Ensure the image is in RGBA format for consistency,
# especially important for pixel comparisons that might expect an alpha channel.
logger.info("received")
current_img = img.convert("RGBA")
except Exception as e:
logger.error(f"Failed to convert bytes to PIL Image: {e}", exc_info=True)
current_img = None
if not current_img:
raise RuntimeError("Failed to process current screenshot bytes into an image.")
# 3. Pre-check Dimensions
if baseline_img.size != current_img.size:
size_mismatch_msg = f"Visual Assertion Failed: Image dimensions mismatch for '{baseline_id}'. Baseline: {baseline_img.size}, Current: {current_img.size}."
logger.error(size_mismatch_msg)
# Save current image for debugging
ts = time.strftime("%Y%m%d_%H%M%S")
current_img_path = os.path.join("output", f"visual_fail_{baseline_id}_current_{ts}.png")
current_img.save(current_img_path)
logger.info(f"Saved current image (dimension mismatch) to: {current_img_path}")
raise AssertionError(size_mismatch_msg) # Fail the assertion
# 4. Pixel Comparison
img_diff = Image.new("RGBA", baseline_img.size) # Image to store diff pixels
try:
mismatched_pixels = pixelmatch(baseline_img, current_img, img_diff, includeAA=True, threshold=0.1) # Use default pixelmatch threshold first
except Exception as pm_error:
logger.error(f"Error during pixelmatch comparison for '{baseline_id}': {pm_error}", exc_info=True)
raise RuntimeError(f"Pixelmatch library error: {pm_error}") from pm_error
total_pixels = baseline_img.width * baseline_img.height
diff_ratio = mismatched_pixels / total_pixels if total_pixels > 0 else 0
logger.info(f"Pixel comparison for '{baseline_id}': Mismatched Pixels = {mismatched_pixels}, Total Pixels = {total_pixels}, Difference = {diff_ratio*100:.4f}%")
# 5. Check against threshold
pixel_match_passed = diff_ratio <= step_threshold
llm_reasoning = None
diff_image_path = None
if pixel_match_passed:
logger.info(f"✅ Visual Assertion PASSED (Pixel Diff <= Threshold) for '{baseline_id}'.")
# Step completed successfully
else:
logger.warning(f"Visual Assertion: Pixel difference ({diff_ratio*100:.4f}%) exceeds threshold ({step_threshold*100:.2f}%) for '{baseline_id}'.")
# Save diff image regardless of LLM outcome
ts = time.strftime("%Y%m%d_%H%M%S")
diff_image_path = os.path.join("output", f"visual_diff_{baseline_id}_{ts}.png")
try:
img_diff.save(diff_image_path)
logger.info(f"Saved pixel difference image to: {diff_image_path}")
except Exception as save_err:
logger.error(f"Failed to save diff image: {save_err}")
diff_image_path = None # Mark as failed
# 6. LLM Fallback
if use_llm and self.llm_client:
logger.info(f"Attempting LLM visual comparison fallback for '{baseline_id}'...")
baseline_bytes = io.BytesIO()
baseline_img.save(baseline_bytes, format='PNG')
baseline_bytes = baseline_bytes.getvalue()
# --- UPDATED LLM PROMPT for Stitched Image ---
llm_prompt = f"""Analyze the combined image provided below for the purpose of automated software testing.
The LEFT half (labeled '1: Baseline') is the established baseline screenshot.
The RIGHT half (labeled '2: Current') is the current state screenshot.
Compare these two halves to determine if they are SEMANTICALLY equivalent from a user's perspective.
IGNORE minor differences like:
- Anti-aliasing variations
- Single-pixel shifts
- Tiny rendering fluctuations
- Small, insignificant dynamic content changes (e.g., blinking cursors, exact timestamps if not the focus).
FOCUS ON significant differences like:
- Layout changes (elements moved, resized, missing, added)
- Major color changes of key elements
- Text content changes (errors, different labels, etc.)
- Missing or fundamentally different images/icons.
Baseline ID: "{baseline_id}"
Captured URL (Baseline): "{baseline_meta.get('url_captured', 'N/A')}"
Selector (Baseline): "{baseline_meta.get('selector_captured', 'Full Page')}"
Based on these criteria, are the two halves (baseline vs. current) functionally and visually equivalent enough to PASS a visual regression test?
Respond ONLY with "YES" or "NO", followed by a brief explanation justifying your answer by referencing differences between the left and right halves.
Example YES: YES - The left (baseline) and right (current) images are visually equivalent. Minor text rendering differences are ignored.
Example NO: NO - The primary call-to-action button visible on the left (baseline) is missing on the right (current).
"""
# --- END UPDATED PROMPT ---
try:
# No change here, compare_images handles the stitching internally
llm_response = compare_images(llm_prompt, baseline_bytes, current_screenshot_bytes, self.llm_client)
logger.info(f"LLM visual comparison response for '{baseline_id}': {llm_response}")
llm_reasoning = llm_response # Store reasoning
if llm_response.strip().upper().startswith("YES"):
logger.info(f"✅ Visual Assertion PASSED (LLM Override) for '{baseline_id}'.")
pixel_match_passed = True # Override pixel result
elif llm_response.strip().upper().startswith("NO"):
logger.warning(f"Visual Assertion: LLM confirmed significant difference for '{baseline_id}'.")
pixel_match_passed = False # Confirm failure
else:
logger.warning(f"Visual Assertion: LLM response unclear for '{baseline_id}'. Treating as failure.")
pixel_match_passed = False
except Exception as llm_err:
logger.error(f"LLM visual comparison failed: {llm_err}", exc_info=True)
llm_reasoning = f"LLM Error: {llm_err}"
pixel_match_passed = False # Treat LLM error as failure
else: # LLM fallback not enabled or LLM not available
logger.warning(f"Visual Assertion: LLM fallback skipped for '{baseline_id}'. Failing based on pixel difference.")
pixel_match_passed = False
# 7. Handle Final Failure
if not pixel_match_passed:
failure_msg = f"Visual Assertion Failed for '{baseline_id}'. Pixel diff: {diff_ratio*100:.4f}% (Threshold: {step_threshold*100:.2f}%)."
if llm_reasoning: failure_msg += f" LLM Reason: {llm_reasoning}"
logger.error(failure_msg)
# Add details to run_status before raising
visual_failure_details = {
"baseline_id": baseline_id,
"pixel_difference_ratio": diff_ratio,
"pixel_threshold": step_threshold,
"mismatched_pixels": mismatched_pixels,
"diff_image_path": diff_image_path,
"llm_reasoning": llm_reasoning
}
# We need to store this somewhere accessible when raising the final error
# Let's add it directly to the step dict temporarily? Or a dedicated failure context?
# For now, log it and include basics in the AssertionError
run_status["visual_failure_details"] = visual_failure_details # Add to main run status
raise AssertionError(failure_msg) # Fail the step
visual_result = {
"step_id": step_id,
"baseline_id": baseline_id,
"status": "PASS" if pixel_match_passed else "FAIL",
"pixel_difference_ratio": diff_ratio,
"mismatched_pixels": mismatched_pixels,
"pixel_threshold": step_threshold,
"llm_override": use_llm and not pixel_match_passed and llm_response.strip().upper().startswith("YES") if 'llm_response' in locals() else False,
"llm_reasoning": llm_reasoning,
"diff_image_path": diff_image_path,
"element_selector": element_selector
}
run_status["visual_assertion_results"].append(visual_result)
elif action == "assert_passed_verification" or action == "assert_llm_verification":
if not self.llm_client:
raise PlaywrightError("LLMClient not available for vision-based verification step.")
if not description:
raise ValueError("Missing 'description' field for 'assert_passed_verification' step.")
if not self.browser_controller:
raise PlaywrightError("BrowserController not available for state gathering.")
logger.info("Performing vision-based verification with DOM context...")
# --- Gather Context ---
screenshot_bytes = self.browser_controller.take_screenshot()
current_url = self.browser_controller.get_current_url()
dom_context_str = "DOM context could not be retrieved." # Default
try:
dom_state = self.browser_controller.get_structured_dom(highlight_all_clickable_elements=False, viewport_expansion=-1) # No highlight during execution verification
if dom_state and dom_state.element_tree:
# Use 'verification' purpose for potentially richer context
dom_context_str, _ = dom_state.element_tree.generate_llm_context_string(context_purpose='verification')
else:
logger.warning("Failed to get valid DOM state for vision verification.")
except Exception as dom_err:
logger.error(f"Error getting DOM context for vision verification: {dom_err}", exc_info=True)
# --------------------
if not screenshot_bytes:
raise PlaywrightError("Failed to capture screenshot for vision verification.")
prompt = f"""Analyze the provided webpage screenshot AND the accompanying HTML context.
The goal during testing was to verify the following condition: "{description}"
Current URL: {current_url}
HTML Context (Visible elements, interactive elements marked with `[index]`, static with `(Static)`):
```html
{dom_context_str}
```
Based on BOTH the visual evidence in the screenshot AND the HTML context (Prioritize html context more as screenshot will have some delay from when it was asked and when it was taken), is the verification condition "{description}" currently met?
If you think due to the delay in html AND screenshot, state might have changed from where the condition was met, then also respond with YES
IMPORTANT: Consider that elements might be in a loading state (e.g., placeholders described) OR a fully loaded state (e.g., actual images shown visually). If the current state reasonably fulfills the ultimate goal implied by the description (even if the exact visual differs due to loading, like placeholders becoming images), respond YES.
Respond with only "YES" or "NO", followed by a brief explanation justifying your answer using evidence from the screenshot and/or HTML context.
Example Response (Success): YES - The 'Welcome, User!' message [Static id='s15'] is visible in the HTML and visually present at the top of the screenshot.
Example Response (Failure): NO - The HTML context shows an error message element [12] and the screenshot visually confirms the 'Invalid credentials' error.
Example Response (Success - Placeholder Intent): YES - The description asked for 5 placeholders, but the screenshot and HTML show 5 fully loaded images within the expected containers ('div.image-container'). This fulfills the intent of ensuring the 5 image sections are present and populated.
"""
llm_response = self.llm_client.generate_multimodal(prompt, screenshot_bytes)
logger.debug(f"Vision verification LLM response: {llm_response}")
if llm_response.strip().upper().startswith("YES"):
logger.info("✅ Vision verification PASSED (with DOM context).")
elif llm_response.strip().upper().startswith("NO"):
logger.error(f"❌ Vision verification FAILED (with DOM context). LLM Reasoning: {llm_response}")
raise AssertionError(f"Vision verification failed: Condition '{description}' not met. LLM Reason: {llm_response}")
elif llm_response.startswith("Error:"):
logger.error(f"❌ Vision verification FAILED due to LLM error: {llm_response}")
raise PlaywrightError(f"Vision verification LLM error: {llm_response}")
else:
logger.error(f"❌ Vision verification FAILED due to unclear LLM response: {llm_response}")
raise AssertionError(f"Vision verification failed: Unclear LLM response. Response: {llm_response}")
# --- Add more actions/assertions as needed ---
else:
logger.warning(f"Unsupported action type '{action}' found in step {step_id}. Skipping.")
# Optionally treat as failure: raise ValueError(f"Unsupported action: {action}")
step_healed = True
log_suffix = ""
if current_healing_attempts > 0:
# Store the selector that *worked* (which is current_selector)
successful_healed_selector_for_step = current_selector
log_suffix = f" (Healed after {current_healing_attempts} attempt(s) using selector '{current_selector}')"
logger.info(f"Step {step_id} completed successfully{log_suffix}.")
logger.info(f"Step {step_id} completed successfully.")
# Optional wait after successful step execution
if wait_after > 0:
logger.debug(f"Waiting for {wait_after}s after step {step_id}...")
time.sleep(wait_after)
except (PlaywrightError, PlaywrightTimeoutError, ValueError, AssertionError) as e:
# Catch Playwright errors, input errors, and assertion failures (from expect)
last_error = e # Store the error
error_type = type(e).__name__
error_msg = str(e)
logger.warning(f"Attempt {current_healing_attempts + 1} for Step {step_id} failed. Error: {error_type}: {error_msg}")
# --- Healing Decision Logic ---
is_healable_error = isinstance(e, (PlaywrightTimeoutError, PlaywrightError)) and current_selector is not None
# Refine healable conditions:
# - Timeout finding/interacting with an element
# - Element detached, not visible, not interactable (if selector exists)
# - Exclude navigation errors, value errors from missing params, count mismatches
if isinstance(e, ValueError) or (isinstance(e, AssertionError) and "count" in error_msg.lower()):
is_healable_error = False
if action == "navigate":
is_healable_error = False
if action == "assert_visual_match":
is_healable_error = False
can_attempt_healing = self.enable_healing and is_healable_error and current_healing_attempts < self.healing_retries_per_step
if can_attempt_healing:
logger.info(f"Attempting Healing (Mode: {self.healing_mode}) for Step {step_id}...")
healing_success = False
new_selector = None
healing_log_entry = {
"step_id": step_id,
"attempt": current_healing_attempts + 1,
"mode": self.healing_mode,
"success": False,
"original_selector": original_selector,
"failed_selector": current_selector,
"error": f"{error_type}: {error_msg}",
"new_selector": None,
"reasoning": None,
}
if self.healing_mode == 'soft':
healing_success, new_selector, reasoning = self._attempt_soft_healing(step, current_selector, error_msg)
healing_log_entry["new_selector"] = new_selector
healing_log_entry["reasoning"] = reasoning
if healing_success:
logger.info(f"Soft healing successful for Step {step_id}. New selector: '{new_selector}'")
current_selector = new_selector # Update selector for the next loop iteration
healing_log_entry["success"] = True
else:
logger.warning(f"Soft healing failed for Step {step_id}. Reason: {reasoning}")
# Let the loop proceed to final failure state below
elif self.healing_mode == 'hard':
logger.warning(f"Hard Healing triggered for Step {step_id} due to error: {error_msg}")
if self.browser_controller:
self.browser_controller.clear_console_messages()
self.browser_controller.clear_network_requests()
healing_log_entry["mode"] = "hard" # Log mode
healing_log_entry["success"] = True # Mark attempt as 'successful' in triggering re-record
self.healing_attempts_log.append(healing_log_entry) # Log before triggering
self._trigger_hard_healing(feature_description, json_file_path)
run_status["status"] = "HEALING_TRIGGERED"
run_status["message"] = f"Hard Healing (re-recording) triggered on Step {step_id}."
run_status["failed_step"] = step # Store the step that triggered it
run_status["error_details"] = f"Hard healing triggered by {error_type}: {error_msg}"
return run_status # Stop execution and return status
self.healing_attempts_log.append(healing_log_entry) # Log soft healing attempt
if healing_success:
current_healing_attempts += 1
continue # Go to the next iteration of the while loop to retry with new selector
else:
# Soft healing failed, break the while loop to handle final failure
current_healing_attempts = self.healing_retries_per_step + 1
else:
# Healing not enabled, max attempts reached, or not a healable error
logger.error(f"❌ Step {step_id} failed permanently. Healing skipped or failed.")
raise last_error # Re-raise the last error to trigger final failure handling
# --- End Healing Loop ---
if successful_healed_selector_for_step:
logger.info(f"Persisting healed selector for Step {step_id}: '{successful_healed_selector_for_step}'")
# Modify the step in the IN-MEMORY list 'steps'
if i < len(steps): # Check index boundary
steps[i]['selector'] = successful_healed_selector_for_step
any_step_successfully_healed = True
run_status["healed_steps_count"] += 1
else:
logger.error(f"Index {i} out of bounds for steps list while persisting healed selector for step {step_id}.")
# If the while loop finished because max attempts were reached without success
if not step_healed:
logger.error(f"❌ Step {step_id} ('{description}') Failed definitively after {current_healing_attempts} attempt(s).")
run_status["status"] = "FAIL"
run_status["message"] = f"Test failed on step {step_id}: {description}"
run_status["failed_step"] = step
# Use the last captured error
error_type = type(last_error).__name__ if last_error else "UnknownError"
error_msg = str(last_error) if last_error else "Step failed after healing attempts."
run_status["error_details"] = f"{error_type}: {error_msg}"
if run_status["status"] == "FAIL" and step.get("action") == "assert_visual_match" and "visual_failure_details" in run_status:
run_status["error_details"] += f"\nVisual Failure Details: {run_status['visual_failure_details']}"
# Failure Handling (Screenshot/Logs)
try:
ts = time.strftime("%Y%m%d_%H%M%S")
safe_test_name = re.sub(r'[^\w\-]+', '_', test_name)[:50]
screenshot_path = os.path.join("output", f"failure_{safe_test_name}_step{step_id}_{ts}.png")
if self.browser_controller and self.browser_controller.save_screenshot(screenshot_path):
run_status["screenshot_on_failure"] = screenshot_path
logger.info(f"Failure screenshot saved to: {screenshot_path}")
if self.browser_controller:
run_status["all_console_messages"] = self.browser_controller.get_console_messages()
run_status["console_messages_on_failure"] = [
msg for msg in run_status["all_console_messages"]
if msg['type'] in ['error', 'warning']
][-5:]
except Exception as fail_handle_e:
logger.error(f"Error during failure handling: {fail_handle_e}")
# Stop the entire test execution
logger.info("Stopping test execution due to permanent step failure.")
return run_status # Return immediately
# If loop completes without breaking due to permanent failure
logger.info("--- Setting final status to PASS ---")
run_status["status"] = "PASS"
run_status["message"] = "✅ Test executed successfully."
if any_step_successfully_healed:
run_status["message"] += f" ({run_status['healed_steps_count']} step(s) healed)."
logger.info(run_status["message"])
except (FileNotFoundError, ValueError, json.JSONDecodeError) as e:
logger.error(f"Error loading or parsing test file '{json_file_path}': {e}")
run_status["message"] = f"Failed to load/parse test file: {e}"
run_status["error_details"] = f"{type(e).__name__}: {str(e)}"
# status is already FAIL by default
except PlaywrightError as e:
logger.critical(f"A Playwright error occurred during execution: {e}", exc_info=True)
if run_status["error_details"] is None: # If this is the first detailed error
run_status["message"] = f"Playwright error: {str(e)}"
run_status["error_details"] = f"{type(e).__name__}: {str(e)}"
run_status["status"] = "FAIL" # Ensure status is Fail
except Exception as e:
logger.critical(f"An unexpected error occurred during execution: {e}", exc_info=True)
if run_status["error_details"] is None: # If this is the first detailed error
run_status["message"] = f"Unexpected execution error: {str(e)}"
run_status["error_details"] = f"{type(e).__name__}: {str(e)}" # Ensure error_details is set
run_status["status"] = "FAIL" # Ensure status is Fail
finally:
logger.info("--- Ending Test Execution ---")
if self.browser_controller:
if self.get_network_requests:
try: run_status["network_requests"] = self.browser_controller.get_network_requests()
except: logger.error("Failed to retrieve final network requests.")
# Performance timing is captured after navigation, check if it exists
if run_status.get("performance_timing") is None and self.get_performance is not False:
try: run_status["performance_timing"] = self.browser_controller.get_performance_timing()
except: logger.error("Failed to retrieve final performance timing.")
# Console messages captured on failure or here
if "all_console_messages" not in run_status or not run_status["all_console_messages"]:
try: run_status["all_console_messages"] = self.browser_controller.get_console_messages()
except: logger.error("Failed to retrieve final console messages.")
self.browser_controller.close()
self.browser_controller = None
self.page = None
end_time = time.time()
run_status["duration_seconds"] = round(end_time - start_time, 2)
run_status["healing_attempts"] = self.healing_attempts_log
if any_step_successfully_healed and run_status["status"] != "HEALING_TRIGGERED" and run_status["status"] == "PASS": # Save if healing occurred and not hard-healing
try:
logger.info(f"Saving updated test file with {run_status['healed_steps_count']} healed step(s) to: {json_file_path}")
# modified_test_data should contain the updated steps list
with open(json_file_path, 'w', encoding='utf-8') as f:
json.dump(modified_test_data, f, indent=2, ensure_ascii=False)
run_status["healed_file_saved"] = True
logger.info(f"Successfully saved healed test file: {json_file_path}")
# Adjust final message if test passed after healing
if run_status["status"] == "PASS":
run_status["message"] = f"✅ Test passed with {run_status['healed_steps_count']} step(s) healed. Updated test file saved."
except Exception as save_err:
logger.error(f"Failed to save healed test file '{json_file_path}': {save_err}", exc_info=True)
run_status["healed_file_saved"] = False
# Add warning to message if save failed
if run_status["status"] == "PASS":
run_status["message"] += " (Warning: Failed to save healed selectors)"
logger.info(f"Execution finished in {run_status['duration_seconds']:.2f} seconds. Status: {run_status['status']}")
return run_status
```