#
tokens: 49061/50000 19/616 files (page 8/28)
lines: on (toggle) GitHub
raw markdown copy reset
This is page 8 of 28. Use http://codebase.md/trycua/cua?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .cursorignore
├── .dockerignore
├── .editorconfig
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── scripts
│   │   ├── get_pyproject_version.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── README.md
│   │       └── test_get_pyproject_version.py
│   └── workflows
│       ├── bump-version.yml
│       ├── ci-lume.yml
│       ├── docker-publish-cua-linux.yml
│       ├── docker-publish-cua-windows.yml
│       ├── docker-publish-kasm.yml
│       ├── docker-publish-xfce.yml
│       ├── docker-reusable-publish.yml
│       ├── link-check.yml
│       ├── lint.yml
│       ├── npm-publish-cli.yml
│       ├── npm-publish-computer.yml
│       ├── npm-publish-core.yml
│       ├── publish-lume.yml
│       ├── pypi-publish-agent.yml
│       ├── pypi-publish-computer-server.yml
│       ├── pypi-publish-computer.yml
│       ├── pypi-publish-core.yml
│       ├── pypi-publish-mcp-server.yml
│       ├── pypi-publish-som.yml
│       ├── pypi-reusable-publish.yml
│       ├── python-tests.yml
│       ├── test-cua-models.yml
│       └── test-validation-script.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .prettierignore
├── .prettierrc.yaml
├── .vscode
│   ├── docs.code-workspace
│   ├── extensions.json
│   ├── launch.json
│   ├── libs-ts.code-workspace
│   ├── lume.code-workspace
│   ├── lumier.code-workspace
│   ├── py.code-workspace
│   └── settings.json
├── blog
│   ├── app-use.md
│   ├── assets
│   │   ├── composite-agents.png
│   │   ├── docker-ubuntu-support.png
│   │   ├── hack-booth.png
│   │   ├── hack-closing-ceremony.jpg
│   │   ├── hack-cua-ollama-hud.jpeg
│   │   ├── hack-leaderboard.png
│   │   ├── hack-the-north.png
│   │   ├── hack-winners.jpeg
│   │   ├── hack-workshop.jpeg
│   │   ├── hud-agent-evals.png
│   │   └── trajectory-viewer.jpeg
│   ├── bringing-computer-use-to-the-web.md
│   ├── build-your-own-operator-on-macos-1.md
│   ├── build-your-own-operator-on-macos-2.md
│   ├── cloud-windows-ga-macos-preview.md
│   ├── composite-agents.md
│   ├── computer-use-agents-for-growth-hacking.md
│   ├── cua-hackathon.md
│   ├── cua-playground-preview.md
│   ├── cua-vlm-router.md
│   ├── hack-the-north.md
│   ├── hud-agent-evals.md
│   ├── human-in-the-loop.md
│   ├── introducing-cua-cli.md
│   ├── introducing-cua-cloud-containers.md
│   ├── lume-to-containerization.md
│   ├── neurips-2025-cua-papers.md
│   ├── sandboxed-python-execution.md
│   ├── training-computer-use-models-trajectories-1.md
│   ├── trajectory-viewer.md
│   ├── ubuntu-docker-support.md
│   └── windows-sandbox.md
├── CONTRIBUTING.md
├── Development.md
├── Dockerfile
├── docs
│   ├── .env.example
│   ├── .gitignore
│   ├── content
│   │   └── docs
│   │       ├── agent-sdk
│   │       │   ├── agent-loops.mdx
│   │       │   ├── benchmarks
│   │       │   │   ├── index.mdx
│   │       │   │   ├── interactive.mdx
│   │       │   │   ├── introduction.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── osworld-verified.mdx
│   │       │   │   ├── screenspot-pro.mdx
│   │       │   │   └── screenspot-v2.mdx
│   │       │   ├── callbacks
│   │       │   │   ├── agent-lifecycle.mdx
│   │       │   │   ├── cost-saving.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── logging.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── pii-anonymization.mdx
│   │       │   │   └── trajectories.mdx
│   │       │   ├── chat-history.mdx
│   │       │   ├── custom-tools.mdx
│   │       │   ├── customizing-computeragent.mdx
│   │       │   ├── integrations
│   │       │   │   ├── hud.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── observability.mdx
│   │       │   ├── mcp-server
│   │       │   │   ├── client-integrations.mdx
│   │       │   │   ├── configuration.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── llm-integrations.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── tools.mdx
│   │       │   │   └── usage.mdx
│   │       │   ├── message-format.mdx
│   │       │   ├── meta.json
│   │       │   ├── migration-guide.mdx
│   │       │   ├── prompt-caching.mdx
│   │       │   ├── supported-agents
│   │       │   │   ├── composed-agents.mdx
│   │       │   │   ├── computer-use-agents.mdx
│   │       │   │   ├── grounding-models.mdx
│   │       │   │   ├── human-in-the-loop.mdx
│   │       │   │   └── meta.json
│   │       │   ├── supported-model-providers
│   │       │   │   ├── cua-vlm-router.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   └── local-models.mdx
│   │       │   ├── telemetry.mdx
│   │       │   └── usage-tracking.mdx
│   │       ├── cli-playbook
│   │       │   ├── commands.mdx
│   │       │   ├── index.mdx
│   │       │   └── meta.json
│   │       ├── computer-sdk
│   │       │   ├── cloud-vm-management.mdx
│   │       │   ├── commands.mdx
│   │       │   ├── computer-server
│   │       │   │   ├── Commands.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── REST-API.mdx
│   │       │   │   └── WebSocket-API.mdx
│   │       │   ├── computer-ui.mdx
│   │       │   ├── computers.mdx
│   │       │   ├── custom-computer-handlers.mdx
│   │       │   ├── meta.json
│   │       │   ├── sandboxed-python.mdx
│   │       │   └── tracing-api.mdx
│   │       ├── example-usecases
│   │       │   ├── form-filling.mdx
│   │       │   ├── gemini-complex-ui-navigation.mdx
│   │       │   ├── meta.json
│   │       │   ├── post-event-contact-export.mdx
│   │       │   └── windows-app-behind-vpn.mdx
│   │       ├── get-started
│   │       │   ├── meta.json
│   │       │   └── quickstart.mdx
│   │       ├── index.mdx
│   │       ├── macos-vm-cli-playbook
│   │       │   ├── lume
│   │       │   │   ├── cli-reference.mdx
│   │       │   │   ├── faq.md
│   │       │   │   ├── http-api.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── prebuilt-images.mdx
│   │       │   ├── lumier
│   │       │   │   ├── building-lumier.mdx
│   │       │   │   ├── docker-compose.mdx
│   │       │   │   ├── docker.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   └── meta.json
│   │       │   └── meta.json
│   │       └── meta.json
│   ├── next.config.mjs
│   ├── package-lock.json
│   ├── package.json
│   ├── pnpm-lock.yaml
│   ├── postcss.config.mjs
│   ├── public
│   │   └── img
│   │       ├── agent_gradio_ui.png
│   │       ├── agent.png
│   │       ├── bg-dark.jpg
│   │       ├── bg-light.jpg
│   │       ├── cli.png
│   │       ├── computer.png
│   │       ├── grounding-with-gemini3.gif
│   │       ├── hero.png
│   │       ├── laminar_trace_example.png
│   │       ├── som_box_threshold.png
│   │       └── som_iou_threshold.png
│   ├── README.md
│   ├── source.config.ts
│   ├── src
│   │   ├── app
│   │   │   ├── (home)
│   │   │   │   ├── [[...slug]]
│   │   │   │   │   └── page.tsx
│   │   │   │   └── layout.tsx
│   │   │   ├── api
│   │   │   │   ├── posthog
│   │   │   │   │   └── [...path]
│   │   │   │   │       └── route.ts
│   │   │   │   └── search
│   │   │   │       └── route.ts
│   │   │   ├── favicon.ico
│   │   │   ├── global.css
│   │   │   ├── layout.config.tsx
│   │   │   ├── layout.tsx
│   │   │   ├── llms.mdx
│   │   │   │   └── [[...slug]]
│   │   │   │       └── route.ts
│   │   │   ├── llms.txt
│   │   │   │   └── route.ts
│   │   │   ├── robots.ts
│   │   │   └── sitemap.ts
│   │   ├── assets
│   │   │   ├── discord-black.svg
│   │   │   ├── discord-white.svg
│   │   │   ├── logo-black.svg
│   │   │   └── logo-white.svg
│   │   ├── components
│   │   │   ├── analytics-tracker.tsx
│   │   │   ├── cookie-consent.tsx
│   │   │   ├── doc-actions-menu.tsx
│   │   │   ├── editable-code-block.tsx
│   │   │   ├── footer.tsx
│   │   │   ├── hero.tsx
│   │   │   ├── iou.tsx
│   │   │   ├── mermaid.tsx
│   │   │   └── page-feedback.tsx
│   │   ├── lib
│   │   │   ├── llms.ts
│   │   │   └── source.ts
│   │   ├── mdx-components.tsx
│   │   └── providers
│   │       └── posthog-provider.tsx
│   └── tsconfig.json
├── examples
│   ├── agent_examples.py
│   ├── agent_ui_examples.py
│   ├── browser_tool_example.py
│   ├── cloud_api_examples.py
│   ├── computer_examples_windows.py
│   ├── computer_examples.py
│   ├── computer_ui_examples.py
│   ├── computer-example-ts
│   │   ├── .env.example
│   │   ├── .gitignore
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── README.md
│   │   ├── src
│   │   │   ├── helpers.ts
│   │   │   └── index.ts
│   │   └── tsconfig.json
│   ├── docker_examples.py
│   ├── evals
│   │   ├── hud_eval_examples.py
│   │   └── wikipedia_most_linked.txt
│   ├── pylume_examples.py
│   ├── sandboxed_functions_examples.py
│   ├── som_examples.py
│   ├── tracing_examples.py
│   ├── utils.py
│   └── winsandbox_example.py
├── img
│   ├── agent_gradio_ui.png
│   ├── agent.png
│   ├── cli.png
│   ├── computer.png
│   ├── logo_black.png
│   └── logo_white.png
├── libs
│   ├── kasm
│   │   ├── Dockerfile
│   │   ├── LICENSE
│   │   ├── README.md
│   │   └── src
│   │       └── ubuntu
│   │           └── install
│   │               └── firefox
│   │                   ├── custom_startup.sh
│   │                   ├── firefox.desktop
│   │                   └── install_firefox.sh
│   ├── lume
│   │   ├── .cursorignore
│   │   ├── CONTRIBUTING.md
│   │   ├── Development.md
│   │   ├── img
│   │   │   └── cli.png
│   │   ├── Package.resolved
│   │   ├── Package.swift
│   │   ├── README.md
│   │   ├── resources
│   │   │   └── lume.entitlements
│   │   ├── scripts
│   │   │   ├── build
│   │   │   │   ├── build-debug.sh
│   │   │   │   ├── build-release-notarized.sh
│   │   │   │   └── build-release.sh
│   │   │   └── install.sh
│   │   ├── src
│   │   │   ├── Commands
│   │   │   │   ├── Clone.swift
│   │   │   │   ├── Config.swift
│   │   │   │   ├── Create.swift
│   │   │   │   ├── Delete.swift
│   │   │   │   ├── Get.swift
│   │   │   │   ├── Images.swift
│   │   │   │   ├── IPSW.swift
│   │   │   │   ├── List.swift
│   │   │   │   ├── Logs.swift
│   │   │   │   ├── Options
│   │   │   │   │   └── FormatOption.swift
│   │   │   │   ├── Prune.swift
│   │   │   │   ├── Pull.swift
│   │   │   │   ├── Push.swift
│   │   │   │   ├── Run.swift
│   │   │   │   ├── Serve.swift
│   │   │   │   ├── Set.swift
│   │   │   │   └── Stop.swift
│   │   │   ├── ContainerRegistry
│   │   │   │   ├── ImageContainerRegistry.swift
│   │   │   │   ├── ImageList.swift
│   │   │   │   └── ImagesPrinter.swift
│   │   │   ├── Errors
│   │   │   │   └── Errors.swift
│   │   │   ├── FileSystem
│   │   │   │   ├── Home.swift
│   │   │   │   ├── Settings.swift
│   │   │   │   ├── VMConfig.swift
│   │   │   │   ├── VMDirectory.swift
│   │   │   │   └── VMLocation.swift
│   │   │   ├── LumeController.swift
│   │   │   ├── Main.swift
│   │   │   ├── Server
│   │   │   │   ├── Handlers.swift
│   │   │   │   ├── HTTP.swift
│   │   │   │   ├── Requests.swift
│   │   │   │   ├── Responses.swift
│   │   │   │   └── Server.swift
│   │   │   ├── Utils
│   │   │   │   ├── CommandRegistry.swift
│   │   │   │   ├── CommandUtils.swift
│   │   │   │   ├── Logger.swift
│   │   │   │   ├── NetworkUtils.swift
│   │   │   │   ├── Path.swift
│   │   │   │   ├── ProcessRunner.swift
│   │   │   │   ├── ProgressLogger.swift
│   │   │   │   ├── String.swift
│   │   │   │   └── Utils.swift
│   │   │   ├── Virtualization
│   │   │   │   ├── DarwinImageLoader.swift
│   │   │   │   ├── DHCPLeaseParser.swift
│   │   │   │   ├── ImageLoaderFactory.swift
│   │   │   │   └── VMVirtualizationService.swift
│   │   │   ├── VM
│   │   │   │   ├── DarwinVM.swift
│   │   │   │   ├── LinuxVM.swift
│   │   │   │   ├── VM.swift
│   │   │   │   ├── VMDetails.swift
│   │   │   │   ├── VMDetailsPrinter.swift
│   │   │   │   ├── VMDisplayResolution.swift
│   │   │   │   └── VMFactory.swift
│   │   │   └── VNC
│   │   │       ├── PassphraseGenerator.swift
│   │   │       └── VNCService.swift
│   │   └── tests
│   │       ├── Mocks
│   │       │   ├── MockVM.swift
│   │       │   ├── MockVMVirtualizationService.swift
│   │       │   └── MockVNCService.swift
│   │       ├── VM
│   │       │   └── VMDetailsPrinterTests.swift
│   │       ├── VMTests.swift
│   │       ├── VMVirtualizationServiceTests.swift
│   │       └── VNCServiceTests.swift
│   ├── lumier
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   └── src
│   │       ├── bin
│   │       │   └── entry.sh
│   │       ├── config
│   │       │   └── constants.sh
│   │       ├── hooks
│   │       │   └── on-logon.sh
│   │       └── lib
│   │           ├── utils.sh
│   │           └── vm.sh
│   ├── python
│   │   ├── agent
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── agent
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── adapters
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cua_adapter.py
│   │   │   │   │   ├── huggingfacelocal_adapter.py
│   │   │   │   │   ├── human_adapter.py
│   │   │   │   │   ├── mlxvlm_adapter.py
│   │   │   │   │   └── models
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── generic.py
│   │   │   │   │       ├── internvl.py
│   │   │   │   │       ├── opencua.py
│   │   │   │   │       └── qwen2_5_vl.py
│   │   │   │   ├── agent.py
│   │   │   │   ├── callbacks
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── budget_manager.py
│   │   │   │   │   ├── image_retention.py
│   │   │   │   │   ├── logging.py
│   │   │   │   │   ├── operator_validator.py
│   │   │   │   │   ├── pii_anonymization.py
│   │   │   │   │   ├── prompt_instructions.py
│   │   │   │   │   ├── telemetry.py
│   │   │   │   │   └── trajectory_saver.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── computers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cua.py
│   │   │   │   │   └── custom.py
│   │   │   │   ├── decorators.py
│   │   │   │   ├── human_tool
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   ├── server.py
│   │   │   │   │   └── ui.py
│   │   │   │   ├── integrations
│   │   │   │   │   └── hud
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── agent.py
│   │   │   │   │       └── proxy.py
│   │   │   │   ├── loops
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── anthropic.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── composed_grounded.py
│   │   │   │   │   ├── gelato.py
│   │   │   │   │   ├── gemini.py
│   │   │   │   │   ├── generic_vlm.py
│   │   │   │   │   ├── glm45v.py
│   │   │   │   │   ├── gta1.py
│   │   │   │   │   ├── holo.py
│   │   │   │   │   ├── internvl.py
│   │   │   │   │   ├── model_types.csv
│   │   │   │   │   ├── moondream3.py
│   │   │   │   │   ├── omniparser.py
│   │   │   │   │   ├── openai.py
│   │   │   │   │   ├── opencua.py
│   │   │   │   │   ├── uiins.py
│   │   │   │   │   ├── uitars.py
│   │   │   │   │   └── uitars2.py
│   │   │   │   ├── proxy
│   │   │   │   │   ├── examples.py
│   │   │   │   │   └── handlers.py
│   │   │   │   ├── responses.py
│   │   │   │   ├── tools
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── browser_tool.py
│   │   │   │   ├── types.py
│   │   │   │   └── ui
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __main__.py
│   │   │   │       └── gradio
│   │   │   │           ├── __init__.py
│   │   │   │           ├── app.py
│   │   │   │           └── ui_components.py
│   │   │   ├── benchmarks
│   │   │   │   ├── .gitignore
│   │   │   │   ├── contrib.md
│   │   │   │   ├── interactive.py
│   │   │   │   ├── models
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   └── gta1.py
│   │   │   │   ├── README.md
│   │   │   │   ├── ss-pro.py
│   │   │   │   ├── ss-v2.py
│   │   │   │   └── utils.py
│   │   │   ├── example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_computer_agent.py
│   │   ├── bench-ui
│   │   │   ├── bench_ui
│   │   │   │   ├── __init__.py
│   │   │   │   ├── api.py
│   │   │   │   └── child.py
│   │   │   ├── examples
│   │   │   │   ├── folder_example.py
│   │   │   │   ├── gui
│   │   │   │   │   ├── index.html
│   │   │   │   │   ├── logo.svg
│   │   │   │   │   └── styles.css
│   │   │   │   ├── output_overlay.png
│   │   │   │   └── simple_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       └── test_port_detection.py
│   │   ├── computer
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer
│   │   │   │   ├── __init__.py
│   │   │   │   ├── computer.py
│   │   │   │   ├── diorama_computer.py
│   │   │   │   ├── helpers.py
│   │   │   │   ├── interface
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── logger.py
│   │   │   │   ├── models.py
│   │   │   │   ├── providers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cloud
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── docker
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── lume
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── lume_api.py
│   │   │   │   │   ├── lumier
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── types.py
│   │   │   │   │   └── winsandbox
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── provider.py
│   │   │   │   │       └── setup_script.ps1
│   │   │   │   ├── tracing_wrapper.py
│   │   │   │   ├── tracing.py
│   │   │   │   ├── ui
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   └── gradio
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── app.py
│   │   │   │   └── utils.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_computer.py
│   │   ├── computer-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── browser.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── diorama
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── diorama_computer.py
│   │   │   │   │   ├── diorama.py
│   │   │   │   │   ├── draw.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── safezone.py
│   │   │   │   ├── handlers
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── main.py
│   │   │   │   ├── server.py
│   │   │   │   ├── utils
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── wallpaper.py
│   │   │   │   └── watchdog.py
│   │   │   ├── examples
│   │   │   │   ├── __init__.py
│   │   │   │   └── usage_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   ├── run_server.py
│   │   │   ├── test_connection.py
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_server.py
│   │   ├── core
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── core
│   │   │   │   ├── __init__.py
│   │   │   │   └── telemetry
│   │   │   │       ├── __init__.py
│   │   │   │       └── posthog.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_telemetry.py
│   │   ├── mcp-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── build-extension.py
│   │   │   ├── CONCURRENT_SESSIONS.md
│   │   │   ├── desktop-extension
│   │   │   │   ├── cua-extension.mcpb
│   │   │   │   ├── desktop_extension.png
│   │   │   │   ├── manifest.json
│   │   │   │   ├── README.md
│   │   │   │   ├── requirements.txt
│   │   │   │   ├── run_server.sh
│   │   │   │   └── setup.py
│   │   │   ├── mcp_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── server.py
│   │   │   │   └── session_manager.py
│   │   │   ├── pdm.lock
│   │   │   ├── pyproject.toml
│   │   │   ├── QUICK_TEST_COMMANDS.sh
│   │   │   ├── quick_test_local_option.py
│   │   │   ├── README.md
│   │   │   ├── scripts
│   │   │   │   ├── install_mcp_server.sh
│   │   │   │   └── start_mcp_server.sh
│   │   │   ├── test_mcp_server_local_option.py
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_mcp_server.py
│   │   ├── pylume
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_pylume.py
│   │   └── som
│   │       ├── .bumpversion.cfg
│   │       ├── LICENSE
│   │       ├── poetry.toml
│   │       ├── pyproject.toml
│   │       ├── README.md
│   │       ├── som
│   │       │   ├── __init__.py
│   │       │   ├── detect.py
│   │       │   ├── detection.py
│   │       │   ├── models.py
│   │       │   ├── ocr.py
│   │       │   ├── util
│   │       │   │   └── utils.py
│   │       │   └── visualization.py
│   │       └── tests
│   │           ├── conftest.py
│   │           └── test_omniparser.py
│   ├── qemu-docker
│   │   ├── linux
│   │   │   ├── Dockerfile
│   │   │   ├── README.md
│   │   │   └── src
│   │   │       ├── entry.sh
│   │   │       └── vm
│   │   │           ├── image
│   │   │           │   └── README.md
│   │   │           └── setup
│   │   │               ├── install.sh
│   │   │               ├── setup-cua-server.sh
│   │   │               └── setup.sh
│   │   ├── README.md
│   │   └── windows
│   │       ├── Dockerfile
│   │       ├── README.md
│   │       └── src
│   │           ├── entry.sh
│   │           └── vm
│   │               ├── image
│   │               │   └── README.md
│   │               └── setup
│   │                   ├── install.bat
│   │                   ├── on-logon.ps1
│   │                   ├── setup-cua-server.ps1
│   │                   ├── setup-utils.psm1
│   │                   └── setup.ps1
│   ├── typescript
│   │   ├── .gitignore
│   │   ├── .nvmrc
│   │   ├── agent
│   │   │   ├── examples
│   │   │   │   ├── playground-example.html
│   │   │   │   └── README.md
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── client.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   └── client.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── computer
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── computer
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── providers
│   │   │   │   │   │   ├── base.ts
│   │   │   │   │   │   ├── cloud.ts
│   │   │   │   │   │   └── index.ts
│   │   │   │   │   └── types.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── base.ts
│   │   │   │   │   ├── factory.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── linux.ts
│   │   │   │   │   ├── macos.ts
│   │   │   │   │   └── windows.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   ├── computer
│   │   │   │   │   └── cloud.test.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── factory.test.ts
│   │   │   │   │   ├── index.test.ts
│   │   │   │   │   ├── linux.test.ts
│   │   │   │   │   ├── macos.test.ts
│   │   │   │   │   └── windows.test.ts
│   │   │   │   └── setup.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── core
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── index.ts
│   │   │   │   └── telemetry
│   │   │   │       ├── clients
│   │   │   │       │   ├── index.ts
│   │   │   │       │   └── posthog.ts
│   │   │   │       └── index.ts
│   │   │   ├── tests
│   │   │   │   └── telemetry.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── cua-cli
│   │   │   ├── .gitignore
│   │   │   ├── .prettierrc
│   │   │   ├── bun.lock
│   │   │   ├── CLAUDE.md
│   │   │   ├── index.ts
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── auth.ts
│   │   │   │   ├── cli.ts
│   │   │   │   ├── commands
│   │   │   │   │   ├── auth.ts
│   │   │   │   │   └── sandbox.ts
│   │   │   │   ├── config.ts
│   │   │   │   ├── http.ts
│   │   │   │   ├── storage.ts
│   │   │   │   └── util.ts
│   │   │   └── tsconfig.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── pnpm-workspace.yaml
│   │   └── README.md
│   └── xfce
│       ├── .dockerignore
│       ├── .gitignore
│       ├── Development.md
│       ├── Dockerfile
│       ├── Dockerfile.dev
│       ├── README.md
│       └── src
│           ├── scripts
│           │   ├── resize-display.sh
│           │   ├── start-computer-server.sh
│           │   ├── start-novnc.sh
│           │   ├── start-vnc.sh
│           │   └── xstartup.sh
│           ├── supervisor
│           │   └── supervisord.conf
│           └── xfce-config
│               ├── helpers.rc
│               ├── xfce4-power-manager.xml
│               └── xfce4-session.xml
├── LICENSE.md
├── Makefile
├── notebooks
│   ├── agent_nb.ipynb
│   ├── blog
│   │   ├── build-your-own-operator-on-macos-1.ipynb
│   │   └── build-your-own-operator-on-macos-2.ipynb
│   ├── composite_agents_docker_nb.ipynb
│   ├── computer_nb.ipynb
│   ├── computer_server_nb.ipynb
│   ├── customizing_computeragent.ipynb
│   ├── eval_osworld.ipynb
│   ├── ollama_nb.ipynb
│   ├── README.md
│   ├── sota_hackathon_cloud.ipynb
│   └── sota_hackathon.ipynb
├── package-lock.json
├── package.json
├── pnpm-lock.yaml
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── scripts
│   ├── install-cli.ps1
│   ├── install-cli.sh
│   ├── playground-docker.sh
│   ├── playground.sh
│   ├── run-docker-dev.sh
│   └── typescript-typecheck.js
├── TESTING.md
├── tests
│   ├── agent_loop_testing
│   │   ├── agent_test.py
│   │   └── README.md
│   ├── pytest.ini
│   ├── shell_cmd.py
│   ├── test_files.py
│   ├── test_mcp_server_session_management.py
│   ├── test_mcp_server_streaming.py
│   ├── test_shell_bash.py
│   ├── test_telemetry.py
│   ├── test_tracing.py
│   ├── test_venv.py
│   └── test_watchdog.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/libs/python/agent/agent/integrations/hud/__init__.py:
--------------------------------------------------------------------------------

```python
  1 | """HUD integration: dataset runners and MCP-based computer agent export.
  2 | 
  3 | This module exposes helpers to evaluate HUD-compatible datasets and exports
  4 | the MCP-compatible computer agent implementation.
  5 | 
  6 | Exports:
  7 | - run_single_task(dataset, ...)
  8 | - run_full_dataset(dataset, ...)
  9 | - MCPComputerAgent
 10 | """
 11 | 
 12 | import time
 13 | from typing import Any, Optional
 14 | 
 15 | from agent.computers import is_agent_computer
 16 | from datasets import Dataset, load_dataset
 17 | from hud import trace
 18 | from hud.datasets import Task, run_dataset
 19 | 
 20 | from .agent import MCPComputerAgent
 21 | 
 22 | # ---------------------------------------------------------------------------
 23 | # Single-task runner
 24 | # ---------------------------------------------------------------------------
 25 | 
 26 | 
 27 | async def run_single_task(
 28 |     dataset: str | Dataset | list[dict[str, Any]],
 29 |     *,
 30 |     task_id: int = 0,
 31 |     model: str | None = None,
 32 |     allowed_tools: list[str] | None = None,
 33 |     # === ComputerAgent kwargs ===
 34 |     tools: list[Any] | None = None,
 35 |     custom_loop: Any | None = None,
 36 |     only_n_most_recent_images: int | None = None,
 37 |     callbacks: list[Any] | None = None,
 38 |     instructions: str | None = None,
 39 |     verbosity: int | None = None,
 40 |     trajectory_dir: str | dict | None = None,
 41 |     max_retries: int | None = 3,
 42 |     screenshot_delay: float | int = 0.5,
 43 |     use_prompt_caching: bool | None = False,
 44 |     max_trajectory_budget: float | dict | None = None,
 45 |     telemetry_enabled: bool | None = True,
 46 | ) -> None:
 47 |     """Load one task from the dataset and execute it with MCPComputerAgent."""
 48 | 
 49 |     # Load dataset and pick a sample
 50 |     if isinstance(dataset, str):
 51 |         dataset = load_dataset(dataset, split="train")  # type: ignore[arg-type]
 52 |     elif isinstance(dataset, list):
 53 |         dataset = dataset
 54 |     else:
 55 |         dataset = dataset["train"]
 56 | 
 57 |     sample_task = dataset[task_id]  # type: ignore[index]
 58 |     task_prompt = sample_task.get("prompt", f"Task {sample_task.get('id', 0)}")  # type: ignore[attr-defined]
 59 | 
 60 |     # Filter any existing Computer tools
 61 |     # The eval framework will add its own Computer tool per task
 62 |     if tools:
 63 |         tools = [tool for tool in tools if not is_agent_computer(tool)]
 64 | 
 65 |     with trace(name=task_prompt):
 66 |         task = Task(**sample_task)  # type: ignore[arg-type]
 67 | 
 68 |         agent = MCPComputerAgent(
 69 |             model=model or "computer-use-preview",
 70 |             allowed_tools=allowed_tools or ["openai_computer"],
 71 |             # === ComputerAgent kwargs passthrough ===
 72 |             tools=tools,
 73 |             custom_loop=custom_loop,
 74 |             only_n_most_recent_images=only_n_most_recent_images,
 75 |             callbacks=callbacks,
 76 |             instructions=instructions,
 77 |             verbosity=verbosity,
 78 |             trajectory_dir=trajectory_dir,
 79 |             max_retries=max_retries,
 80 |             screenshot_delay=screenshot_delay,
 81 |             use_prompt_caching=use_prompt_caching,
 82 |             max_trajectory_budget=max_trajectory_budget,
 83 |             telemetry_enabled=telemetry_enabled,
 84 |         )
 85 |         print(f"Running: {task_prompt}")
 86 |         result = await agent.run(task, max_steps=10)
 87 |         print(f"✅ Reward: {result.reward}")
 88 | 
 89 | 
 90 | # ---------------------------------------------------------------------------
 91 | # Full-dataset runner
 92 | # ---------------------------------------------------------------------------
 93 | 
 94 | 
 95 | async def run_full_dataset(
 96 |     dataset: str | Dataset | list[dict[str, Any]],
 97 |     *,
 98 |     job_name: Optional[str] = None,
 99 |     model: str | None = None,
100 |     allowed_tools: list[str] | None = None,
101 |     max_concurrent: int = 30,
102 |     max_steps: int = 50,
103 |     split: str = "train",
104 |     trajectory_dir: str | dict | None = None,
105 |     # === ComputerAgent kwargs ===
106 |     tools: list[Any] | None = None,
107 |     custom_loop: Any | None = None,
108 |     only_n_most_recent_images: int | None = 5,
109 |     callbacks: list[Any] | None = None,
110 |     instructions: str | None = None,
111 |     verbosity: int | None = None,
112 |     max_retries: int | None = 3,
113 |     screenshot_delay: float | int = 0.5,
114 |     use_prompt_caching: bool | None = False,
115 |     max_trajectory_budget: float | dict | None = None,
116 |     telemetry_enabled: bool | None = True,
117 | ) -> list[Any]:
118 |     """Run evaluation across the entire dataset using hud.datasets.run_dataset."""
119 | 
120 |     # Resolve the dataset and a default job name; the run below uses our MCP-based agent class.
121 |     if isinstance(dataset, str):
122 |         dataset_name = dataset.split("/")[-1]
123 |         job_name = job_name or f"Evaluation {dataset_name}"
124 |         dataset = load_dataset(dataset, split=split)  # type: ignore[arg-type]
125 |     else:
126 |         dataset_name = "custom"
127 |         job_name = job_name or f"Evaluation {time.strftime('%H:%M %Y-%m-%d')}"
128 | 
129 |     # Filter any existing Computer tools
130 |     # The eval framework will add its own Computer tool per task
131 |     if tools:
132 |         tools = [tool for tool in tools if not is_agent_computer(tool)]
133 | 
134 |     # Execute evaluation
135 |     return await run_dataset(
136 |         name=job_name,
137 |         dataset=dataset,
138 |         agent_class=MCPComputerAgent,
139 |         agent_config={
140 |             "model": model,
141 |             "allowed_tools": allowed_tools,
142 |             "trajectory_dir": trajectory_dir,
143 |             # === ComputerAgent kwargs passthrough ===
144 |             "tools": tools,
145 |             "custom_loop": custom_loop,
146 |             "only_n_most_recent_images": only_n_most_recent_images,
147 |             "callbacks": callbacks,
148 |             "instructions": instructions,
149 |             "verbosity": verbosity,
150 |             "max_retries": max_retries,
151 |             "screenshot_delay": screenshot_delay,
152 |             "use_prompt_caching": use_prompt_caching,
153 |             "max_trajectory_budget": max_trajectory_budget,
154 |             "telemetry_enabled": telemetry_enabled,
155 |         },
156 |         max_concurrent=max_concurrent,
157 |         metadata={"dataset": dataset_name},
158 |         max_steps=max_steps,
159 |         auto_respond=True,
160 |     )
161 | 
162 | 
163 | __all__ = [
164 |     "run_single_task",
165 |     "run_full_dataset",
166 |     "MCPComputerAgent",
167 | ]
168 | 
```

--------------------------------------------------------------------------------
/libs/lume/tests/VMTests.swift:
--------------------------------------------------------------------------------

```swift
  1 | import Foundation
  2 | import Testing
  3 | 
  4 | @testable import lume
  5 | 
  6 | class MockProcessRunner: ProcessRunner {
  7 |     var runCalls: [(executable: String, arguments: [String])] = []
  8 | 
  9 |     func run(executable: String, arguments: [String]) throws {
 10 |         runCalls.append((executable, arguments))
 11 |     }
 12 | }
 13 | 
 14 | private func setupVMDirectory(_ tempDir: URL) throws -> VMDirectory {
 15 |     let vmDir = VMDirectory(Path(tempDir.path))
 16 | 
 17 |     // Create disk image file
 18 |     let diskPath = vmDir.diskPath
 19 |     let diskData = Data(repeating: 0, count: 1024 * 1024)  // 1MB mock disk
 20 |     try diskData.write(to: diskPath.url)
 21 | 
 22 |     // Create nvram file
 23 |     let nvramPath = vmDir.nvramPath
 24 |     let nvramData = Data(repeating: 0, count: 1024)  // 1KB mock nvram
 25 |     try nvramData.write(to: nvramPath.url)
 26 | 
 27 |     // Create initial config file
 28 |     var config = try VMConfig(
 29 |         os: "mock-os",
 30 |         cpuCount: 1,
 31 |         memorySize: 1024,
 32 |         diskSize: 1024,
 33 |         display: "1024x768"
 34 |     )
 35 |     config.setMacAddress("00:11:22:33:44:55")
 36 |     try vmDir.saveConfig(config)
 37 | 
 38 |     // Create .initialized file to mark VM as initialized
 39 |     let initializedPath = vmDir.dir.file(".initialized")
 40 |     try Data().write(to: initializedPath.url)
 41 | 
 42 |     return vmDir
 43 | }
 44 | 
 45 | @MainActor
 46 | @Test("VM initialization and configuration")
 47 | func testVMInitialization() async throws {
 48 |     let tempDir = try createTempDirectory()
 49 |     let vmDir = try setupVMDirectory(tempDir)
 50 |     var config = try VMConfig(
 51 |         os: "mock-os",
 52 |         cpuCount: 1,
 53 |         memorySize: 1024,
 54 |         diskSize: 1024,
 55 |         display: "1024x768"
 56 |     )
 57 |     config.setMacAddress("00:11:22:33:44:55")  // Set MAC address to avoid nil
 58 |     let home = Home(fileManager: FileManager.default)
 59 |     let context = VMDirContext(dir: vmDir, config: config, home: home, storage: nil)
 60 | 
 61 |     let vm = MockVM(
 62 |         vmDirContext: context,
 63 |         virtualizationServiceFactory: { _ in MockVMVirtualizationService() },
 64 |         vncServiceFactory: { MockVNCService(vmDirectory: $0) }
 65 |     )
 66 | 
 67 |     // Test initial state
 68 |     let details = vm.details
 69 |     #expect(details.name == vmDir.name)
 70 |     #expect(details.os == "mock-os")
 71 |     #expect(details.status == "stopped")
 72 |     #expect(details.vncUrl == nil)
 73 | }
 74 | 
 75 | @MainActor
 76 | @Test("VM run and stop operations")
 77 | func testVMRunAndStop() async throws {
 78 |     let tempDir = try createTempDirectory()
 79 |     let vmDir = try setupVMDirectory(tempDir)
 80 |     var config = try VMConfig(
 81 |         os: "mock-os",
 82 |         cpuCount: 2,
 83 |         memorySize: 2048,
 84 |         diskSize: 1024,
 85 |         display: "1024x768"
 86 |     )
 87 |     config.setMacAddress("00:11:22:33:44:55")
 88 |     let home = Home(fileManager: FileManager.default)
 89 |     let context = VMDirContext(dir: vmDir, config: config, home: home, storage: nil)
 90 | 
 91 |     let vm = MockVM(
 92 |         vmDirContext: context,
 93 |         virtualizationServiceFactory: { _ in MockVMVirtualizationService() },
 94 |         vncServiceFactory: { MockVNCService(vmDirectory: $0) }
 95 |     )
 96 | 
 97 |     // Test running VM
 98 |     let runTask = Task {
 99 |         try await vm.run(
100 |             noDisplay: false, sharedDirectories: [], mount: nil as Path?, vncPort: 0,
101 |             recoveryMode: false)
102 |     }
103 | 
104 |     // Give the VM time to start
105 |     try await Task.sleep(nanoseconds: UInt64(1e9))
106 | 
107 |     // Test stopping VM
108 |     try await vm.stop()
109 |     runTask.cancel()
110 | }
111 | 
112 | @MainActor
113 | @Test("VM configuration updates")
114 | func testVMConfigurationUpdates() async throws {
115 |     let tempDir = try createTempDirectory()
116 |     let vmDir = try setupVMDirectory(tempDir)
117 |     var config = try VMConfig(
118 |         os: "mock-os",
119 |         cpuCount: 1,
120 |         memorySize: 1024,
121 |         diskSize: 1024,
122 |         display: "1024x768"
123 |     )
124 |     config.setMacAddress("00:11:22:33:44:55")
125 |     let home = Home(fileManager: FileManager.default)
126 |     let context = VMDirContext(dir: vmDir, config: config, home: home, storage: nil)
127 | 
128 |     let vm = MockVM(
129 |         vmDirContext: context,
130 |         virtualizationServiceFactory: { _ in MockVMVirtualizationService() },
131 |         vncServiceFactory: { MockVNCService(vmDirectory: $0) }
132 |     )
133 | 
134 |     // Test CPU count update
135 |     try vm.setCpuCount(4)
136 |     #expect(vm.vmDirContext.config.cpuCount == 4)
137 | 
138 |     // Test memory size update
139 |     try vm.setMemorySize(4096)
140 |     #expect(vm.vmDirContext.config.memorySize == 4096)
141 | 
142 |     // Test MAC address update
143 |     try vm.setMacAddress("00:11:22:33:44:66")
144 |     #expect(vm.vmDirContext.config.macAddress == "00:11:22:33:44:66")
145 | }
146 | 
147 | @MainActor
148 | @Test("VM setup process")
149 | func testVMSetup() async throws {
150 |     let tempDir = try createTempDirectory()
151 |     let vmDir = try setupVMDirectory(tempDir)
152 |     var config = try VMConfig(
153 |         os: "mock-os",
154 |         cpuCount: 1,
155 |         memorySize: 1024,
156 |         diskSize: 1024,
157 |         display: "1024x768"
158 |     )
159 |     config.setMacAddress("00:11:22:33:44:55")
160 |     let home = Home(fileManager: FileManager.default)
161 |     let context = VMDirContext(dir: vmDir, config: config, home: home, storage: nil)
162 | 
163 |     let vm = MockVM(
164 |         vmDirContext: context,
165 |         virtualizationServiceFactory: { _ in MockVMVirtualizationService() },
166 |         vncServiceFactory: { MockVNCService(vmDirectory: $0) }
167 |     )
168 | 
169 |     let expectedDiskSize: UInt64 = 64 * 1024 * 1024 * 1024  // 64 GB
170 | 
171 |     try await vm.setup(
172 |         ipswPath: "/path/to/mock.ipsw",
173 |         cpuCount: 2,
174 |         memorySize: 2048,
175 |         diskSize: expectedDiskSize,
176 |         display: "1024x768"
177 |     )
178 | 
179 |     #expect(vm.vmDirContext.config.cpuCount == 2)
180 |     #expect(vm.vmDirContext.config.memorySize == 2048)
181 |     let actualDiskSize = vm.vmDirContext.config.diskSize ?? 0
182 |     #expect(
183 |         actualDiskSize == expectedDiskSize,
184 |         "Expected disk size \(expectedDiskSize), but got \(actualDiskSize)")
185 |     #expect(vm.vmDirContext.config.macAddress == "00:11:22:33:44:55")
186 | }
187 | 
188 | private func createTempDirectory() throws -> URL {
189 |     let tempDir = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString)
190 |     try FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: true)
191 |     return tempDir
192 | }
193 | 
```

--------------------------------------------------------------------------------
/tests/agent_loop_testing/agent_test.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | Simple CUA Agent Test
  4 | 
  5 | Tests the actual CUA ComputerAgent SDK with a mock computer.
  6 | Only provides screenshot functionality - no complex computer actions.
  7 | """
  8 | 
  9 | import asyncio
 10 | import base64
 11 | import sys
 12 | from io import BytesIO
 13 | from pathlib import Path
 14 | 
 15 | from PIL import Image, ImageDraw
 16 | 
 17 | # Add project root to path
 18 | project_root = Path(__file__).parent.parent.parent
 19 | sys.path.insert(0, str(project_root))
 20 | 
 21 | 
 22 | class MockComputer:
 23 |     """Mock computer that only provides screenshots."""
 24 | 
 25 |     def __init__(self):
 26 |         self.action_count = 0
 27 |         self._image = self._create_image()
 28 | 
 29 |     def _create_image(self) -> str:
 30 |         """Create a simple desktop image."""
 31 |         img = Image.new("RGB", (1920, 1080), color="lightblue")
 32 |         draw = ImageDraw.Draw(img)
 33 | 
 34 |         # Draw Safari icon
 35 |         draw.rectangle([100, 950, 150, 1000], fill="blue", outline="black", width=2)
 36 |         draw.text((110, 960), "Safari", fill="white")
 37 | 
 38 |         # Draw Terminal icon
 39 |         draw.rectangle([200, 950, 250, 1000], fill="green", outline="black", width=2)
 40 |         draw.text((210, 960), "Terminal", fill="white")
 41 | 
 42 |         # Convert to base64
 43 |         img_bytes = BytesIO()
 44 |         img.save(img_bytes, format="PNG")
 45 |         return base64.b64encode(img_bytes.getvalue()).decode("utf-8")
 46 | 
 47 |     async def screenshot(self) -> str:
 48 |         self.action_count += 1
 49 |         return self._image
 50 | 
 51 |     async def get_dimensions(self) -> tuple[int, int]:
 52 |         return (1920, 1080)
 53 | 
 54 |     # All other methods are no-ops (required by CUA interface)
 55 |     async def click(self, x: int, y: int, button: str = "left") -> None:
 56 |         await asyncio.sleep(0.1)
 57 | 
 58 |     async def double_click(self, x: int, y: int) -> None:
 59 |         await asyncio.sleep(0.1)
 60 | 
 61 |     async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
 62 |         await asyncio.sleep(0.1)
 63 | 
 64 |     async def type(self, text: str) -> None:
 65 |         await asyncio.sleep(0.1)
 66 | 
 67 |     async def wait(self, ms: int = 1000) -> None:
 68 |         await asyncio.sleep(ms / 1000.0)
 69 | 
 70 |     async def move(self, x: int, y: int) -> None:
 71 |         await asyncio.sleep(0.1)
 72 | 
 73 |     async def keypress(self, keys) -> None:
 74 |         await asyncio.sleep(0.1)
 75 | 
 76 |     async def drag(self, path) -> None:
 77 |         await asyncio.sleep(0.1)
 78 | 
 79 |     async def get_current_url(self) -> str:
 80 |         return "desktop://mock"
 81 | 
 82 |     async def get_environment(self) -> str:
 83 |         return "mac"
 84 | 
 85 |     # Required abstract methods
 86 |     async def left_mouse_down(self, x: int = 0, y: int = 0) -> None:
 87 |         await asyncio.sleep(0.1)
 88 | 
 89 |     async def left_mouse_up(self, x: int = 0, y: int = 0) -> None:
 90 |         await asyncio.sleep(0.1)
 91 | 
 92 |     async def right_mouse_down(self, x: int = 0, y: int = 0) -> None:
 93 |         await asyncio.sleep(0.1)
 94 | 
 95 |     async def right_mouse_up(self, x: int = 0, y: int = 0) -> None:
 96 |         await asyncio.sleep(0.1)
 97 | 
 98 |     async def mouse_move(self, x: int, y: int) -> None:
 99 |         await asyncio.sleep(0.1)
100 | 
101 |     async def key_down(self, key: str) -> None:
102 |         await asyncio.sleep(0.1)
103 | 
104 |     async def key_up(self, key: str) -> None:
105 |         await asyncio.sleep(0.1)
106 | 
107 |     async def type_text(self, text: str) -> None:
108 |         await asyncio.sleep(0.1)
109 | 
110 | 
111 | async def test_cua_agent(model_name: str):
112 |     """Test CUA agent with mock computer."""
113 |     print(f"🤖 Testing CUA Agent: {model_name}")
114 |     print("=" * 50)
115 | 
116 |     try:
117 |         # Import the real CUA agent
118 |         from agent import ComputerAgent
119 | 
120 |         # Create mock computer
121 |         mock_computer = MockComputer()
122 | 
123 |         # Create the real CUA ComputerAgent
124 |         agent = ComputerAgent(model=model_name, tools=[mock_computer], max_trajectory_budget=5.0)
125 | 
126 |         print("✅ CUA Agent created")
127 |         print("✅ Mock computer ready")
128 |         print("🚀 Running agent...")
129 |         print()
130 | 
131 |         # Run the agent with a specific task
132 |         message = "Open Safari browser"
133 | 
134 |         iteration = 0
135 |         async for result in agent.run([{"role": "user", "content": message}]):
136 |             iteration += 1
137 |             print(f"Iteration {iteration}:")
138 | 
139 |             # Print agent output
140 |             output_items = result.get("output", [])
141 |             if not output_items:
142 |                 print("  (No output from agent)")
143 |             else:
144 |                 for item in output_items:
145 |                     if item["type"] == "message":
146 |                         print(f"  Agent: {item['content'][0]['text']}")
147 |                     elif item["type"] == "tool_call":
148 |                         print(f"  Tool: {item.get('tool_name')} {item.get('arguments')}")
149 |                     else:
150 |                         print(f"  Unknown output type: {item}")
151 | 
152 |             # Debug: print full result for empty iterations
153 |             if not output_items:
154 |                 print(f"  Debug - Full result: {result}")
155 | 
156 |             # Let the agent decide when to stop (it should try to complete the task)
157 |             # Force a stop after 5 iterations to prevent infinite loops
158 |             if iteration >= 5:
159 |                 print("🏁 Stopping after 5 iterations (safety limit)")
160 |                 break
161 | 
162 |         print()
163 |         print("=" * 50)
164 |         print("🎉 TEST COMPLETE!")
165 |         print("=" * 50)
166 |         print(f"✅ Model: {model_name}")
167 |         print(f"✅ Iterations: {iteration}")
168 |         print(f"✅ Screenshots: {mock_computer.action_count}")
169 |         print("✅ Agent executed successfully")
170 | 
171 |         return True
172 | 
173 |     except ImportError as e:
174 |         print(f"❌ Import error: {e}")
175 |         print("💡 Install CUA: pip install -e libs/python/agent -e libs/python/computer")
176 |         return False
177 |     except Exception as e:
178 |         print(f"❌ Test failed: {e}")
179 |         return False
180 | 
181 | 
182 | if __name__ == "__main__":
183 |     import argparse
184 | 
185 |     parser = argparse.ArgumentParser(description="Test CUA Agent with mock computer")
186 |     parser.add_argument(
187 |         "--model", default="anthropic/claude-sonnet-4-5-20250929", help="CUA model to test"
188 |     )
189 |     args = parser.parse_args()
190 | 
191 |     success = asyncio.run(test_cua_agent(args.model))
192 |     sys.exit(0 if success else 1)
193 | 
```

--------------------------------------------------------------------------------
/libs/python/bench-ui/bench_ui/api.py:
--------------------------------------------------------------------------------

```python
  1 | import json
  2 | import os
  3 | import subprocess
  4 | import sys
  5 | import tempfile
  6 | import time
  7 | from pathlib import Path
  8 | from typing import Any, Dict, Optional
  9 | from urllib import request
 10 | from urllib.error import HTTPError, URLError
 11 | 
 12 | import psutil
 13 | 
 14 | # Map child PID -> listening port
 15 | _pid_to_port: Dict[int, int] = {}
 16 | 
 17 | 
 18 | def _post_json(url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
 19 |     data = json.dumps(payload).encode("utf-8")
 20 |     req = request.Request(
 21 |         url, data=data, headers={"Content-Type": "application/json"}, method="POST"
 22 |     )
 23 |     try:
 24 |         with request.urlopen(req, timeout=5) as resp:
 25 |             text = resp.read().decode("utf-8")
 26 |             return json.loads(text)
 27 |     except HTTPError as e:
 28 |         try:
 29 |             body = (e.read() or b"").decode("utf-8", errors="ignore")
 30 |             return json.loads(body)
 31 |         except Exception:
 32 |             return {"error": "http_error", "status": getattr(e, "code", None)}
 33 |     except URLError as e:
 34 |         return {"error": "url_error", "reason": str(e.reason)}
 35 | 
 36 | 
 37 | def _detect_port_for_pid(pid: int) -> int:
 38 |     """Detect a listening local TCP port for the given PID using psutil.
 39 | 
 40 |     Fails fast if psutil is unavailable or if no suitable port is found.
 41 |     """
 42 |     if psutil is None:
 43 |         raise RuntimeError("psutil is required for PID->port detection. Please install psutil.")
 44 | 
 45 |     # Scan system-wide connections and filter by PID
 46 |     for c in psutil.net_connections(kind="tcp"):
 47 |         if getattr(c, "pid", None) != pid:
 48 |             continue
 49 |         laddr = getattr(c, "laddr", None)
 50 |         status = str(getattr(c, "status", ""))
 51 |         if not laddr or not isinstance(laddr, tuple) or len(laddr) < 2:
 52 |             continue
 53 |         lip, lport = laddr[0], int(laddr[1])
 54 |         if status.upper() != "LISTEN":
 55 |             continue
 56 |         if lip in ("127.0.0.1", "::1", "0.0.0.0", "::"):
 57 |             return lport
 58 | 
 59 |     raise RuntimeError(f"Could not detect listening port for pid {pid}")
 60 | 
 61 | 
 62 | def launch_window(
 63 |     url: Optional[str] = None,
 64 |     *,
 65 |     html: Optional[str] = None,
 66 |     folder: Optional[str] = None,
 67 |     title: str = "Window",
 68 |     x: Optional[int] = None,
 69 |     y: Optional[int] = None,
 70 |     width: int = 600,
 71 |     height: int = 400,
 72 |     icon: Optional[str] = None,
 73 |     use_inner_size: bool = False,
 74 |     title_bar_style: str = "default",
 75 | ) -> int:
 76 |     """Create a pywebview window in a child process and return its PID.
 77 | 
 78 |     Preferred input is a URL via the positional `url` parameter.
 79 |     To load inline HTML instead, pass `html=...`.
 80 |     To serve a static folder, pass `folder=...` (path to directory).
 81 | 
 82 |     Spawns `python -m bench_ui.child` with a JSON config passed via a temp file.
 83 |     The child prints a single JSON line: {"pid": <pid>, "port": <port>}.
 84 |     We cache pid->port for subsequent control calls like get_element_rect.
 85 |     """
 86 |     if not url and not html and not folder:
 87 |         raise ValueError("launch_window requires either a url, html, or folder")
 88 | 
 89 |     config = {
 90 |         "url": url,
 91 |         "html": html,
 92 |         "folder": folder,
 93 |         "title": title,
 94 |         "x": x,
 95 |         "y": y,
 96 |         "width": width,
 97 |         "height": height,
 98 |         "icon": icon,
 99 |         "use_inner_size": use_inner_size,
100 |         "title_bar_style": title_bar_style,
101 |     }
102 | 
103 |     with tempfile.NamedTemporaryFile("w", delete=False, suffix=".json") as f:
104 |         json.dump(config, f)
105 |         cfg_path = f.name
106 | 
107 |     try:
108 |         # Launch child process
109 |         proc = subprocess.Popen(
110 |             [sys.executable, "-m", "bench_ui.child", cfg_path],
111 |             stdout=subprocess.PIPE,
112 |             stderr=subprocess.STDOUT,
113 |             text=True,
114 |         )
115 |         assert proc.stdout is not None
116 |         # Read first line with startup info
117 |         line = proc.stdout.readline().strip()
118 |         info = json.loads(line)
119 |         pid = int(info["pid"]) if "pid" in info else proc.pid
120 |         port = int(info["port"])  # required
121 |         _pid_to_port[pid] = port
122 |         return pid
123 |     finally:
124 |         try:
125 |             os.unlink(cfg_path)
126 |         except Exception:
127 |             pass
128 | 
129 | 
130 | def get_element_rect(pid: int, selector: str, *, space: str = "window"):
131 |     """Ask the child process to compute element client rect via injected JS.
132 | 
133 |     Returns a dict like {"x": float, "y": float, "width": float, "height": float}; raises RuntimeError if no rect is obtained after 30 attempts.
134 |     """
135 |     if pid not in _pid_to_port:
136 |         _pid_to_port[pid] = _detect_port_for_pid(pid)
137 |     port = _pid_to_port[pid]
138 |     url = f"http://127.0.0.1:{port}/rect"
139 |     last: Dict[str, Any] = {}
140 |     for _ in range(30):  # ~3s total
141 |         resp = _post_json(url, {"selector": selector, "space": space})
142 |         last = resp or {}
143 |         rect = last.get("rect") if isinstance(last, dict) else None
144 |         err = last.get("error") if isinstance(last, dict) else None
145 |         if rect is not None:
146 |             return rect
147 |         if err in ("window_not_ready", "invalid_json"):
148 |             time.sleep(0.1)
149 |             continue
150 |         # If other transient errors, brief retry
151 |         if err:
152 |             time.sleep(0.1)
153 |             continue
154 |         time.sleep(0.1)
155 |     raise RuntimeError(f"Failed to get element rect: {last}")
156 | 
157 | 
158 | def execute_javascript(pid: int, javascript: str):
159 |     """Execute arbitrary JavaScript in the window and return its result.
160 | 
161 |     Retries briefly while the window is still becoming ready.
162 |     """
163 |     if pid not in _pid_to_port:
164 |         _pid_to_port[pid] = _detect_port_for_pid(pid)
165 |     port = _pid_to_port[pid]
166 |     url = f"http://127.0.0.1:{port}/eval"
167 |     last: Dict[str, Any] = {}
168 |     for _ in range(30):  # ~3s total
169 |         resp = _post_json(url, {"javascript": javascript})
170 |         last = resp or {}
171 |         if isinstance(last, dict):
172 |             if "result" in last:
173 |                 return last["result"]
174 |             if last.get("error") in ("window_not_ready", "invalid_json"):
175 |                 time.sleep(0.1)
176 |                 continue
177 |             if last.get("error"):
178 |                 time.sleep(0.1)
179 |                 continue
180 |         time.sleep(0.1)
181 |     raise RuntimeError(f"Failed to execute JavaScript: {last}")
182 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/loops/uiins.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | UI-Ins agent loop implementation for click prediction using litellm.acompletion
  3 | Paper: https://arxiv.org/pdf/2510.20286
  4 | Code: https://github.com/alibaba/UI-Ins
  5 | """
  6 | 
  7 | import asyncio
  8 | import base64
  9 | import json
 10 | import math
 11 | import re
 12 | import uuid
 13 | from io import BytesIO
 14 | from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
 15 | 
 16 | import litellm
 17 | from PIL import Image
 18 | 
 19 | from ..decorators import register_agent
 20 | from ..loops.base import AsyncAgentConfig
 21 | from ..types import AgentCapability, AgentResponse, Messages, Tools
 22 | 
 23 | SYSTEM_PROMPT = """You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.\n\n## Output Format\nReturn a json object with a reasoning process in  tags, a function name and arguments within  XML tags:\n```\n\n...\n\n\n{"name": "grounding", "arguments": }\n\n```\n represents the following item of the action space:\n## Action Space{"action": "click", "coordinate": [x, y]}\nYour task is to accurately locate a UI element based on the instruction. You should first analyze instruction in  tags and finally output the function in  tags.\n"""
 24 | 
 25 | 
 26 | def parse_coordinates(raw_string: str) -> tuple[int, int]:
 27 |     matches = re.findall(r"\[(\d+),\s*(\d+)\]", raw_string)
 28 |     if matches:
 29 |         return tuple(map(int, matches[0]))
 30 |     return -1, -1
 31 | 
 32 | 
 33 | def smart_resize(
 34 |     height: int,
 35 |     width: int,
 36 |     factor: int = 28,
 37 |     min_pixels: int = 3136,
 38 |     max_pixels: int = 8847360,
 39 | ) -> Tuple[int, int]:
 40 |     """Smart resize function similar to qwen_vl_utils."""
 41 |     # Calculate the total pixels
 42 |     total_pixels = height * width
 43 | 
 44 |     # If already within bounds, keep the size, snapped down to a multiple of factor
 45 |     if min_pixels <= total_pixels <= max_pixels:
 46 |         # Round down to a multiple of factor
 47 |         new_height = (height // factor) * factor
 48 |         new_width = (width // factor) * factor
 49 |         return new_height, new_width
 50 | 
 51 |     # Calculate scaling factor
 52 |     if total_pixels > max_pixels:
 53 |         scale = (max_pixels / total_pixels) ** 0.5
 54 |     else:
 55 |         scale = (min_pixels / total_pixels) ** 0.5
 56 | 
 57 |     # Apply scaling
 58 |     new_height = int(height * scale)
 59 |     new_width = int(width * scale)
 60 | 
 61 |     # Round down to a multiple of factor
 62 |     new_height = (new_height // factor) * factor
 63 |     new_width = (new_width // factor) * factor
 64 | 
 65 |     # Ensure minimum size
 66 |     new_height = max(new_height, factor)
 67 |     new_width = max(new_width, factor)
 68 | 
 69 |     return new_height, new_width
 70 | 
 71 | 
 72 | @register_agent(models=r".*UI-Ins.*")
 73 | class UIInsConfig(AsyncAgentConfig):
 74 |     """UI-Ins agent configuration implementing AsyncAgentConfig protocol for click prediction."""
 75 | 
 76 |     def __init__(self):
 77 |         self.current_model = None
 78 |         self.last_screenshot_b64 = None
 79 | 
 80 |     async def predict_step(
 81 |         self,
 82 |         messages: List[Dict[str, Any]],
 83 |         model: str,
 84 |         tools: Optional[List[Dict[str, Any]]] = None,
 85 |         max_retries: Optional[int] = None,
 86 |         stream: bool = False,
 87 |         computer_handler=None,
 88 |         _on_api_start=None,
 89 |         _on_api_end=None,
 90 |         _on_usage=None,
 91 |         _on_screenshot=None,
 92 |         **kwargs,
 93 |     ) -> Dict[str, Any]:
 94 |         raise NotImplementedError()
 95 | 
 96 |     async def predict_click(
 97 |         self, model: str, image_b64: str, instruction: str, **kwargs
 98 |     ) -> Optional[Tuple[float, float]]:
 99 |         """
100 |         Predict click coordinates using UI-Ins model via litellm.acompletion.
101 | 
102 |         Args:
103 |             model: The UI-Ins model name
104 |             image_b64: Base64 encoded image
105 |             instruction: Instruction for where to click
106 | 
107 |         Returns:
108 |             Tuple of (x, y) coordinates scaled back to the original image size (NOTE: never None as written; an unparsable response yields negative values)
109 |         """
110 |         # Decode base64 image
111 |         image_data = base64.b64decode(image_b64)
112 |         image = Image.open(BytesIO(image_data))
113 |         width, height = image.width, image.height
114 | 
115 |         # Smart resize the image (similar to qwen_vl_utils)
116 |         resized_height, resized_width = smart_resize(
117 |             height,
118 |             width,
119 |             factor=28,  # Default factor for Qwen models
120 |             min_pixels=3136,
121 |             max_pixels=4096 * 2160,
122 |         )
123 |         resized_image = image.resize((resized_width, resized_height))
124 |         scale_x, scale_y = width / resized_width, height / resized_height
125 | 
126 |         # Convert resized image back to base64
127 |         buffered = BytesIO()
128 |         resized_image.save(buffered, format="PNG")
129 |         resized_image_b64 = base64.b64encode(buffered.getvalue()).decode()
130 | 
131 |         # Prepare system and user messages
132 |         system_message = {
133 |             "role": "system",
134 |             "content": [
135 |                 {"type": "text", "text": "You are a helpful assistant."},
136 |                 {"type": "text", "text": SYSTEM_PROMPT},
137 |             ],
138 |         }
139 | 
140 |         user_message = {
141 |             "role": "user",
142 |             "content": [
143 |                 {
144 |                     "type": "image_url",
145 |                     "image_url": {"url": f"data:image/png;base64,{resized_image_b64}"},
146 |                 },
147 |                 {"type": "text", "text": instruction},
148 |             ],
149 |         }
150 | 
151 |         # Prepare API call kwargs
152 |         api_kwargs = {
153 |             "model": model,
154 |             "messages": [system_message, user_message],
155 |             "max_tokens": 2056,
156 |             "temperature": 0.0,
157 |             **kwargs,
158 |         }
159 | 
160 |         # Use liteLLM acompletion
161 |         response = await litellm.acompletion(**api_kwargs)
162 | 
163 |         # Extract response text
164 |         output_text = response.choices[0].message.content  # type: ignore
165 | 
166 |         # Extract and rescale coordinates
167 |         pred_x, pred_y = parse_coordinates(output_text)  # type: ignore
168 |         pred_x *= scale_x
169 |         pred_y *= scale_y
170 | 
171 |         return (math.floor(pred_x), math.floor(pred_y))
172 | 
173 |     def get_capabilities(self) -> List[AgentCapability]:
174 |         """Return the capabilities supported by this agent."""
175 |         return ["click"]
176 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/proxy/examples.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Example usage of the proxy server and client requests.
  3 | """
  4 | 
  5 | import dotenv
  6 | 
  7 | dotenv.load_dotenv()
  8 | 
  9 | import asyncio
 10 | import json
 11 | import os
 12 | from typing import Any, Dict
 13 | 
 14 | import aiohttp
 15 | 
 16 | 
 17 | async def test_http_endpoint():
 18 |     """Test the HTTP /responses endpoint."""
 19 | 
 20 |     anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
 21 |     assert isinstance(anthropic_api_key, str), "ANTHROPIC_API_KEY environment variable must be set"
 22 | 
 23 |     # Example 1: Simple text request
 24 |     simple_request = {
 25 |         "model": "anthropic/claude-sonnet-4-5-20250929",
 26 |         "input": "Tell me a three sentence bedtime story about a unicorn.",
 27 |         "env": {"ANTHROPIC_API_KEY": anthropic_api_key},
 28 |     }
 29 | 
 30 |     # Example 2: Multi-modal request with image
 31 |     multimodal_request = {
 32 |         "model": "anthropic/claude-sonnet-4-5-20250929",
 33 |         "input": [
 34 |             {
 35 |                 "role": "user",
 36 |                 "content": [
 37 |                     {"type": "input_text", "text": "what is in this image?"},
 38 |                     {
 39 |                         "type": "input_image",
 40 |                         "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
 41 |                     },
 42 |                 ],
 43 |             }
 44 |         ],
 45 |         "env": {"ANTHROPIC_API_KEY": anthropic_api_key},
 46 |     }
 47 | 
 48 |     # Example 3: Request with custom agent and computer kwargs
 49 |     custom_request = {
 50 |         "model": "anthropic/claude-sonnet-4-5-20250929",
 51 |         "input": "Take a screenshot and tell me what you see",
 52 |         "env": {"ANTHROPIC_API_KEY": anthropic_api_key},
 53 |     }
 54 | 
 55 |     # Test requests
 56 |     base_url = "https://m-linux-96lcxd2c2k.containers.cloud.trycua.com:8443"
 57 |     # base_url = "http://localhost:8000"
 58 |     api_key = os.getenv("CUA_API_KEY")
 59 |     assert isinstance(api_key, str), "CUA_API_KEY environment variable must be set"
 60 | 
 61 |     async with aiohttp.ClientSession() as session:
 62 |         for i, request_data in enumerate(
 63 |             [
 64 |                 simple_request,
 65 |                 # multimodal_request,
 66 |                 custom_request,
 67 |             ],
 68 |             1,
 69 |         ):
 70 |             print(f"\n--- Test {i} ---")
 71 |             print(f"Request: {json.dumps(request_data, indent=2)}")
 72 | 
 73 |             try:
 74 |                 print(f"Sending request to {base_url}/responses")
 75 |                 async with session.post(
 76 |                     f"{base_url}/responses",
 77 |                     json=request_data,
 78 |                     headers={"Content-Type": "application/json", "X-API-Key": api_key},
 79 |                 ) as response:
 80 |                     result = await response.json()
 81 |                     print(f"Status: {response.status}")
 82 |                     print(f"Response: {json.dumps(result, indent=2)}")
 83 | 
 84 |             except Exception as e:
 85 |                 print(f"Error: {e}")
 86 | 
 87 | 
 88 | def curl_examples():
 89 |     """Print curl command examples."""
 90 | 
 91 |     print("=== CURL Examples ===\n")
 92 | 
 93 |     print("1. Simple text request:")
 94 |     print(
 95 |         """curl http://localhost:8000/responses \\
 96 |   -H "Content-Type: application/json" \\
 97 |   -d '{
 98 |     "model": "anthropic/claude-sonnet-4-5-20250929",
 99 |     "input": "Tell me a three sentence bedtime story about a unicorn."
100 |   }'"""
101 |     )
102 | 
103 |     print("\n2. Multi-modal request with image:")
104 |     print(
105 |         """curl http://localhost:8000/responses \\
106 |   -H "Content-Type: application/json" \\
107 |   -d '{
108 |     "model": "anthropic/claude-sonnet-4-5-20250929",
109 |     "input": [
110 |       {
111 |         "role": "user",
112 |         "content": [
113 |           {"type": "input_text", "text": "what is in this image?"},
114 |           {
115 |             "type": "input_image",
116 |             "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
117 |           }
118 |         ]
119 |       }
120 |     ]
121 |   }'"""
122 |     )
123 | 
124 |     print("\n3. Request with custom configuration:")
125 |     print(
126 |         """curl http://localhost:8000/responses \\
127 |   -H "Content-Type: application/json" \\
128 |   -d '{
129 |     "model": "anthropic/claude-sonnet-4-5-20250929",
130 |     "input": "Take a screenshot and tell me what you see",
131 |     "agent_kwargs": {
132 |       "save_trajectory": true,
133 |       "verbosity": 20
134 |     },
135 |     "computer_kwargs": {
136 |       "os_type": "linux",
137 |       "provider_type": "cloud"
138 |     }
139 |   }'"""
140 |     )
141 | 
142 | 
143 | async def test_p2p_client():
144 |     """Example P2P client using peerjs-python."""
145 |     try:
146 |         from aiortc import RTCConfiguration, RTCIceServer
147 |         from peerjs import ConnectionEventType, Peer, PeerOptions
148 | 
149 |         # Set up client peer
150 |         options = PeerOptions(
151 |             host="0.peerjs.com",
152 |             port=443,
153 |             secure=True,
154 |             config=RTCConfiguration(iceServers=[RTCIceServer(urls="stun:stun.l.google.com:19302")]),
155 |         )
156 | 
157 |         client_peer = Peer(id="test-client", peer_options=options)
158 |         await client_peer.start()
159 | 
160 |         # Connect to proxy server
161 |         connection = client_peer.connect("computer-agent-proxy")
162 | 
163 |         @connection.on(ConnectionEventType.Open)
164 |         async def connection_open():
165 |             print("Connected to proxy server")
166 | 
167 |             # Send a test request
168 |             request = {
169 |                 "model": "anthropic/claude-sonnet-4-5-20250929",
170 |                 "input": "Hello from P2P client!",
171 |             }
172 |             await connection.send(json.dumps(request))
173 | 
174 |         @connection.on(ConnectionEventType.Data)
175 |         async def connection_data(data):
176 |             print(f"Received response: {data}")
177 |             await client_peer.destroy()
178 | 
179 |         # Wait for connection
180 |         await asyncio.sleep(10)
181 | 
182 |     except ImportError:
183 |         print("P2P dependencies not available. Install peerjs-python for P2P testing.")
184 |     except Exception as e:
185 |         print(f"P2P test error: {e}")
186 | 
187 | 
188 | if __name__ == "__main__":
189 |     import sys
190 | 
191 |     if len(sys.argv) > 1 and sys.argv[1] == "curl":
192 |         curl_examples()
193 |     elif len(sys.argv) > 1 and sys.argv[1] == "p2p":
194 |         asyncio.run(test_p2p_client())
195 |     else:
196 |         asyncio.run(test_http_endpoint())
197 | 
```

--------------------------------------------------------------------------------
/libs/python/computer-server/computer_server/diorama/safezone.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | UI Safezone Helper - A utility to get accurate bounds for macOS UI elements
  4 | 
  5 | This module provides helper functions to get accurate bounds for macOS UI elements
  6 | like the menubar and dock, which are needed for proper screenshot composition.
  7 | """
  8 | 
  9 | import sys
 10 | import time
 11 | from typing import Any, Dict, Optional, Tuple
 12 | 
 13 | # Import Objective-C bridge libraries
 14 | try:
 15 |     import AppKit
 16 |     import Foundation
 17 |     from AppKit import NSRunningApplication, NSWorkspace
 18 |     from ApplicationServices import (
 19 |         AXUIElementCopyAttributeValue,
 20 |         AXUIElementCopyAttributeValues,
 21 |         AXUIElementCreateApplication,
 22 |         AXUIElementCreateSystemWide,
 23 |         AXUIElementGetTypeID,
 24 |         AXValueGetType,
 25 |         AXValueGetValue,
 26 |         kAXChildrenAttribute,
 27 |         kAXErrorSuccess,
 28 |         kAXMenuBarAttribute,
 29 |         kAXPositionAttribute,
 30 |         kAXRoleAttribute,
 31 |         kAXSizeAttribute,
 32 |         kAXTitleAttribute,
 33 |         kAXValueCGPointType,
 34 |         kAXValueCGSizeType,
 35 |     )
 36 | except ImportError:
 37 |     print("Error: This script requires PyObjC to be installed.")
 38 |     print("Please install it with: pip install pyobjc")
 39 |     sys.exit(1)
 40 | 
 41 | # Accessibility API constants; these assignments rebind the same-named ApplicationServices imports above
 42 | kAXErrorSuccess = 0
 43 | kAXRoleAttribute = "AXRole"
 44 | kAXSubroleAttribute = "AXSubrole"
 45 | kAXTitleAttribute = "AXTitle"
 46 | kAXPositionAttribute = "AXPosition"
 47 | kAXSizeAttribute = "AXSize"
 48 | kAXChildrenAttribute = "AXChildren"
 49 | kAXMenuBarAttribute = "AXMenuBar"
 50 | 
 51 | 
 52 | def element_attribute(element, attribute):
 53 |     """Get an attribute from an accessibility element"""
 54 |     if attribute == kAXChildrenAttribute:
 55 |         err, value = AXUIElementCopyAttributeValues(element, attribute, 0, 999, None)
 56 |         if err == kAXErrorSuccess:
 57 |             if isinstance(value, Foundation.NSArray):
 58 |                 return list(value)
 59 |             else:
 60 |                 return value
 61 |     err, value = AXUIElementCopyAttributeValue(element, attribute, None)
 62 |     if err == kAXErrorSuccess:
 63 |         return value
 64 |     return None
 65 | 
 66 | 
 67 | def element_value(element, type):
 68 |     """Get a value from an accessibility element"""
 69 |     err, value = AXValueGetValue(element, type, None)
 70 |     if err == True:
 71 |         return value
 72 |     return None
 73 | 
 74 | 
 75 | def get_element_bounds(element):
 76 |     """Get the bounds of an accessibility element"""
 77 |     bounds = {"x": 0, "y": 0, "width": 0, "height": 0}
 78 | 
 79 |     # Get position
 80 |     position_value = element_attribute(element, kAXPositionAttribute)
 81 |     if position_value:
 82 |         position_value = element_value(position_value, kAXValueCGPointType)
 83 |         if position_value:
 84 |             bounds["x"] = position_value.x
 85 |             bounds["y"] = position_value.y
 86 | 
 87 |     # Get size
 88 |     size_value = element_attribute(element, kAXSizeAttribute)
 89 |     if size_value:
 90 |         size_value = element_value(size_value, kAXValueCGSizeType)
 91 |         if size_value:
 92 |             bounds["width"] = size_value.width
 93 |             bounds["height"] = size_value.height
 94 | 
 95 |     return bounds
 96 | 
 97 | 
 98 | def find_dock_process():
 99 |     """Find the Dock process"""
100 |     running_apps = NSWorkspace.sharedWorkspace().runningApplications()
101 |     for app in running_apps:
102 |         if app.localizedName() == "Dock" and app.bundleIdentifier() == "com.apple.dock":
103 |             return app.processIdentifier()
104 |     return None
105 | 
106 | 
107 | def get_menubar_bounds():
108 |     """Get the bounds of the macOS menubar
109 | 
110 |     Returns:
111 |         Dictionary with x, y, width, height of the menubar
112 |     """
113 |     # Get the system-wide accessibility element
114 |     system_element = AXUIElementCreateSystemWide()
115 | 
116 |     # Try to find the menubar
117 |     menubar = element_attribute(system_element, kAXMenuBarAttribute)
118 |     if menubar is None:
119 |         # If we can't get it directly, try through the frontmost app
120 |         frontmost_app = NSWorkspace.sharedWorkspace().frontmostApplication()
121 |         if frontmost_app:
122 |             app_pid = frontmost_app.processIdentifier()
123 |             app_element = AXUIElementCreateApplication(app_pid)
124 |             menubar = element_attribute(app_element, kAXMenuBarAttribute)
125 | 
126 |     if menubar is None:
127 |         print("Error: Could not get menubar")
128 |         # Return default menubar bounds as fallback
129 |         return {"x": 0, "y": 0, "width": 1800, "height": 24}
130 | 
131 |     # Get menubar bounds
132 |     return get_element_bounds(menubar)
133 | 
134 | 
135 | def get_dock_bounds():
136 |     """Get the bounds of the macOS Dock
137 | 
138 |     Returns:
139 |         Dictionary with x, y, width, height of the Dock
140 |     """
141 |     dock_pid = find_dock_process()
142 |     if dock_pid is None:
143 |         print("Error: Could not find Dock process")
144 |         # Return empty bounds as fallback
145 |         return {"x": 0, "y": 0, "width": 0, "height": 0}
146 | 
147 |     # Create an accessibility element for the Dock
148 |     dock_element = AXUIElementCreateApplication(dock_pid)
149 |     if dock_element is None:
150 |         print(f"Error: Could not create accessibility element for Dock (PID {dock_pid})")
151 |         return {"x": 0, "y": 0, "width": 0, "height": 0}
152 | 
153 |     # Get the Dock's children
154 |     children = element_attribute(dock_element, kAXChildrenAttribute)
155 |     if not children or len(children) == 0:
156 |         print("Error: Could not get Dock children")
157 |         return {"x": 0, "y": 0, "width": 0, "height": 0}
158 | 
159 |     # Find the Dock's list (first child is usually the main dock list)
160 |     dock_list = None
161 |     for child in children:
162 |         role = element_attribute(child, kAXRoleAttribute)
163 |         if role == "AXList":
164 |             dock_list = child
165 |             break
166 | 
167 |     if dock_list is None:
168 |         print("Error: Could not find Dock list")
169 |         return {"x": 0, "y": 0, "width": 0, "height": 0}
170 | 
171 |     # Get the bounds of the dock list
172 |     return get_element_bounds(dock_list)
173 | 
174 | 
175 | def get_ui_element_bounds():
176 |     """Get the bounds of important UI elements like menubar and dock
177 | 
178 |     Returns:
179 |         Dictionary with menubar and dock bounds
180 |     """
181 |     menubar_bounds = get_menubar_bounds()
182 |     dock_bounds = get_dock_bounds()
183 | 
184 |     return {"menubar": menubar_bounds, "dock": dock_bounds}
185 | 
186 | 
187 | if __name__ == "__main__":
188 |     # Example usage
189 |     bounds = get_ui_element_bounds()
190 |     print("Menubar bounds:", bounds["menubar"])
191 |     print("Dock bounds:", bounds["dock"])
192 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/macos-vm-cli-playbook/lume/cli-reference.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Lume CLI Reference
  3 | description: Command Line Interface reference for Lume
  4 | ---
  5 | 
  6 | import { Callout } from 'fumadocs-ui/components/callout';
  7 | 
  8 | Once installed, you can start using Lume with these common workflows:
  9 | 
 10 | ### Run a Prebuilt VM
 11 | 
 12 | ```bash
 13 | # Run a macOS Sequoia VM
 14 | lume run macos-sequoia-vanilla:latest
 15 | 
 16 | # Run an Ubuntu VM
 17 | lume run ubuntu-noble-vanilla:latest
 18 | ```
 19 | 
 20 | <Callout>
 21 |   We provide [prebuilt VM images](../lume/prebuilt-images) in our [ghcr
 22 |   registry](https://github.com/orgs/trycua/packages).
 23 | </Callout>
 24 | 
 25 | ### Create a Custom VM
 26 | 
 27 | ```bash
 28 | # Create a new macOS VM
 29 | lume create my-macos-vm --cpu 4 --memory 8GB --disk-size 50GB
 30 | 
 31 | # Create a Linux VM
 32 | lume create my-linux-vm --os linux --cpu 2 --memory 4GB
 33 | ```
 34 | 
 35 | <Callout title="Disk Space">
 36 | The actual disk space used by sparse images will be much lower than the logical size listed. You can resize VM disks after creation using `lume set <name> --disk-size <size>`.
 37 | </Callout>
 38 | 
 39 | ## VM Management
 40 | 
 41 | ### lume create &lt;name&gt;
 42 | Create a new macOS or Linux virtual machine.
 43 | 
 44 | **Options:**
 45 | 
 46 | - `--os <os>` - Operating system to install (macOS or linux, default: macOS)
 47 | - `--cpu <cores>` - Number of CPU cores (default: 4)
 48 | - `--memory <size>` - Memory size, e.g., 8GB (default: 4GB)
 49 | - `--disk-size <size>` - Disk size, e.g., 50GB (default: 40GB)
 50 | - `--display <res>` - Display resolution (default: 1024x768)
 51 | - `--ipsw <path>` - Path to IPSW file or 'latest' for macOS VMs
 52 | - `--storage <name>` - VM storage location to use
 53 | 
 54 | **Examples:**
 55 | 
 56 | ```bash
 57 | # Create macOS VM with custom specs
 58 | lume create my-mac --cpu 6 --memory 16GB --disk-size 100GB
 59 | 
 60 | # Create Linux VM
 61 | lume create my-ubuntu --os linux --cpu 2 --memory 8GB
 62 | 
 63 | # Create macOS VM with latest IPSW
 64 | lume create my-sequoia --ipsw latest
 65 | ```
 66 | 
 67 | ### lume run &lt;name&gt;
 68 | Start and run a virtual machine.
 69 | 
 70 | **Options:**
 71 | 
 72 | - `--no-display` - Do not start the VNC client app
 73 | - `--shared-dir <dir>` - Share directory with VM (format: path[:ro|rw])
 74 | - `--mount <path>` - For Linux VMs only, attach a read-only disk image
 75 | - `--registry <url>` - Container registry URL (default: ghcr.io)
 76 | - `--organization <org>` - Organization to pull from (default: trycua)
 77 | - `--vnc-port <port>` - Port to use for the VNC server (default: 0 for auto-assign)
 78 | - `--recovery-mode <boolean>` - For macOS VMs only, start VM in recovery mode (default: false)
 79 | - `--storage <name>` - VM storage location to use
 80 | 
 81 | **Examples:**
 82 | 
 83 | ```bash
 84 | # Run VM with shared directory
 85 | lume run my-vm --shared-dir /path/to/share:rw
 86 | 
 87 | # Run VM without display (headless)
 88 | lume run my-vm --no-display
 89 | 
 90 | # Run macOS VM in recovery mode
 91 | lume run my-mac --recovery-mode true
 92 | ```
 93 | 
 94 | ### lume stop &lt;name&gt;
 95 | Stop a running virtual machine.
 96 | 
 97 | **Options:**
 98 | 
 99 | - `--storage <name>` - VM storage location to use
100 | 
101 | ### lume delete &lt;name&gt;
102 | 
103 | Delete a virtual machine and its associated files.
104 | 
105 | **Options:**
106 | 
107 | - `--force` - Force deletion without confirmation
108 | - `--storage <name>` - VM storage location to use
109 | 
110 | ### lume clone &lt;name&gt; &lt;new-name&gt;
111 | 
112 | Create a copy of an existing virtual machine.
113 | 
114 | **Options:**
115 | 
116 | - `--source-storage <name>` - Source VM storage location
117 | - `--dest-storage <name>` - Destination VM storage location
118 | 
119 | ## VM Information and Configuration
120 | 
121 | ### lume ls
122 | 
123 | List all virtual machines and their status.
124 | 
125 | ### lume get &lt;name&gt;
126 | 
127 | Get detailed information about a specific virtual machine.
128 | 
129 | **Options:**
130 | 
131 | - `-f, --format <format>` - Output format (json|text)
132 | - `--storage <name>` - VM storage location to use
133 | 
134 | ### lume set &lt;name&gt;
135 | 
136 | Modify virtual machine configuration.
137 | 
138 | **Options:**
139 | 
140 | - `--cpu <cores>` - New number of CPU cores (e.g., 4)
141 | - `--memory <size>` - New memory size (e.g., 8192MB or 8GB)
142 | - `--disk-size <size>` - New disk size (e.g., 40960MB or 40GB)
143 | - `--display <res>` - New display resolution in format WIDTHxHEIGHT (e.g., 1024x768)
144 | - `--storage <name>` - VM storage location to use
145 | 
146 | **Examples:**
147 | 
148 | ```bash
149 | # Increase VM memory
150 | lume set my-vm --memory 16GB
151 | 
152 | # Change display resolution
153 | lume set my-vm --display 1920x1080
154 | 
155 | # Add more CPU cores
156 | lume set my-vm --cpu 8
157 | ```
158 | 
159 | ## Image Management
160 | 
161 | ### lume images
162 | 
163 | List available macOS images in local cache.
164 | 
165 | ### lume pull &lt;image&gt;
166 | 
167 | Download a VM image from a container registry.
168 | 
169 | **Options:**
170 | 
171 | - `--registry <url>` - Container registry URL (default: ghcr.io)
172 | - `--organization <org>` - Organization to pull from (default: trycua)
173 | - `--storage <name>` - VM storage location to use
174 | 
175 | ### lume push &lt;name&gt; &lt;image:tag&gt;
176 | 
177 | Upload a VM image to a container registry.
178 | 
179 | **Options:**
180 | 
181 | - `--additional-tags <tags...>` - Additional tags to push the same image to
182 | - `--registry <url>` - Container registry URL (default: ghcr.io)
183 | - `--organization <org>` - Organization/user to push to (default: trycua)
184 | - `--storage <name>` - VM storage location to use
185 | - `--chunk-size-mb <size>` - Chunk size for disk image upload in MB (default: 512)
186 | - `--verbose` - Enable verbose logging
187 | - `--dry-run` - Prepare files and show plan without uploading
188 | - `--reassemble` - Verify integrity by reassembling chunks (requires --dry-run)
189 | 
190 | ### lume ipsw
191 | 
192 | Get the latest macOS restore image URL.
193 | 
194 | ### lume prune
195 | 
196 | Remove cached images to free up disk space.
197 | 
198 | ## Configuration
199 | 
200 | ### lume config
201 | 
202 | Manage Lume configuration settings.
203 | 
204 | **Subcommands:**
205 | 
206 | ##### Storage Management
207 | 
208 | - `lume config storage add <name> <path>` - Add a new VM storage location
209 | - `lume config storage remove <name>` - Remove a VM storage location
210 | - `lume config storage list` - List all VM storage locations
211 | - `lume config storage default <name>` - Set the default VM storage location
212 | 
213 | ##### Cache Management
214 | 
215 | - `lume config cache get` - Get current cache directory
216 | - `lume config cache set <path>` - Set cache directory
217 | 
218 | ##### Image Caching
219 | 
220 | - `lume config caching get` - Show current caching status
221 | - `lume config caching set <boolean>` - Enable or disable image caching
222 | 
223 | ## API Server
224 | 
225 | ### lume serve
226 | 
227 | Start the Lume API server for programmatic access.
228 | 
229 | **Options:**
230 | 
231 | - `--port <port>` - Port to listen on (default: 7777)
232 | 
233 | ## Global Options
234 | 
235 | These options are available for all commands:
236 | 
237 | - `--help` - Show help information
238 | - `--version` - Show version number
239 | 
```

--------------------------------------------------------------------------------
/libs/lumier/src/lib/utils.sh:
--------------------------------------------------------------------------------

```bash
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Wait until an SSH login to a host succeeds.
  4 | #   $1 host/IP  $2 user  $3 password
  5 | #   $4 seconds between attempts (default 5)  $5 max attempts (default 20, 0 = unlimited)
  6 | # Returns 0 once a login works, 1 when the attempts are used up.
  7 | wait_for_ssh() {
  8 |     local host_ip=$1
  9 |     local user=$2
 10 |     local password=$3
 11 |     local retry_interval=${4:-5}
 12 |     local max_retries=${5:-20}
 13 |     local retry_count=0
 14 | 
 15 |     # Only show waiting message in debug mode
 16 |     if [ "${LUMIER_DEBUG:-0}" == "1" ]; then
 17 |         echo "Waiting for SSH to become available on $host_ip..."
 18 |     fi
 19 | 
 20 |     while true; do
 21 |         # Test the command directly in `if` so a failed attempt cannot trip `set -e`;
 22 |         # -q plus the stderr redirect keep the probe silent
 23 |         if sshpass -p "$password" ssh -q -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR "$user@$host_ip" "exit" 2>/dev/null; then
 24 |             echo "SSH is ready on $host_ip!"
 25 |             return 0
 26 |         fi
 27 | 
 28 |         # Plain assignment: `((retry_count++))` returns status 1 when the old value is 0
 29 |         retry_count=$((retry_count + 1))
 30 | 
 31 |         # Exit if maximum retries are reached
 32 |         if [ "$max_retries" -ne 0 ] && [ "$retry_count" -ge "$max_retries" ]; then
 33 |             echo "Maximum retries reached. SSH is not available."
 34 |             return 1
 35 |         fi
 36 | 
 37 |         # Only show retry messages in debug mode
 38 |         if [ "${LUMIER_DEBUG:-0}" == "1" ]; then
 39 |             echo "SSH not ready. Retrying in $retry_interval seconds... (Attempt $retry_count)"
 40 |         fi
 41 |         sleep "$retry_interval"
 42 |     done
 43 | }
 44 | 
 45 | # Run a local script on a remote host over SSH (password auth via sshpass).
 46 | #   $1 host  $2 user  $3 password  $4 local script path  $5 VNC password
 47 | #   $6 data folder (optional; when set, SHARED_FOLDER_PATH is exported to the script)
 48 | # The script is streamed to a remote `bash -s`, prefixed with export lines; the VNC
 49 | # password and data folder are also passed as its positional arguments ($1, $2).
 50 | # Returns 1 on missing arguments or when the ssh command exits non-zero.
 51 | execute_remote_script() {
 52 |     local host="$1"
 53 |     local user="$2"
 54 |     local password="$3"
 55 |     local script_path="$4"
 56 |     local vnc_password="$5"
 57 |     local data_folder="$6"
 58 |     local debug="${LUMIER_DEBUG:-0}"
 59 |     local nl=$'\n'
 60 |     local remote_cmd
 61 |     local ssh_status=0
 62 | 
 63 |     # Check if all required arguments are provided
 64 |     if [ -z "$host" ] || [ -z "$user" ] || [ -z "$password" ] || [ -z "$script_path" ] || [ -z "$vnc_password" ]; then
 65 |         echo "Usage: execute_remote_script <host> <user> <password> <script_path> <vnc_password> [data_folder]"
 66 |         return 1
 67 |     fi
 68 | 
 69 |     # Only show VNC info in debug mode
 70 |     if [ "$debug" == "1" ]; then
 71 |         echo "VNC password exported to VM: $vnc_password"
 72 |     fi
 73 | 
 74 |     # VM always sees shared folders at this path, regardless of container path
 75 |     shared_folder_path=""
 76 |     if [ -n "$data_folder" ]; then
 77 |         shared_folder_path="/Volumes/My Shared Files"
 78 |         if [ "$debug" == "1" ]; then
 79 |             echo "Data folder path in VM: $shared_folder_path"
 80 |         fi
 81 |     fi
 82 | 
 83 |     # Build the script header. Real newlines ($nl) are required: a literal "\n" would
 84 |     # leave every export on the shebang line, where the remote shell treats it as a comment.
 85 |     # printf %q quotes the values so quotes or spaces in them cannot break the script.
 86 |     script_content="#!/usr/bin/env bash${nl}"
 87 |     script_content+="export VNC_PASSWORD=$(printf '%q' "$vnc_password")${nl}"
 88 |     if [ -n "$shared_folder_path" ]; then
 89 |         script_content+="export SHARED_FOLDER_PATH=$(printf '%q' "$shared_folder_path")${nl}"
 90 |     fi
 91 |     script_content+="export VNC_DEBUG=$(printf '%q' "$debug")${nl}"
 92 |     if [ "$debug" == "1" ]; then
 93 |         script_content+="echo \"[DEBUG] Starting on-logon script execution...\"${nl}"
 94 |     fi
 95 | 
 96 |     # Add the original script content
 97 |     script_content+="$(<"$script_path")"
 98 |     if [ "$debug" == "1" ]; then
 99 |         script_content+="${nl}echo \"[DEBUG] Finished executing on-logon script.\"${nl}"
100 |         echo "[DEBUG] Executing remote script with content length: $(echo -n "$script_content" | wc -c) bytes"
101 |         echo "[DEBUG] Script path: $script_path"
102 |         echo "[DEBUG] Connecting to $user@$host to execute script..."
103 |     fi
104 | 
105 |     # Remote command line; %q keeps the two positional arguments intact for the remote shell
106 |     remote_cmd="bash -s -- $(printf '%q' "$vnc_password") $(printf '%q' "$data_folder")"
107 | 
108 |     # Stream the script over stdin with a here-document. stderr is merged into stdout
109 |     # in debug mode and discarded otherwise. The `|| ssh_status=$?` records a failure
110 |     # right away (and keeps `set -e` from aborting before it can be reported).
111 |     if [ "$debug" == "1" ]; then
112 |         sshpass -p "$password" ssh -q -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR "$user@$host" "$remote_cmd" 2>&1 <<EOF || ssh_status=$?
113 | $script_content
114 | EOF
115 |     else
116 |         sshpass -p "$password" ssh -q -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR "$user@$host" "$remote_cmd" 2>/dev/null <<EOF || ssh_status=$?
117 | $script_content
118 | EOF
119 |     fi
120 | 
121 |     # Print completion message only in debug mode
122 |     if [ "$debug" == "1" ]; then
123 |         echo "[DEBUG] Script execution completed."
124 |     fi
125 | 
126 |     # Judge success by the saved ssh status; reading $? here would only reflect
127 |     # the debug `if` statement just above, never the ssh command
128 |     if [ "$ssh_status" -ne 0 ]; then
129 |         echo "Failed to execute script on remote host $host."
130 |         return 1
131 |     fi
132 | }
133 | 
134 | # Print the value of one field from a JSON document.
135 | #   $1 field name  $2 JSON text
136 | # jq is used when it is installed; if it is missing or yields nothing, the field is
137 | # matched with grep/sed instead, first as a quoted string and then as a bare value.
138 | # An empty line is printed when the field cannot be found.
139 | extract_json_field() {
140 |     local field_name=$1
141 |     local input=$2
142 |     local value=""
143 |     local flat=""
144 | 
145 |     # jq first: a real JSON parser is the most reliable option
146 |     if command -v jq &> /dev/null; then
147 |         value=$(echo "$input" | jq -r ".$field_name // empty" 2>/dev/null)
148 |         if [[ -n "$value" ]]; then
149 |             echo "$value"
150 |             return 0
151 |         fi
152 |     fi
153 | 
154 |     # The regex fallback works on the document flattened to a single line
155 |     flat=$(echo "$input" | tr -d '\n')
156 | 
157 |     # Quoted string value: "field": "text"
158 |     value=$(printf '%s' "$flat" | grep -o "\"$field_name\"\s*:\s*\"[^\"]*\"" | sed -E 's/.*":\s*"(.*)"$/\1/')
159 |     if [[ -z "$value" ]]; then
160 |         # Bare value (number, true, false, null): everything up to the next , or }
161 |         value=$(printf '%s' "$flat" | grep -o "\"$field_name\"\s*:\s*[^,}]*" | sed -E 's/.*":\s*(.*)$/\1/')
162 |     fi
163 | 
164 |     # Either the match or, when nothing matched, an empty line
165 |     echo "$value"
166 | }
167 | 
168 | # Look up a field in a JSON file ($1 = field name, $2 = path to the file).
169 | extract_json_field_from_file() {
170 |     local field_name=$1 json_file=$2
171 |     local contents
172 |     contents=$(<"$json_file")
173 |     extract_json_field "$field_name" "$contents"
174 | }
175 | 
176 | # Look up a field in JSON text ($1 = field name, $2 = JSON text).
177 | extract_json_field_from_text() {
178 |     # Same as calling extract_json_field directly with both arguments
179 |     extract_json_field "$1" "$2"
180 | }
181 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/benchmarks/ss-pro.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | ScreenSpot-Pro Benchmark Script
  4 | 
  5 | Evaluates models on the ScreenSpot-Pro dataset for click prediction accuracy.
  6 | Supports both ComputerAgent model strings and custom model classes.
  7 | """
  8 | 
  9 | import argparse
 10 | import asyncio
 11 | import random
 12 | import statistics
 13 | import time
 14 | from typing import Optional
 15 | 
 16 | from datasets import load_dataset
 17 | from tqdm import tqdm
 18 | from utils import (
 19 |     ModelWrapper,
 20 |     get_available_models,
 21 |     get_gpu_memory,
 22 |     is_click_in_bbox,
 23 |     save_results_to_markdown,
 24 |     save_visualizations,
 25 | )
 26 | 
 27 | 
 28 | async def evaluate_model(
 29 |     model_wrapper: ModelWrapper, dataset, max_samples: Optional[int] = None
 30 | ) -> dict:
 31 |     """
 32 |     Evaluate a model on the ScreenSpot-Pro dataset.
 33 | 
 34 |     Args:
 35 |         model_wrapper: ModelWrapper instance
 36 |         dataset: ScreenSpot-Pro dataset (list of samples)
 37 |         max_samples: Maximum number of samples to evaluate (None for all)
 38 | 
 39 |     Returns:
 40 |         Dictionary with evaluation results
 41 |     """
 42 |     print(f"\nEvaluating model: {model_wrapper.model_name}")
 43 | 
 44 |     # Load model
 45 |     await model_wrapper.load_model()
 46 | 
 47 |     total_samples = len(dataset)
 48 |     if max_samples is not None:
 49 |         total_samples = min(max_samples, total_samples)
 50 | 
 51 |     correct_predictions = 0
 52 |     error_predictions = 0
 53 |     results = []
 54 | 
 55 |     for i in tqdm(range(total_samples), desc=f"Evaluating {model_wrapper.model_name}"):
 56 |         sample = dataset[i]
 57 | 
 58 |         # Extract sample data
 59 |         image = sample["image"]
 60 |         instruction = sample["instruction"]
 61 |         bbox = sample["bbox"]  # [x1, y1, x2, y2]
 62 |         sample_id = sample["img_filename"]
 63 | 
 64 |         # Predict click coordinates with timing
 65 |         start_time = time.time()
 66 |         click_coords = await model_wrapper.predict_click(image, instruction)
 67 |         prediction_time = time.time() - start_time
 68 | 
 69 |         # Check if prediction is correct
 70 |         is_correct = is_click_in_bbox(click_coords, bbox)
 71 | 
 72 |         if is_correct:
 73 |             correct_predictions += 1
 74 | 
 75 |         results.append(
 76 |             {
 77 |                 "id": sample_id,
 78 |                 "instruction": instruction,
 79 |                 "bbox": bbox,
 80 |                 "predicted_coords": click_coords,
 81 |                 "is_correct": is_correct,
 82 |                 "failed": False,
 83 |                 "prediction_time": prediction_time,
 84 |             }
 85 |         )
 86 | 
 87 |     # Unload model
 88 |     await model_wrapper.unload_model()
 89 | 
 90 |     # Calculate metrics
 91 |     accuracy = correct_predictions / total_samples if total_samples > 0 else 0.0
 92 |     error_rate = error_predictions / total_samples if total_samples > 0 else 0.0
 93 | 
 94 |     # Calculate timing statistics
 95 |     successful_times = [r["prediction_time"] for r in results if not r["failed"]]
 96 |     avg_prediction_time = sum(successful_times) / len(successful_times) if successful_times else 0.0
 97 |     median_prediction_time = statistics.median(successful_times) if successful_times else 0.0
 98 |     min_prediction_time = min(successful_times) if successful_times else 0.0
 99 |     max_prediction_time = max(successful_times) if successful_times else 0.0
100 | 
101 |     # Get VRAM statistics
102 |     vram_stats = model_wrapper.get_vram_stats()
103 | 
104 |     return {
105 |         "model_name": model_wrapper.model_name,
106 |         "total_samples": total_samples,
107 |         "correct_predictions": correct_predictions,
108 |         "failed_predictions": error_predictions,
109 |         "accuracy": accuracy,
110 |         "failure_rate": error_rate,
111 |         "avg_prediction_time": avg_prediction_time,
112 |         "median_prediction_time": median_prediction_time,
113 |         "min_prediction_time": min_prediction_time,
114 |         "max_prediction_time": max_prediction_time,
115 |         "vram_max_mb": vram_stats["max_mb"],
116 |         "vram_avg_mb": vram_stats["avg_mb"],
117 |         "results": results,
118 |     }
119 | 
120 | 
121 | async def main():
122 |     """
123 |     Main function to run the benchmark.
124 |     """
125 |     # Parse command line arguments
126 |     parser = argparse.ArgumentParser(description="ScreenSpot-Pro Benchmark Script")
127 |     parser.add_argument(
128 |         "--samples", type=int, default=300, help="Number of samples to evaluate (default: 300)"
129 |     )
130 |     parser.add_argument(
131 |         "--seed", type=int, default=42, help="Random seed for shuffling (default: 42)"
132 |     )
133 |     args = parser.parse_args()
134 | 
135 |     # Set random seed
136 |     random.seed(args.seed)
137 | 
138 |     # Load dataset
139 |     print("Loading ScreenSpot-Pro dataset...")
140 |     ds = load_dataset("lmms-lab/ScreenSpot-Pro")
141 |     dataset = ds["train"]  # type: ignore
142 |     # Convert to list to support indexing
143 |     dataset_list = list(dataset)
144 |     print(f"Dataset loaded: {len(dataset_list)} samples")
145 | 
146 |     # Shuffle dataset with seed
147 |     random.shuffle(dataset_list)
148 |     print(f"Dataset shuffled with seed {args.seed}")
149 | 
150 |     # Get available models
151 |     models = get_available_models()
152 | 
153 |     # Evaluation settings
154 |     max_samples = args.samples  # Use command line argument
155 | 
156 |     # Run evaluations
157 |     all_results = []
158 | 
159 |     for model in models:
160 |         model_wrapper = ModelWrapper(model)
161 |         result = await evaluate_model(model_wrapper, dataset_list, max_samples)
162 |         all_results.append(result)
163 | 
164 |         # Print summary
165 |         print(f"\n{result['model_name']} Results:")
166 |         print(f"  Accuracy: {result['accuracy']*100:.2f}%")
167 |         print(f"  Correct: {result['correct_predictions']}/{result['total_samples']}")
168 |         print(f"  Errors: {result['failed_predictions']}")
169 |         print(f"  Error Rate: {result['failure_rate']*100:.2f}%")
170 |         print(f"  Avg Time: {result['avg_prediction_time']:.2f}s")
171 |         print(f"  Median Time: {result['median_prediction_time']:.2f}s")
172 |         print(
173 |             f"  Time Range: {result['min_prediction_time']:.2f}s - {result['max_prediction_time']:.2f}s"
174 |         )
175 |         print(f"  VRAM Max: {result['vram_max_mb']:.1f}MB")
176 |         print(f"  VRAM Avg: {result['vram_avg_mb']:.1f}MB")
177 | 
178 |         # Print GPU memory info
179 |         gpu_memory = get_gpu_memory()
180 |         if gpu_memory and gpu_memory[0] > 0:
181 |             print(f"  GPU Free Memory: {gpu_memory[0]:.1f}MB")
182 | 
183 |     # Save results
184 |     if all_results:
185 |         save_results_to_markdown(all_results)
186 |         save_visualizations(all_results, dataset_list)
187 |         print("\nBenchmark completed successfully!")
188 |     else:
189 |         print("\nNo successful evaluations completed.")
190 | 
191 | 
192 | if __name__ == "__main__":
193 |     asyncio.run(main())
194 | 
```

--------------------------------------------------------------------------------
/libs/lume/src/FileSystem/VMDirectory.swift:
--------------------------------------------------------------------------------

```swift
  1 | import Foundation
  2 | 
  3 | // MARK: - VMDirectory
  4 | 
  5 | /// Manages a virtual machine's directory structure and files
  6 | /// Responsible for:
  7 | /// - Managing VM configuration files
  8 | /// - Handling disk operations
  9 | /// - Managing VM state and locking
 10 | /// - Providing access to VM-related paths
 11 | struct VMDirectory: Sendable {
 12 |     // MARK: - Constants
 13 |     
 14 |     private enum FileNames {
 15 |         static let nvram = "nvram.bin"
 16 |         static let disk = "disk.img"
 17 |         static let config = "config.json"
 18 |         static let sessions = "sessions.json"
 19 |     }
 20 |     
 21 |     // MARK: - Properties
 22 |     
 23 |     let dir: Path
 24 |     let nvramPath: Path
 25 |     let diskPath: Path
 26 |     let configPath: Path
 27 |     let sessionsPath: Path
 28 |     
 29 |     /// The name of the VM directory
 30 |     var name: String { dir.name }
 31 |     
 32 |     // MARK: - Initialization
 33 |     
 34 |     /// Creates a new VMDirectory instance
 35 |     /// - Parameters:
 36 |     ///   - dir: The base directory path for the VM
 37 |     init(_ dir: Path) {
 38 |         self.dir = dir
 39 |         self.nvramPath = dir.file(FileNames.nvram)
 40 |         self.diskPath = dir.file(FileNames.disk)
 41 |         self.configPath = dir.file(FileNames.config)
 42 |         self.sessionsPath = dir.file(FileNames.sessions)
 43 |     }
 44 | }
 45 | 
 46 | // MARK: - VM State Management
 47 | 
 48 | extension VMDirectory {
 49 |     /// Checks if the VM directory is fully initialized with all required files
 50 |     func initialized() -> Bool {
 51 |         // Add detailed logging for debugging
 52 |         let configExists = configPath.exists()
 53 |         let diskExists = diskPath.exists()
 54 |         let nvramExists = nvramPath.exists()
 55 |         
 56 |         // Logger.info(
 57 |         //     "VM directory initialization check", 
 58 |         //     metadata: [
 59 |         //         "directory": dir.path,
 60 |         //         "config_path": configPath.path,
 61 |         //         "config_exists": "\(configExists)",
 62 |         //         "disk_path": diskPath.path,
 63 |         //         "disk_exists": "\(diskExists)",
 64 |         //         "nvram_path": nvramPath.path,
 65 |         //         "nvram_exists": "\(nvramExists)"
 66 |         //     ]
 67 |         // )
 68 |         
 69 |         return configExists && diskExists && nvramExists
 70 |     }
 71 | 
 72 |     /// Checks if the VM directory exists
 73 |     func exists() -> Bool {
 74 |         dir.exists()
 75 |     }
 76 | }
 77 | 
 78 | // MARK: - Disk Management
 79 | 
 80 | extension VMDirectory {
 81 |     /// Resizes the VM's disk image to the specified size, creating it if missing
 82 |     /// - Parameter size: The new size in bytes
 83 |     /// - Throws: `VMDirectoryError.fileCreationFailed` if the image cannot be
 84 |     ///   created, or the underlying `FileHandle` error if opening or
 85 |     ///   truncating fails
 86 |     func setDisk(_ size: UInt64) throws {
 87 |         if !diskPath.exists() {
 88 |             guard FileManager.default.createFile(atPath: diskPath.path, contents: nil) else {
 89 |                 throw VMDirectoryError.fileCreationFailed(diskPath.path)
 90 |             }
 91 |         }
 92 | 
 93 |         // Errors are propagated rather than swallowed so callers never
 94 |         // continue with a disk image of the wrong size.
 95 |         let handle = try FileHandle(forWritingTo: diskPath.url)
 96 |         defer { try? handle.close() }
 97 |         try handle.truncate(atOffset: size)
 98 |     }
 99 | }
100 | 
101 | // MARK: - Configuration Management
102 | 
103 | extension VMDirectory {
104 |     /// Saves the VM configuration to disk
105 |     /// - Parameter config: The configuration to save
106 |     /// - Throws: `VMDirectoryError.invalidConfigData` if encoding fails,
107 |     ///   `VMDirectoryError.fileCreationFailed` if the file cannot be written
108 |     func saveConfig(_ config: VMConfig) throws {
109 |         let encoder = JSONEncoder()
110 |         encoder.outputFormatting = .prettyPrinted
111 |         // Only encoding errors map to invalidConfigData; a write failure is
112 |         // reported separately instead of being masked by a catch-all.
113 |         guard let data = try? encoder.encode(config) else {
114 |             throw VMDirectoryError.invalidConfigData
115 |         }
116 |         guard FileManager.default.createFile(atPath: configPath.path, contents: data) else {
117 |             throw VMDirectoryError.fileCreationFailed(configPath.path)
118 |         }
119 |     }
120 | 
121 |     /// Loads the VM configuration from disk
122 |     /// - Returns: The loaded configuration
123 |     /// - Throws: `VMDirectoryError.configNotFound` if the file cannot be read,
124 |     ///   `VMDirectoryError.invalidConfigData` if it cannot be decoded
125 |     func loadConfig() throws -> VMConfig {
126 |         guard let data = FileManager.default.contents(atPath: configPath.path) else {
127 |             throw VMDirectoryError.configNotFound
128 |         }
129 | 
130 |         do {
131 |             return try JSONDecoder().decode(VMConfig.self, from: data)
132 |         } catch {
133 |             throw VMDirectoryError.invalidConfigData
134 |         }
135 |     }
136 | }
137 | 
138 | // MARK: - VNC Session Management
139 | 
140 | struct VNCSession: Codable {
141 |     let url: String
142 |     let sharedDirectories: [SharedDirectory]?
143 |     
144 |     init(url: String, sharedDirectories: [SharedDirectory]? = nil) {
145 |         self.url = url
146 |         self.sharedDirectories = sharedDirectories
147 |     }
148 | }
149 | 
150 | extension VMDirectory {
151 |     /// Saves VNC session information to disk
152 |     /// - Parameter session: The VNC session (including any shared directories) to save
153 |     /// - Throws: `VMDirectoryError.invalidSessionData` if encoding fails,
154 |     ///   `VMDirectoryError.fileCreationFailed` if the file cannot be written
155 |     func saveSession(_ session: VNCSession) throws {
156 |         let encoder = JSONEncoder()
157 |         encoder.outputFormatting = .prettyPrinted
158 | 
159 |         // Only encoding errors map to invalidSessionData; a write failure is
160 |         // reported separately instead of being masked by a catch-all.
161 |         guard let data = try? encoder.encode(session) else {
162 |             throw VMDirectoryError.invalidSessionData
163 |         }
164 |         guard FileManager.default.createFile(atPath: sessionsPath.path, contents: data) else {
165 |             throw VMDirectoryError.fileCreationFailed(sessionsPath.path)
166 |         }
167 |     }
168 | 
169 |     /// Loads the VNC session information from disk
170 |     /// - Returns: The loaded VNC session
171 |     /// - Throws: `VMDirectoryError.sessionNotFound` if the file cannot be read,
172 |     ///   `VMDirectoryError.invalidSessionData` if it cannot be decoded
173 |     func loadSession() throws -> VNCSession {
174 |         guard let data = FileManager.default.contents(atPath: sessionsPath.path) else {
175 |             throw VMDirectoryError.sessionNotFound
176 |         }
177 | 
178 |         do {
179 |             return try JSONDecoder().decode(VNCSession.self, from: data)
180 |         } catch {
181 |             throw VMDirectoryError.invalidSessionData
182 |         }
183 |     }
184 | 
185 |     /// Removes the VNC session information from disk
186 |     /// Best effort: a missing file or removal failure is ignored.
187 |     func clearSession() {
188 |         try? FileManager.default.removeItem(atPath: sessionsPath.path)
189 |     }
190 | }
191 | 
192 | // MARK: - CustomStringConvertible
193 | extension VMDirectory: CustomStringConvertible {
194 |     var description: String {
195 |         "VMDirectory(path: \(dir.path))"
196 |     }
197 | }
198 | 
199 | extension VMDirectory {
200 |     func delete() throws {
201 |         try FileManager.default.removeItem(atPath: dir.path)
202 |     }
203 | }
204 | 
```

--------------------------------------------------------------------------------
/.github/workflows/npm-publish-cli.yml:
--------------------------------------------------------------------------------

```yaml
  1 | name: Publish @trycua/cli
  2 | 
  3 | on:
  4 |   workflow_dispatch:
  5 |     inputs:
  6 |       version:
  7 |         description: "Version to publish (default: from package.json)"
  8 |         required: false
  9 |         default: ""
 10 | 
 11 | jobs:
 12 |   build-and-publish:
 13 |     permissions:
 14 |       id-token: write
 15 |       contents: write
 16 |       packages: write
 17 | 
 18 |     strategy:
 19 |       matrix:
 20 |         include:
 21 |           - target: bun-linux-x64
 22 |             ext: ""
 23 |             binary_name: cua-linux-x64
 24 |           - target: bun-darwin-x64
 25 |             ext: ""
 26 |             binary_name: cua-darwin-x64
 27 |           - target: bun-darwin-arm64
 28 |             ext: ""
 29 |             binary_name: cua-darwin-arm64
 30 |           - target: bun-windows-x64
 31 |             ext: ".exe"
 32 |             binary_name: cua-windows-x64
 33 | 
 34 |     runs-on: ubuntu-latest
 35 | 
 36 |     steps:
 37 |       - name: Checkout code
 38 |         uses: actions/checkout@v4
 39 |         with:
 40 |           fetch-depth: 0
 41 | 
 42 |       - name: Setup Bun
 43 |         uses: oven-sh/setup-bun@v2
 44 |         with:
 45 |           bun-version: latest
 46 | 
 47 |       - name: Get version
 48 |         id: version
 49 |         env:
 50 |           # Pass the input through the environment instead of interpolating
 51 |           # it into the script, so its value cannot inject shell commands.
 52 |           INPUT_VERSION: ${{ github.event.inputs.version }}
 53 |         run: |
 54 |           VERSION="${INPUT_VERSION:-$(bun -p "require('./libs/typescript/cua-cli/package.json').version")}"
 55 |           echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
 56 | 
 57 |       - name: Install dependencies
 58 |         working-directory: ./libs/typescript/cua-cli
 59 |         run: bun install --frozen-lockfile
 60 | 
 61 |       - name: Build binary
 62 |         working-directory: ./libs/typescript/cua-cli
 63 |         run: |
 64 |           bun build --compile --minify --sourcemap --target=${{ matrix.target }} index.ts --outfile ${{ matrix.binary_name }}${{ matrix.ext }}
 65 |           mkdir -p ../../../dist
 66 |           mv ${{ matrix.binary_name }}${{ matrix.ext }}* ../../../dist/
 67 | 
 68 |       - name: Upload artifacts
 69 |         uses: actions/upload-artifact@v4
 70 |         with:
 71 |           name: cua-binary-${{ matrix.target }}
 72 |           path: dist/
 73 |           if-no-files-found: error
 74 |           retention-days: 1
 75 | 
 76 |   publish-npm:
 77 |     needs: build-and-publish
 78 |     if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/cua-v')
 79 |     runs-on: ubuntu-latest
 80 |     steps:
 81 |       - name: Checkout code
 82 |         uses: actions/checkout@v4
 83 | 
 84 |       - name: Setup Bun
 85 |         uses: oven-sh/setup-bun@v2
 86 |         with:
 87 |           bun-version: latest
 88 | 
 89 |       - name: Install dependencies
 90 |         working-directory: ./libs/typescript/cua-cli
 91 |         run: bun install --frozen-lockfile
 92 | 
 93 |       - name: Publish to npm
 94 |         working-directory: ./libs/typescript/cua-cli
 95 |         env:
 96 |           NPM_CONFIG_TOKEN: ${{ secrets.NPM_TOKEN }}
 97 |         run: bun publish --production --access public --tolerate-republish
 98 | 
 99 |   create-release:
100 |     needs: [build-and-publish, publish-npm]
101 |     runs-on: ubuntu-latest
102 |     permissions:
103 |       contents: write
104 |     steps:
105 |       - name: Checkout code
106 |         uses: actions/checkout@v4
107 | 
108 |       - name: Setup Bun
109 |         uses: oven-sh/setup-bun@v2
110 |         with:
111 |           bun-version: latest
112 | 
113 |       - name: Get version
114 |         id: version
115 |         run: |
116 |           VERSION=$(bun -p "require('./libs/typescript/cua-cli/package.json').version")
117 |           echo "version=${VERSION}" >> $GITHUB_OUTPUT
118 |           echo "tag=cua-v${VERSION}" >> $GITHUB_OUTPUT
119 | 
120 |       - name: Download all artifacts
121 |         uses: actions/download-artifact@v4
122 |         with:
123 |           path: dist
124 |           merge-multiple: true
125 | 
126 |       - name: Create Release
127 |         id: create_release
128 |         uses: actions/create-release@v1
129 |         env:
130 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
131 |         with:
132 |           tag_name: ${{ steps.version.outputs.tag }}
133 |           release_name: cua-cli v${{ steps.version.outputs.version }}
134 |           body: |
135 |             # cua-cli v${{ steps.version.outputs.version }}
136 | 
137 |             ## Installation
138 | 
139 |             ### Using install script (recommended)
140 |             ```bash
141 |             # For Linux/macOS
142 |             curl -fsSL https://cua.ai/cli/install.sh | sh
143 | 
144 |             # For Windows (PowerShell)
145 |             irm https://cua.ai/cli/install.ps1 | iex
146 |             ```
147 | 
148 |             ### Using npm/bun
149 |             ```bash
150 |             # Using bun
151 |             bun add -g @trycua/cli
152 | 
153 |             # Or using npm
154 |             npm install -g @trycua/cli
155 |             ```
156 | 
157 |             ### From source
158 |             ```bash
159 |             git clone -b ${{ steps.version.outputs.tag }} https://github.com/trycua/cua.git
160 |             cd cua/libs/typescript/cua-cli
161 |             bun install
162 |             bun link
163 |             bun link cua-cli
164 |             ```
165 | 
166 |             ## Release Assets
167 |             - `cua-darwin-arm64`: macOS (Apple Silicon)
168 |             - `cua-darwin-x64`: macOS (Intel)
169 |             - `cua-linux-x64`: Linux (x86_64)
170 |             - `cua-windows-x64.exe`: Windows (x86_64)
171 |           draft: false
172 |           prerelease: false
173 | 
174 |       - name: Upload Linux Binary
175 |         uses: actions/upload-release-asset@v1
176 |         with:
177 |           upload_url: ${{ steps.create_release.outputs.upload_url }}
178 |           asset_path: ./dist/cua-linux-x64
179 |           asset_name: cua-linux-x64
180 |           asset_content_type: application/octet-stream
181 |         env:
182 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
183 | 
184 |       - name: Upload macOS Intel Binary
185 |         uses: actions/upload-release-asset@v1
186 |         with:
187 |           upload_url: ${{ steps.create_release.outputs.upload_url }}
188 |           asset_path: ./dist/cua-darwin-x64
189 |           asset_name: cua-darwin-x64
190 |           asset_content_type: application/octet-stream
191 |         env:
192 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
193 | 
194 |       - name: Upload macOS Apple Silicon Binary
195 |         uses: actions/upload-release-asset@v1
196 |         with:
197 |           upload_url: ${{ steps.create_release.outputs.upload_url }}
198 |           asset_path: ./dist/cua-darwin-arm64
199 |           asset_name: cua-darwin-arm64
200 |           asset_content_type: application/octet-stream
201 |         env:
202 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
203 | 
204 |       - name: Upload Windows Binary
205 |         uses: actions/upload-release-asset@v1
206 |         with:
207 |           upload_url: ${{ steps.create_release.outputs.upload_url }}
208 |           asset_path: ./dist/cua-windows-x64.exe
209 |           asset_name: cua-windows-x64.exe
210 |           asset_content_type: application/octet-stream
211 |         env:
212 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
213 | 
```

--------------------------------------------------------------------------------
/libs/xfce/Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
  1 | # CUA Docker XFCE Container
  2 | # Vanilla XFCE desktop with noVNC and computer-server
  3 | 
  4 | FROM ubuntu:22.04
  5 | 
  6 | # Avoid prompts from apt
  7 | ENV DEBIAN_FRONTEND=noninteractive
  8 | 
  9 | # Set environment variables
 10 | ENV HOME=/home/cua
 11 | ENV DISPLAY=:1
 12 | ENV VNC_PORT=5901
 13 | ENV NOVNC_PORT=6901
 14 | ENV API_PORT=8000
 15 | ENV VNC_RESOLUTION=1024x768
 16 | ENV VNC_COL_DEPTH=24
 17 | 
 18 | # Install system dependencies first (including sudo)
 19 | RUN apt-get update && apt-get install -y \
 20 |     # System utilities
 21 |     sudo \
 22 |     unzip \
 23 |     zip \
 24 |     xdg-utils \
 25 |     gcc \
 26 |     # Qt/XCB runtime deps for PyQt5 (libqxcb.so)
 27 |     libxcb-icccm4 \
 28 |     libxcb-image0 \
 29 |     libxcb-keysyms1 \
 30 |     libxcb-render-util0 \
 31 |     libxcb-xinerama0 \
 32 |     libxcb-shape0 \
 33 |     libxcb-randr0 \
 34 |     libxcb-xfixes0 \
 35 |     libxcb-sync1 \
 36 |     libxcb-util1 \
 37 |     libxcb-cursor0 \
 38 |     libxkbcommon-x11-0 \
 39 |     # Desktop environment
 40 |     xfce4 \
 41 |     xfce4-terminal \
 42 |     dbus-x11 \
 43 |     # VNC server
 44 |     tigervnc-standalone-server \
 45 |     tigervnc-common \
 46 |     # noVNC dependencies
 47 |     # python will be installed via deadsnakes as 3.12 \
 48 |     git \
 49 |     net-tools \
 50 |     netcat \
 51 |     supervisor \
 52 |     # Computer-server dependencies
 53 |     # python-tk/dev for 3.12 will be installed later \
 54 |     gnome-screenshot \
 55 |     wmctrl \
 56 |     ffmpeg \
 57 |     socat \
 58 |     xclip \
 59 |     # Browser
 60 |     wget \
 61 |     software-properties-common \
 62 |     # Build tools
 63 |     build-essential \
 64 |     libncursesw5-dev \
 65 |     libssl-dev \
 66 |     libsqlite3-dev \
 67 |     tk-dev \
 68 |     libgl1-mesa-dev \
 69 |     libgdbm-dev \
 70 |     libc6-dev \
 71 |     libbz2-dev \
 72 |     libffi-dev \
 73 |     zlib1g-dev \
 74 |     && rm -rf /var/lib/apt/lists/*
 75 | 
 76 | # Install Python 3.12 from deadsnakes (keep system python3 for apt)
 77 | RUN add-apt-repository -y ppa:deadsnakes/ppa && \
 78 |     apt-get update && apt-get install -y \
 79 |     python3.12 python3.12-venv python3.12-dev python3.12-tk \
 80 |     && \
 81 |     python3.12 -m ensurepip --upgrade && \
 82 |     python3.12 -m pip install --upgrade pip setuptools wheel && \
 83 |     rm -rf /var/lib/apt/lists/*
 84 | 
 85 | # Ensure 'python' points to Python 3.12
 86 | RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 2
 87 | 
 88 | # Remove screensavers and power manager to avoid popups and lock screens
 89 | RUN apt-get remove -y \
 90 |     xfce4-power-manager \
 91 |     xfce4-power-manager-data \
 92 |     xfce4-power-manager-plugins \
 93 |     xfce4-screensaver \
 94 |     light-locker \
 95 |     xscreensaver \
 96 |     xscreensaver-data || true
 97 | 
 98 | # Create user after sudo is installed
 99 | RUN useradd -m -s /bin/bash -G sudo cua && \
100 |     echo "cua:cua" | chpasswd && \
101 |     echo "cua ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
102 | 
103 | # Install Firefox from Mozilla PPA (snap-free) - inline to avoid script issues
104 | RUN apt-get update && \
105 |     add-apt-repository -y ppa:mozillateam/ppa && \
106 |     echo 'Package: *\nPin: release o=LP-PPA-mozillateam\nPin-Priority: 1001' > /etc/apt/preferences.d/mozilla-firefox && \
107 |     apt-get update && \
108 |     apt-get install -y firefox && \
109 |     echo 'pref("datareporting.policy.firstRunURL", "");\npref("datareporting.policy.dataSubmissionEnabled", false);\npref("datareporting.healthreport.service.enabled", false);\npref("datareporting.healthreport.uploadEnabled", false);\npref("trailhead.firstrun.branches", "nofirstrun-empty");\npref("browser.aboutwelcome.enabled", false);' > /usr/lib/firefox/browser/defaults/preferences/firefox.js && \
110 |     update-alternatives --install /usr/bin/x-www-browser x-www-browser /usr/bin/firefox 100 && \
111 |     update-alternatives --install /usr/bin/gnome-www-browser gnome-www-browser /usr/bin/firefox 100 && \
112 |     rm -rf /var/lib/apt/lists/*
113 | 
114 | # Install noVNC
115 | RUN git clone https://github.com/novnc/noVNC.git /opt/noVNC && \
116 |     git clone https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \
117 |     ln -s /opt/noVNC/vnc.html /opt/noVNC/index.html
118 | 
119 | # Pre-create cache directory with correct ownership before pip install
120 | RUN mkdir -p /home/cua/.cache && \
121 |     chown -R cua:cua /home/cua/.cache
122 | 
123 | # Install computer-server using Python 3.12 pip
124 | RUN python3.12 -m pip install cua-computer-server
125 | 
126 | # Install GTK and WebKit dependencies for pywebview
127 | RUN apt-get update && apt-get install -y \
128 |     python3-gi \
129 |     python3-gi-cairo \
130 |     gir1.2-gtk-3.0 \
131 |     gir1.2-webkit2-4.1 \
132 |     libgirepository1.0-dev \
133 |     libcairo2-dev \
134 |     pkg-config \
135 |     gobject-introspection \
136 |     && rm -rf /var/lib/apt/lists/*
137 | 
138 | # Install pywebview (GTK backend) and cua-bench-ui; specs are quoted so ">=" is not a shell redirect
139 | RUN python3.12 -m pip install "pywebview[gtk]"
140 | RUN python3.12 -m pip install --no-cache-dir "cua-bench-ui>=0.7.0"
141 | 
142 | # Install playwright and Firefox dependencies
143 | RUN python3.12 -m pip install playwright && \
144 |     python3.12 -m playwright install --with-deps firefox
145 | 
146 | # Fix any cache files created by pip
147 | RUN chown -R cua:cua /home/cua/.cache
148 | 
149 | # Copy startup scripts
150 | COPY src/supervisor/ /etc/supervisor/conf.d/
151 | COPY src/scripts/ /usr/local/bin/
152 | 
153 | # Make scripts executable
154 | RUN chmod +x /usr/local/bin/*.sh
155 | 
156 | # Setup VNC
157 | RUN chown -R cua:cua /home/cua
158 | USER cua
159 | WORKDIR /home/cua
160 | 
161 | # Create VNC directory (no password needed with SecurityTypes None)
162 | RUN mkdir -p $HOME/.vnc
163 | 
164 | # Configure XFCE for first start
165 | RUN mkdir -p $HOME/.config/xfce4/xfconf/xfce-perchannel-xml $HOME/.config/xfce4 $HOME/.config/autostart
166 | 
167 | # Copy XFCE config to disable browser launching and welcome screens
168 | COPY --chown=cua:cua src/xfce-config/helpers.rc $HOME/.config/xfce4/helpers.rc
169 | COPY --chown=cua:cua src/xfce-config/xfce4-session.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-session.xml
170 | COPY --chown=cua:cua src/xfce-config/xfce4-power-manager.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-power-manager.xml
171 | 
172 | # Disable autostart for screensaver, lock screen, and power manager
173 | RUN echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-tips-autostart.desktop && \
174 |     echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-screensaver.desktop && \
175 |     echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/light-locker.desktop && \
176 |     echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-power-manager.desktop && \
177 |     chown -R cua:cua $HOME/.config
178 | 
179 | # Create storage and shared directories, and Firefox cache directory
180 | RUN mkdir -p $HOME/storage $HOME/shared $HOME/.cache/dconf $HOME/.mozilla/firefox && \
181 |     chown -R cua:cua $HOME/storage $HOME/shared $HOME/.cache $HOME/.mozilla $HOME/.vnc
182 | 
183 | USER root
184 | 
185 | # Expose ports
186 | EXPOSE $VNC_PORT $NOVNC_PORT $API_PORT
187 | 
188 | # Start services via supervisor
189 | CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"]
190 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/operator_validator.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | OperatorValidatorCallback
  3 | 
  4 | Ensures agent output actions conform to expected schemas by fixing common issues:
  5 | - click: add default button='left' if missing
  6 | - keypress: wrap keys string into a list
  7 | - etc.
  8 | 
  9 | This runs in on_llm_end, which receives the output array (AgentMessage[] as dicts).
 10 | The purpose is to avoid spending another LLM call to fix broken computer call syntax when possible.
 11 | """
 12 | 
 13 | from __future__ import annotations
 14 | 
 15 | from typing import Any, Dict, List
 16 | 
 17 | from .base import AsyncCallbackHandler
 18 | 
 19 | 
 20 | class OperatorNormalizerCallback(AsyncCallbackHandler):
 21 |     """Normalizes common computer call hallucinations / errors in computer call syntax."""
 22 | 
 23 |     async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 24 |         # Mutate in-place as requested, but still return the list for chaining
 25 |         for item in output or []:
 26 |             if item.get("type") != "computer_call":
 27 |                 continue
 28 |             action = item.get("action")
 29 |             if not isinstance(action, dict):
 30 |                 continue
 31 | 
 32 |             # rename mouse click actions to "click"
 33 |             for mouse_btn in ["left", "right", "wheel", "back", "forward"]:
 34 |                 if action.get("type", "") == f"{mouse_btn}_click":
 35 |                     action["type"] = "click"
 36 |                     action["button"] = mouse_btn
 37 |             # rename hotkey actions to "keypress"
 38 |             for alias in ["hotkey", "key", "press", "key_press"]:
 39 |                 if action.get("type", "") == alias:
 40 |                     action["type"] = "keypress"
 41 |             # assume click actions
 42 |             if "button" in action and "type" not in action:
 43 |                 action["type"] = "click"
 44 |             if "click" in action and "type" not in action:
 45 |                 action["type"] = "click"
 46 |             if ("scroll_x" in action or "scroll_y" in action) and "type" not in action:
 47 |                 action["type"] = "scroll"
 48 |             if "text" in action and "type" not in action:
 49 |                 action["type"] = "type"
 50 | 
 51 |             action_type = action.get("type")
 52 | 
 53 |             def _keep_keys(action: Dict[str, Any], keys_to_keep: List[str]):
 54 |                 """Keep only the provided keys on action; delete everything else.
 55 |                 Always ensures required 'type' is present if listed in keys_to_keep.
 56 |                 """
 57 |                 for key in list(action.keys()):
 58 |                     if key not in keys_to_keep:
 59 |                         del action[key]
 60 | 
 61 |             # rename "coordinate" to "x", "y"
 62 |             if "coordinate" in action:
 63 |                 action["x"] = action["coordinate"][0]
 64 |                 action["y"] = action["coordinate"][1]
 65 |                 del action["coordinate"]
 66 |             if action_type == "click":
 67 |                 # convert "click" to "button"
 68 |                 if "button" not in action and "click" in action:
 69 |                     action["button"] = action["click"]
 70 |                     del action["click"]
 71 |                 # default button to "left"
 72 |                 action["button"] = action.get("button", "left")
 73 |             # add default scroll x, y if missing
 74 |             if action_type == "scroll":
 75 |                 action["scroll_x"] = action.get("scroll_x", 0)
 76 |                 action["scroll_y"] = action.get("scroll_y", 0)
 77 |             # ensure keys arg is a list (normalize aliases first)
 78 |             if action_type == "keypress":
 79 |                 keys = action.get("keys")
 80 |                 for keys_alias in ["keypress", "key", "press", "key_press", "text"]:
 81 |                     if keys_alias in action:
 82 |                         action["keys"] = action[keys_alias]
 83 |                         del action[keys_alias]
 84 |                 keys = action.get("keys")
 85 |                 if isinstance(keys, str):
 86 |                     action["keys"] = keys.replace("-", "+").split("+") if len(keys) > 1 else [keys]
 87 |             required_keys_by_type = {
 88 |                 # OpenAI actions
 89 |                 "click": ["type", "button", "x", "y"],
 90 |                 "double_click": ["type", "x", "y"],
 91 |                 "drag": ["type", "path"],
 92 |                 "keypress": ["type", "keys"],
 93 |                 "move": ["type", "x", "y"],
 94 |                 "screenshot": ["type"],
 95 |                 "scroll": ["type", "scroll_x", "scroll_y", "x", "y"],
 96 |                 "type": ["type", "text"],
 97 |                 "wait": ["type"],
 98 |                 # Anthropic actions
 99 |                 "left_mouse_down": ["type", "x", "y"],
100 |                 "left_mouse_up": ["type", "x", "y"],
101 |                 "triple_click": ["type", "button", "x", "y"],
102 |             }
103 |             keep = required_keys_by_type.get(action_type or "")
104 |             if keep:
105 |                 _keep_keys(action, keep)
106 | 
107 |         # # Second pass: if an assistant message is immediately followed by a computer_call,
108 |         # # replace the assistant message itself with a reasoning message with summary text.
109 |         # if isinstance(output, list):
110 |         #     for i, item in enumerate(output):
111 |         #         # AssistantMessage shape: { type: 'message', role: 'assistant', content: OutputContent[] }
112 |         #         if item.get("type") == "message" and item.get("role") == "assistant":
113 |         #             next_idx = i + 1
114 |         #             if next_idx >= len(output):
115 |         #                 continue
116 |         #             next_item = output[next_idx]
117 |         #             if not isinstance(next_item, dict):
118 |         #                 continue
119 |         #             if next_item.get("type") != "computer_call":
120 |         #                 continue
121 |         #             contents = item.get("content") or []
122 |         #             # Extract text from OutputContent[]
123 |         #             text_parts: List[str] = []
124 |         #             if isinstance(contents, list):
125 |         #                 for c in contents:
126 |         #                     if isinstance(c, dict) and c.get("type") == "output_text" and isinstance(c.get("text"), str):
127 |         #                         text_parts.append(c["text"])
128 |         #             text_content = "\n".join(text_parts).strip()
129 |         #             # Replace assistant message with reasoning message
130 |         #             output[i] = {
131 |         #                 "type": "reasoning",
132 |         #                 "summary": [
133 |         #                     {
134 |         #                         "type": "summary_text",
135 |         #                         "text": text_content,
136 |         #                     }
137 |         #                 ],
138 |         #             }
139 | 
140 |         return output
141 | 
```

--------------------------------------------------------------------------------
/scripts/install-cli.ps1:
--------------------------------------------------------------------------------

```
  1 | # CUA CLI Installation Script for Windows
  2 | $ErrorActionPreference = "Stop"
  3 | 
  4 | function Install-WithBun {
  5 |     # Installs the CUA CLI with Bun (installing Bun first if missing), falling back to npm.
  6 |     # Returns a single $true/$false. Native command output is piped to Out-Host so it is
  7 |     # shown to the user instead of being collected into the return value, which would make
  8 |     # `if (Install-WithBun)` succeed even on failure (a multi-element array is truthy).
  9 |     Write-Host "Installing CUA CLI using Bun..." -ForegroundColor Yellow
 10 | 
 11 |     # Looks up the published version via npm and writes it to the .version marker file
 12 |     $writeVersionFile = {
 13 |         try {
 14 |             $publishedVersion = (npm view '@trycua/cli' version) 2>$null
 15 |             if (-not $publishedVersion) { $publishedVersion = "unknown" }
 16 |         } catch { $publishedVersion = "unknown" }
 17 |         $installDir = "$env:USERPROFILE\.cua\bin"
 18 |         if (-not (Test-Path $installDir)) { New-Item -ItemType Directory -Path $installDir -Force | Out-Null }
 19 |         Set-Content -Path (Join-Path $installDir ".version") -Value $publishedVersion -NoNewline
 20 |     }
 21 | 
 22 |     # Check if bun is already installed
 23 |     if (-not (Get-Command bun -ErrorAction SilentlyContinue)) {
 24 |         Write-Host "Installing Bun..." -ForegroundColor Yellow
 25 |         try {
 26 |             powershell -c "irm bun.sh/install.ps1|iex" | Out-Host
 27 |             # Refresh environment variables
 28 |             $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
 29 |             # Add bun to PATH for this session if not already there
 30 |             $bunPath = "$env:USERPROFILE\.bun\bin"
 31 |             if ($env:Path -notlike "*$bunPath*") {
 32 |                 $env:Path = "$bunPath;$env:Path"
 33 |             }
 34 |         } catch {
 35 |             Write-Host "Error: Failed to install Bun. Please install manually from https://bun.sh" -ForegroundColor Red
 36 |             return $false
 37 |         }
 38 |     }
 39 | 
 40 |     # Verify bun installation
 41 |     if (-not (Get-Command bun -ErrorAction SilentlyContinue)) {
 42 |         Write-Host "Error: Bun installation failed. Please install manually from https://bun.sh" -ForegroundColor Red
 43 |         return $false
 44 |     }
 45 | 
 46 |     try {
 47 |         bun add -g '@trycua/cli' | Out-Host
 48 |         # A failing native command does not throw by itself, so check its exit code
 49 |         if ($LASTEXITCODE -ne 0) { throw "bun add exited with code $LASTEXITCODE" }
 50 |         & $writeVersionFile
 51 |         return $true
 52 |     } catch {
 53 |         Write-Host "Warning: Failed to install with Bun, trying npm..." -ForegroundColor Yellow
 54 |         try {
 55 |             npm install -g '@trycua/cli' | Out-Host
 56 |             if ($LASTEXITCODE -ne 0) { throw "npm install exited with code $LASTEXITCODE" }
 57 |             & $writeVersionFile
 58 |             return $true
 59 |         } catch {
 60 |             Write-Host "Error: Installation failed with npm as well." -ForegroundColor Red
 61 |             return $false
 62 |         }
 63 |     }
 64 | }
 65 | 
 66 | Write-Host "Installing CUA CLI..." -ForegroundColor Green
 67 | 
 68 | # Runs the Bun/npm fallback and ends the script: exit 0 on success, otherwise prints
 69 | # manual-install instructions and exits 1. Shared by every fallback path below.
 70 | function Invoke-BunFallback {
 71 |     if (Install-WithBun) {
 72 |         exit 0
 73 |     }
 74 |     Write-Host "Error: Installation failed. Please try installing manually:" -ForegroundColor Red
 75 |     Write-Host "   irm https://cua.ai/install.ps1 | iex"
 76 |     exit 1
 77 | }
 78 | 
 79 | # Determine if this is a 64-bit system
 80 | $is64Bit = [Environment]::Is64BitOperatingSystem
 81 | if (-not $is64Bit) {
 82 |     Write-Host "Warning: 32-bit Windows is not supported. Falling back to Bun installation..." -ForegroundColor Yellow
 83 |     Invoke-BunFallback
 84 | }
 85 | 
 86 | # Get the latest release version
 87 | try {
 88 |     $release = Invoke-RestMethod -Uri "https://api.github.com/repos/trycua/cua/releases/latest" -ErrorAction Stop
 89 |     $version = $release.tag_name -replace '^cua-v', ''
 90 |     # Look for the windows binary in the release assets
 91 |     $windowsAsset = $release.assets | Where-Object { $_.name -eq 'cua-windows-x64.exe' }
 92 | 
 93 |     if (-not $windowsAsset) {
 94 |         throw "Windows binary not found in release assets"
 95 |     }
 96 | 
 97 |     $binaryUrl = $windowsAsset.browser_download_url
 98 | } catch {
 99 |     Write-Host "Warning: Could not fetch latest release, falling back to Bun installation" -ForegroundColor Yellow
100 |     Invoke-BunFallback
101 | }
102 | 
103 | # Create installation directory
104 | $installDir = "$env:USERPROFILE\.cua\bin"
105 | if (-not (Test-Path $installDir)) {
106 |     New-Item -ItemType Directory -Path $installDir -Force | Out-Null
107 | }
108 | 
109 | $binaryPath = Join-Path $installDir "cua.exe"
110 | 
111 | # Download the binary
112 | Write-Host "Downloading CUA CLI $version for Windows x64..." -ForegroundColor Cyan
113 | try {
114 |     Invoke-WebRequest -Uri $binaryUrl -OutFile $binaryPath -ErrorAction Stop
115 | } catch {
116 |     Write-Host "Warning: Failed to download pre-built binary, falling back to Bun installation" -ForegroundColor Yellow
117 |     Invoke-BunFallback
118 | }
119 | 
120 | # Write version file for binary install
121 | try {
122 |     Set-Content -Path (Join-Path $installDir ".version") -Value $version -NoNewline
123 | } catch {
124 |     # Non-fatal
125 | }
126 | 
127 | # Add to PATH if not already there
128 | $currentPath = [Environment]::GetEnvironmentVariable("Path", "User")
129 | if ($currentPath -notlike "*$installDir*") {
130 |     # Avoid a stray ';' when the user-level Path is empty or already ends with ';'
131 |     $newUserPath = if ($currentPath) { "$($currentPath.TrimEnd(';'));$installDir" } else { $installDir }
132 |     [Environment]::SetEnvironmentVariable("Path", $newUserPath, "User")
133 |     $env:Path = "$env:Path;$installDir"
134 |     Write-Host "Success: Added $installDir to your PATH" -ForegroundColor Green
135 | }
136 | 
137 | # Verify installation
138 | if (Test-Path $binaryPath) {
139 |     Write-Host "Success: CUA CLI $version installed successfully to $binaryPath" -ForegroundColor Green
140 |     Write-Host ""
141 |     Write-Host "Get started with:" -ForegroundColor Cyan
142 |     Write-Host "   cua login"
143 |     Write-Host "   cua create --os linux --configuration small --region north-america"
144 |     Write-Host ""
145 |     Write-Host "For more help, visit: https://docs.cua.ai/libraries/cua-cli" -ForegroundColor Cyan
146 | 
147 |     # Offer to add to PATH if not already there
148 |     if (-not ($env:Path -like "*$installDir*")) {
149 |         Write-Host ""
150 |         Write-Host "Note: Please restart your terminal or run the following command to use CUA CLI:" -ForegroundColor Yellow
151 |         Write-Host "   `$env:Path += ';$installDir'"
152 |     }
153 | } else {
154 |     Write-Host "Error: Installation failed. Please try installing manually:" -ForegroundColor Red
155 |     Write-Host "   irm https://cua.ai/install.ps1 | iex"
156 |     exit 1
157 | }
```

--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------

```json
  1 | {
  2 |     "configurations": [
  3 |         {
  4 |             "name": "Agent UI",
  5 |             "type": "debugpy",
  6 |             "request": "launch",
  7 |             "program": "examples/agent_ui_examples.py",
  8 |             "console": "integratedTerminal",
  9 |             "justMyCode": false,
 10 |             "python": "${workspaceFolder:cua-root}/.venv/bin/python",
 11 |             "cwd": "${workspaceFolder:cua-root}",
 12 |             "env": {
 13 |                 "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som"
 14 |             }
 15 |         },
 16 |         {
 17 |             "name": "Computer UI",
 18 |             "type": "debugpy",
 19 |             "request": "launch",
 20 |             "program": "examples/computer_ui_examples.py",
 21 |             "console": "integratedTerminal",
 22 |             "justMyCode": false,
 23 |             "python": "${workspaceFolder:cua-root}/.venv/bin/python",
 24 |             "cwd": "${workspaceFolder:cua-root}",
 25 |             "env": {
 26 |                 "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som"
 27 |             }
 28 |         },
 29 |         {
 30 |             "name": "Run Computer Examples",
 31 |             "type": "debugpy",
 32 |             "request": "launch",
 33 |             "program": "examples/computer_examples.py",
 34 |             "console": "integratedTerminal",
 35 |             "justMyCode": true,
 36 |             "python": "${workspaceFolder:cua-root}/.venv/bin/python",
 37 |             "cwd": "${workspaceFolder:cua-root}",
 38 |             "env": {
 39 |                 "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som"
 40 |             }
 41 |         },
 42 |         {
 43 |             "name": "Run Agent Examples",
 44 |             "type": "debugpy",
 45 |             "request": "launch",
 46 |             "program": "examples/agent_examples.py",
 47 |             "console": "integratedTerminal",
 48 |             "justMyCode": false,
 49 |             "python": "${workspaceFolder:cua-root}/.venv/bin/python",
 50 |             "cwd": "${workspaceFolder:cua-root}",
 51 |             "env": {
 52 |                 "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som"
 53 |             }
 54 |         },
 55 |         {
 56 |             "name": "SOM: Run Experiments (No OCR)",
 57 |             "type": "debugpy",
 58 |             "request": "launch",
 59 |             "program": "examples/som_examples.py",
 60 |             "args": [
 61 |                 "examples/test_data",
 62 |                 "--output-dir",
 63 |                 "examples/output",
 64 |                 "--ocr",
 65 |                 "none",
 66 |                 "--mode",
 67 |                 "experiment"
 68 |             ],
 69 |             "console": "integratedTerminal",
 70 |             "justMyCode": false,
 71 |             "python": "${workspaceFolder:cua-root}/.venv/bin/python",
 72 |             "cwd": "${workspaceFolder:cua-root}",
 73 |             "env": {
 74 |                 "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som"
 75 |             }
 76 |         },
 77 |         {
 78 |             "name": "SOM: Run Experiments (EasyOCR)",
 79 |             "type": "debugpy",
 80 |             "request": "launch",
 81 |             "program": "examples/som_examples.py",
 82 |             "args": [
 83 |                 "examples/test_data",
 84 |                 "--output-dir",
 85 |                 "examples/output",
 86 |                 "--ocr",
 87 |                 "easyocr",
 88 |                 "--mode",
 89 |                 "experiment"
 90 |             ],
 91 |             "console": "integratedTerminal",
 92 |             "justMyCode": false,
 93 |             "python": "${workspaceFolder:cua-root}/.venv/bin/python",
 94 |             "cwd": "${workspaceFolder:cua-root}",
 95 |             "env": {
 96 |                 "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som"
 97 |             }
 98 |         },
 99 |         {
100 |             "name": "Run Computer Server",
101 |             "type": "debugpy",
102 |             "request": "launch",
103 |             "program": "${workspaceFolder}/libs/python/computer-server/run_server.py",
104 |             "console": "integratedTerminal",
105 |             "justMyCode": true,
106 |             "python": "${workspaceFolder:cua-root}/.venv/bin/python",
107 |             "cwd": "${workspaceFolder:cua-root}",
108 |             "env": {
109 |                 "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer:${workspaceFolder:cua-root}/libs/python/agent:${workspaceFolder:cua-root}/libs/python/som"
110 |             }
111 |         },
112 |         {
113 |             "name": "Run Computer Server with Args",
114 |             "type": "debugpy",
115 |             "request": "launch",
116 |             "program": "${workspaceFolder}/libs/python/computer-server/run_server.py",
117 |             "args": [
118 |                 "--host",
119 |                 "0.0.0.0",
120 |                 "--port",
121 |                 "8000",
122 |                 "--log-level",
123 |                 "debug"
124 |             ],
125 |             "console": "integratedTerminal",
126 |             "justMyCode": false,
127 |             "python": "${workspaceFolder:cua-root}/.venv/bin/python",
128 |             "cwd": "${workspaceFolder:cua-root}",
129 |             "env": {
130 |                 "PYTHONPATH": "${workspaceFolder:cua-root}/libs/python/core:${workspaceFolder:cua-root}/libs/python/computer-server"
131 |             }
132 |         },
133 |         {
134 |             "type": "lldb",
135 |             "request": "launch",
136 |             "args": [],
137 |             "cwd": "${workspaceFolder:cua-root}/libs/lume",
138 |             "name": "Debug lume (libs/lume)",
139 |             "program": "${workspaceFolder:cua-root}/libs/lume/.build/debug/lume",
140 |             "preLaunchTask": "swift: Build Debug lume (libs/lume)"
141 |         },
142 |         {
143 |             "type": "lldb",
144 |             "request": "launch",
145 |             "args": [],
146 |             "cwd": "${workspaceFolder:cua-root}/libs/lume",
147 |             "name": "Release lume (libs/lume)",
148 |             "program": "${workspaceFolder:cua-root}/libs/lume/.build/release/lume",
149 |             "preLaunchTask": "swift: Build Release lume (libs/lume)"
150 |         }
151 |     ]
152 | }
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/loops/internvl.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | InternVL agent loop implementation for click prediction using litellm.acompletion.
  3 | 
  4 | Implements the ScreenSpot InternVL grounding baseline behavior:
  5 | - Uses the exact grounding prompt format with <image> and <ref> tags
  6 | - Expects coordinates in 0-1000 normalized range in formats [[x1,y1,x2,y2]] or [[x,y]]
  7 | - Converts to pixel coordinates relative to the original screenshot size
  8 | 
  9 | Note: We do NOT manually load the InternVL model; acompletions (via HuggingFaceLocalAdapter)
 10 | will handle loading based on the provided model name.
 11 | """
 12 | 
 13 | from __future__ import annotations
 14 | 
 15 | import base64
 16 | import math
 17 | import re
 18 | from io import BytesIO
 19 | from typing import Any, Dict, List, Optional, Tuple
 20 | 
 21 | import litellm
 22 | from PIL import Image
 23 | 
 24 | from ..decorators import register_agent
 25 | from ..types import AgentCapability
 26 | from .composed_grounded import ComposedGroundedConfig
 27 | 
 28 | # Regex patterns for extracting coordinates
 29 | # Accept optional whitespace and optional decimal fractions
 30 | _NUM = r"(\d+(?:\.\d+)?)"
 31 | _POINT_PATTERN = re.compile(r"\[\[\s*" + _NUM + r"\s*,\s*" + _NUM + r"\s*\]\]")
 32 | _BBOX_PATTERN = re.compile(
 33 |     r"\[\[\s*" + _NUM + r"\s*,\s*" + _NUM + r"\s*,\s*" + _NUM + r"\s*,\s*" + _NUM + r"\s*\]\]"
 34 | )
 35 | 
 36 | 
 37 | def _extract_first_point(text: str) -> Optional[Tuple[float, float]]:
 38 |     """Extract the first [[x,y]] as normalized (0-1000) floats."""
 39 |     m = _POINT_PATTERN.search(text)
 40 |     if not m:
 41 |         return None
 42 |     try:
 43 |         x = float(m.group(1))
 44 |         y = float(m.group(2))
 45 |         return x, y
 46 |     except Exception:
 47 |         return None
 48 | 
 49 | 
 50 | def _extract_last_bbox(text: str) -> Optional[Tuple[float, float, float, float]]:
 51 |     """Extract the last [[x1,y1,x2,y2]] as normalized (0-1000) floats."""
 52 |     matches = list(_BBOX_PATTERN.finditer(text))
 53 |     if not matches:
 54 |         return None
 55 |     m = matches[-1]
 56 |     try:
 57 |         x1 = float(m.group(1))
 58 |         y1 = float(m.group(2))
 59 |         x2 = float(m.group(3))
 60 |         y2 = float(m.group(4))
 61 |         return x1, y1, x2, y2
 62 |     except Exception:
 63 |         return None
 64 | 
 65 | 
 66 | def _scale_norm_to_pixels(x_norm: float, y_norm: float, width: int, height: int) -> Tuple[int, int]:
 67 |     """Scale 0-1000 normalized coordinates to pixel coordinates for given image size."""
 68 |     x_px = int(math.floor((x_norm / 1000.0) * width))
 69 |     y_px = int(math.floor((y_norm / 1000.0) * height))
 70 |     # Clamp to image bounds just in case
 71 |     x_px = max(0, min(width - 1, x_px))
 72 |     y_px = max(0, min(height - 1, y_px))
 73 |     return x_px, y_px
 74 | 
 75 | 
 76 | @register_agent(models=r"(?i).*InternVL.*")
 77 | class InternVLConfig(ComposedGroundedConfig):
 78 |     """InternVL agent configuration reusing ComposedGroundedConfig for steps and
 79 |     overriding predict_click to implement ScreenSpot InternVL grounding baseline."""
 80 | 
 81 |     async def predict_step(
 82 |         self,
 83 |         messages: List[Dict[str, Any]],
 84 |         model: str,
 85 |         tools: Optional[List[Dict[str, Any]]] = None,
 86 |         max_retries: Optional[int] = None,
 87 |         stream: bool = False,
 88 |         computer_handler=None,
 89 |         _on_api_start=None,
 90 |         _on_api_end=None,
 91 |         _on_usage=None,
 92 |         _on_screenshot=None,
 93 |         **kwargs,
 94 |     ) -> Dict[str, Any]:
 95 |         """Fallback to a self-composed model"""
 96 |         return await super().predict_step(
 97 |             messages=messages,
 98 |             model=f"{model}+{model}",
 99 |             tools=tools,
100 |             max_retries=max_retries,
101 |             stream=stream,
102 |             computer_handler=computer_handler,
103 |             _on_api_start=_on_api_start,
104 |             _on_api_end=_on_api_end,
105 |             _on_usage=_on_usage,
106 |             _on_screenshot=_on_screenshot,
107 |             **kwargs,
108 |         )
109 | 
110 |     async def predict_click(
111 |         self, model: str, image_b64: str, instruction: str, **kwargs
112 |     ) -> Optional[Tuple[int, int]]:
113 |         """
114 |         Predict click coordinates using InternVL via litellm.acompletion.
115 | 
116 |         Behavior mirrors the ScreenSpot InternVL baseline:
117 |         - Prompt: "<image>\nPlease provide the bounding box coordinate of the UI element this user instruction describes: <ref>{instruction}</ref>. Answer in the format of [[x1, y1, x2, y2]]"
118 |         - Parse either [[x,y]] point or [[x1,y1,x2,y2]] bbox, using bbox center if point missing
119 |         - Coordinates are 0-1000 normalized; convert to pixel coordinates for the original screenshot
120 |         """
121 |         try:
122 |             # Decode image dimensions to scale the normalized outputs
123 |             img_bytes = base64.b64decode(image_b64)
124 |             image = Image.open(BytesIO(img_bytes))
125 |             width, height = image.size
126 |         except Exception:
127 |             # If decoding fails, proceed with a safe default size to avoid crash
128 |             width, height = 1920, 1080
129 | 
130 |         # Build grounding prompt exactly like the baseline
131 |         grounding_prompt = (
132 |             f"Please provide the bounding box coordinate of the UI element this user instruction describes: <ref>{instruction}</ref>. "
133 |             f"Answer in the format of [[x1, y1, x2, y2]]"
134 |         )
135 | 
136 |         # Prepare messages for LiteLLM
137 |         messages = [
138 |             {
139 |                 "role": "user",
140 |                 "content": [
141 |                     {
142 |                         "type": "image_url",
143 |                         "image_url": {"url": f"data:image/png;base64,{image_b64}"},
144 |                     },
145 |                     {"type": "text", "text": grounding_prompt},
146 |                 ],
147 |             }
148 |         ]
149 | 
150 |         # Call acompletion; HuggingFaceLocalAdapter/model handler will handle InternVL loading
151 |         api_kwargs = {
152 |             "model": model,
153 |             "messages": messages,
154 |             # Conservative generation params akin to baseline (deterministic)
155 |             "max_tokens": kwargs.get("max_tokens", 256),
156 |             "temperature": kwargs.get("temperature", 0.0),
157 |         }
158 | 
159 |         response = await litellm.acompletion(**api_kwargs)
160 |         output_text = (response.choices[0].message.content or "").strip()  # type: ignore
161 | 
162 |         # print(f"InternVL output: {output_text}")
163 | 
164 |         # Try to parse a point first; if absent, parse bbox and take center
165 |         point = _extract_first_point(output_text)
166 |         if point is None:
167 |             bbox = _extract_last_bbox(output_text)
168 |             if bbox is None:
169 |                 return None
170 |             x1, y1, x2, y2 = bbox
171 |             cx = (x1 + x2) / 2.0
172 |             cy = (y1 + y2) / 2.0
173 |             point = (cx, cy)
174 | 
175 |         x_norm, y_norm = point
176 |         x_px, y_px = _scale_norm_to_pixels(x_norm, y_norm, width, height)
177 |         return (x_px, y_px)
178 | 
179 |     def get_capabilities(self) -> List[AgentCapability]:
180 |         return ["click", "step"]
181 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/benchmarks/interactive.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | Interactive Click Prediction Tool
  4 | 
  5 | Takes screenshots and allows testing multiple models interactively.
  6 | Models are loaded/unloaded one at a time to avoid memory issues.
  7 | """
  8 | 
  9 | import asyncio
 10 | import os
 11 | from datetime import datetime
 12 | from typing import Any, Dict, List
 13 | 
 14 | from utils import (
 15 |     ModelWrapper,
 16 |     get_available_models,
 17 |     save_prediction_visualization,
 18 |     take_screenshot,
 19 | )
 20 | 
 21 | 
 22 | async def predict_with_all_models(image, instruction: str, models) -> List[Dict[str, Any]]:
 23 |     """
 24 |     Predict click coordinates with all models sequentially.
 25 | 
 26 |     Args:
 27 |         image: PIL Image to analyze
 28 |         instruction: Instruction text
 29 |         models: List of model instances
 30 | 
 31 |     Returns:
 32 |         List of prediction results
 33 |     """
 34 |     predictions = []
 35 | 
 36 |     for model in models:
 37 |         model_wrapper = ModelWrapper(model)
 38 |         print(f"\n🔄 Loading {model_wrapper.model_name}...")
 39 | 
 40 |         try:
 41 |             # Load model
 42 |             await model_wrapper.load_model()
 43 | 
 44 |             # Predict
 45 |             coords = await model_wrapper.predict_click(image, instruction)
 46 | 
 47 |             predictions.append(
 48 |                 {"model_name": model_wrapper.model_name, "coords": coords, "error": None}
 49 |             )
 50 | 
 51 |             if coords:
 52 |                 print(f"✅ {model_wrapper.model_name}: ({coords[0]}, {coords[1]})")
 53 |             else:
 54 |                 print(f"❌ {model_wrapper.model_name}: No prediction")
 55 | 
 56 |         except Exception as e:
 57 |             print(f"❌ {model_wrapper.model_name}: ERROR - {str(e)}")
 58 |             predictions.append(
 59 |                 {"model_name": model_wrapper.model_name, "coords": None, "error": str(e)}
 60 |             )
 61 | 
 62 |         finally:
 63 |             # Always unload model to free memory
 64 |             try:
 65 |                 await model_wrapper.unload_model()
 66 |                 print(f"🗑️  Unloaded {model_wrapper.model_name}")
 67 |             except Exception as e:
 68 |                 print(f"⚠️  Error unloading {model_wrapper.model_name}: {e}")
 69 | 
 70 |     return predictions
 71 | 
 72 | 
 73 | def print_header():
 74 |     """Print the interactive tool header."""
 75 |     print("=" * 60)
 76 |     print("🖱️  Interactive Click Prediction Tool")
 77 |     print("=" * 60)
 78 |     print("Commands:")
 79 |     print("  • Type an instruction to test models on last screenshot")
 80 |     print("  • 'screenshot' - Take a new screenshot")
 81 |     print("  • 'models' - List available models")
 82 |     print("  • 'quit' or 'exit' - Exit the tool")
 83 |     print("=" * 60)
 84 |     print("💡 Tip: Take a screenshot first, then send instructions to test models!")
 85 | 
 86 | 
 87 | def print_models(models):
 88 |     """Print available models."""
 89 |     print("\n📋 Available Models:")
 90 |     for i, model in enumerate(models, 1):
 91 |         if isinstance(model, str):
 92 |             print(f"  {i}. {model}")
 93 |         else:
 94 |             print(f"  {i}. models.{model.__class__.__name__}")
 95 | 
 96 | 
 97 | async def main():
 98 |     """
 99 |     Main interactive loop.
100 |     """
101 |     print_header()
102 | 
103 |     # Get available models
104 |     models = get_available_models()
105 |     print_models(models)
106 | 
107 |     # Create output directory for visualizations
108 |     output_dir = "interactive_output"
109 |     os.makedirs(output_dir, exist_ok=True)
110 | 
111 |     session_count = 0
112 |     last_screenshot = None
113 |     screenshot_timestamp = None
114 | 
115 |     while True:
116 |         try:
117 |             # Get user input
118 |             print(f"\n{'='*40}")
119 |             user_input = input("🎯 Enter instruction (or command): ").strip()
120 | 
121 |             if not user_input:
122 |                 continue
123 | 
124 |             # Handle commands
125 |             if user_input.lower() in ["quit", "exit", "q"]:
126 |                 print("👋 Goodbye!")
127 |                 break
128 | 
129 |             elif user_input.lower() == "models":
130 |                 print_models(models)
131 |                 continue
132 | 
133 |             elif user_input.lower() == "screenshot":
134 |                 print("📸 Taking screenshot...")
135 |                 try:
136 |                     last_screenshot = take_screenshot()
137 |                     screenshot_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
138 |                     screenshot_path = os.path.join(
139 |                         output_dir, f"screenshot_{screenshot_timestamp}.png"
140 |                     )
141 |                     last_screenshot.save(screenshot_path)
142 |                     print(f"✅ Screenshot captured and saved to: {screenshot_path}")
143 |                     print(f"📝 Ready for instructions! Screenshot size: {last_screenshot.size}")
144 |                 except Exception as e:
145 |                     print(f"❌ Error taking screenshot: {e}")
146 |                 continue
147 | 
148 |             # Handle instruction input
149 |             if last_screenshot is None:
150 |                 print(
151 |                     "⚠️  No screenshot available! Please take a screenshot first using 'screenshot' command."
152 |                 )
153 |                 continue
154 | 
155 |             session_count += 1
156 |             print(f"\n🎯 Session {session_count}: '{user_input}'")
157 |             print(f"📷 Using screenshot from: {screenshot_timestamp}")
158 | 
159 |             # Predict with all models using last screenshot
160 |             print(f"\n🤖 Testing {len(models)} models on screenshot...")
161 |             predictions = await predict_with_all_models(last_screenshot, user_input, models)
162 | 
163 |             # Display results summary
164 |             print("\n📊 Results Summary:")
165 |             print("-" * 50)
166 |             for pred in predictions:
167 |                 if pred["coords"]:
168 |                     print(f"✅ {pred['model_name']}: ({pred['coords'][0]}, {pred['coords'][1]})")
169 |                 elif pred["error"]:
170 |                     print(f"❌ {pred['model_name']}: ERROR - {pred['error']}")
171 |                 else:
172 |                     print(f"❌ {pred['model_name']}: No prediction")
173 | 
174 |             # Save visualization
175 |             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
176 |             vis_filename = f"session_{session_count:03d}_{timestamp}.png"
177 |             vis_path = os.path.join(output_dir, vis_filename)
178 | 
179 |             try:
180 |                 save_prediction_visualization(last_screenshot, user_input, predictions, vis_path)
181 |                 print(f"\n💾 Visualization saved to: {vis_path}")
182 |             except Exception as e:
183 |                 print(f"⚠️  Error saving visualization: {e}")
184 | 
185 |             print(f"\n✨ Session {session_count} completed!")
186 | 
187 |         except KeyboardInterrupt:
188 |             print("\n\n👋 Interrupted by user. Goodbye!")
189 |             break
190 |         except Exception as e:
191 |             print(f"\n❌ Unexpected error: {e}")
192 |             print("Continuing...")
193 | 
194 | 
195 | if __name__ == "__main__":
196 |     try:
197 |         asyncio.run(main())
198 |     except KeyboardInterrupt:
199 |         print("\n👋 Goodbye!")
200 |     except Exception as e:
201 |         print(f"❌ Fatal error: {e}")
202 | 
```

--------------------------------------------------------------------------------
/.github/workflows/pypi-publish-agent.yml:
--------------------------------------------------------------------------------

```yaml
  1 | name: Publish Agent Package
  2 | 
  3 | on:
  4 |   push:
  5 |     tags:
  6 |       - "agent-v*"
  7 |   workflow_dispatch:
  8 |     inputs:
  9 |       version:
 10 |         description: "Version to publish (without v prefix)"
 11 |         required: true
 12 |         default: "0.1.0"
 13 |   workflow_call:
 14 |     inputs:
 15 |       version:
 16 |         description: "Version to publish"
 17 |         required: true
 18 |         type: string
 19 | 
 20 | # Adding permissions at workflow level
 21 | permissions:
 22 |   contents: write
 23 | 
 24 | jobs:
 25 |   # Resolve the version to publish and look up the latest PyPI releases of
 26 |   # the cua-computer, cua-som and cua-core dependencies.
 27 |   prepare:
 28 |     runs-on: macos-latest
 29 |     outputs:
 30 |       version: ${{ steps.get-version.outputs.version }}
 31 |       computer_version: ${{ steps.update-deps.outputs.computer_version }}
 32 |       som_version: ${{ steps.update-deps.outputs.som_version }}
 33 |       core_version: ${{ steps.update-deps.outputs.core_version }}
 34 |     steps:
 35 |       - uses: actions/checkout@v4
 36 |         with:
 37 |           ref: main
 38 |           fetch-depth: 0
 39 | 
 40 |       - name: Ensure latest main branch
 41 |         run: |
 42 |           git fetch origin main
 43 |           git reset --hard origin/main
 44 |           echo "Current HEAD commit:"
 45 |           git log -1 --oneline
 46 | 
 47 |       - name: Determine version
 48 |         id: get-version
 49 |         # Hand caller-controlled values to the script through the environment
 50 |         # rather than expanding expressions inside it, so a crafted version
 51 |         # string or tag name cannot be executed as shell code.
 52 |         env:
 53 |           INPUT_VERSION: ${{ inputs.version }}
 54 |           EVENT_INPUT_VERSION: ${{ github.event.inputs.version }}
 55 |           EVENT_NAME: ${{ github.event_name }}
 56 |           GIT_REF: ${{ github.ref }}
 57 |         run: |
 58 |           # Check inputs.version first (works for workflow_call regardless of event_name)
 59 |           if [ -n "$INPUT_VERSION" ]; then
 60 |             VERSION="$INPUT_VERSION"
 61 |           elif [ "$EVENT_NAME" == "push" ]; then
 62 |             # Extract version from tag (for package-specific tags)
 63 |             if [[ "$GIT_REF" =~ ^refs/tags/agent-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then
 64 |               VERSION="${BASH_REMATCH[1]}"
 65 |             else
 66 |               echo "ERROR: Invalid tag format for agent"
 67 |               exit 1
 68 |             fi
 69 |           elif [ -n "$EVENT_INPUT_VERSION" ]; then
 70 |             VERSION="$EVENT_INPUT_VERSION"
 71 |           else
 72 |             echo "ERROR: No version found (inputs.version, event.inputs.version, and tag all empty)"
 73 |             exit 1
 74 |           fi
 75 | 
 76 |           echo "Agent version: $VERSION"
 77 |           echo "version=$VERSION" >> "$GITHUB_OUTPUT"
 78 | 
 79 |       - name: Set up Python
 80 |         uses: actions/setup-python@v4
 81 |         with:
 82 |           python-version: "3.11"
 83 | 
 84 |       # NOTE(review): the pyproject.toml edits below happen in this job's
 85 |       # checkout only; confirm how the separate publish job picks them up.
 86 |       - name: Update dependencies to latest versions
 87 |         id: update-deps
 88 |         run: |
 89 |           cd libs/python/agent
 90 | 
 91 |           # Install required package for PyPI API access
 92 |           pip install requests
 93 | 
 94 |           # Create a more robust Python script for PyPI version checking
 95 |           cat > get_latest_versions.py << 'EOF'
 96 |           import requests
 97 |           import json
 98 |           import sys
 99 | 
100 |           def get_package_version(package_name, fallback="0.1.0"):
101 |               try:
102 |                   # Bounded wait: an unresponsive PyPI yields the fallback instead of hanging the job
103 |                   response = requests.get(f'https://pypi.org/pypi/{package_name}/json', timeout=30)
104 |                   print(f"API Response Status for {package_name}: {response.status_code}", file=sys.stderr)
105 | 
106 |                   if response.status_code != 200:
107 |                       print(f"API request failed for {package_name}, using fallback version", file=sys.stderr)
108 |                       return fallback
109 | 
110 |                   data = json.loads(response.text)
111 | 
112 |                   if 'info' not in data:
113 |                       print(f"Missing 'info' key in API response for {package_name}, using fallback version", file=sys.stderr)
114 |                       return fallback
115 | 
116 |                   return data['info']['version']
117 |               except Exception as e:
118 |                   print(f"Error fetching version for {package_name}: {str(e)}", file=sys.stderr)
119 |                   return fallback
120 | 
121 |           # Get latest versions
122 |           print(get_package_version('cua-computer'))
123 |           print(get_package_version('cua-som'))
124 |           print(get_package_version('cua-core'))
125 |           EOF
126 | 
127 |           # Execute the script to get the versions (one version per stdout line)
128 |           VERSIONS=($(python get_latest_versions.py))
129 |           LATEST_COMPUTER=${VERSIONS[0]}
130 |           LATEST_SOM=${VERSIONS[1]}
131 |           LATEST_CORE=${VERSIONS[2]}
132 | 
133 |           echo "Latest cua-computer version: $LATEST_COMPUTER"
134 |           echo "Latest cua-som version: $LATEST_SOM"
135 |           echo "Latest cua-core version: $LATEST_CORE"
136 | 
137 |           # Output the versions for the next job
138 |           echo "computer_version=$LATEST_COMPUTER" >> "$GITHUB_OUTPUT"
139 |           echo "som_version=$LATEST_SOM" >> "$GITHUB_OUTPUT"
140 |           echo "core_version=$LATEST_CORE" >> "$GITHUB_OUTPUT"
141 | 
142 |           # Determine major version for version constraint
143 |           COMPUTER_MAJOR=$(echo "$LATEST_COMPUTER" | cut -d. -f1)
144 |           SOM_MAJOR=$(echo "$LATEST_SOM" | cut -d. -f1)
145 |           CORE_MAJOR=$(echo "$LATEST_CORE" | cut -d. -f1)
146 | 
147 |           NEXT_COMPUTER_MAJOR=$((COMPUTER_MAJOR + 1))
148 |           NEXT_SOM_MAJOR=$((SOM_MAJOR + 1))
149 |           NEXT_CORE_MAJOR=$((CORE_MAJOR + 1))
150 | 
151 |           # Update dependencies in pyproject.toml
152 |           if [[ "$OSTYPE" == "darwin"* ]]; then
153 |             # macOS version of sed needs an empty string for -i
154 |             sed -i '' "s/\"cua-computer>=.*,<.*\"/\"cua-computer>=$LATEST_COMPUTER,<$NEXT_COMPUTER_MAJOR.0.0\"/" pyproject.toml
155 |             sed -i '' "s/\"cua-som>=.*,<.*\"/\"cua-som>=$LATEST_SOM,<$NEXT_SOM_MAJOR.0.0\"/" pyproject.toml
156 |             sed -i '' "s/\"cua-core>=.*,<.*\"/\"cua-core>=$LATEST_CORE,<$NEXT_CORE_MAJOR.0.0\"/" pyproject.toml
157 |           else
158 |             # Linux version
159 |             sed -i "s/\"cua-computer>=.*,<.*\"/\"cua-computer>=$LATEST_COMPUTER,<$NEXT_COMPUTER_MAJOR.0.0\"/" pyproject.toml
160 |             sed -i "s/\"cua-som>=.*,<.*\"/\"cua-som>=$LATEST_SOM,<$NEXT_SOM_MAJOR.0.0\"/" pyproject.toml
161 |             sed -i "s/\"cua-core>=.*,<.*\"/\"cua-core>=$LATEST_CORE,<$NEXT_CORE_MAJOR.0.0\"/" pyproject.toml
162 |           fi
163 | 
164 |           # Display the updated dependencies
165 |           echo "Updated dependencies in pyproject.toml:"
166 |           grep -E "cua-computer|cua-som|cua-core" pyproject.toml
167 | 
168 |   # Delegate build and upload to the shared reusable publish workflow.
169 |   publish:
170 |     needs: prepare
171 |     uses: ./.github/workflows/pypi-reusable-publish.yml
172 |     with:
173 |       package_name: "agent"
174 |       package_dir: "libs/python/agent"
175 |       version: ${{ needs.prepare.outputs.version }}
176 |       is_lume_package: false
177 |       base_package_name: "cua-agent"
178 |     secrets:
179 |       PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
180 | 
181 |   # NOTE: values appended to GITHUB_ENV are visible only to later steps of
182 |   # this same job; other jobs must read needs.prepare.outputs.* directly.
183 |   set-env-variables:
184 |     needs: [prepare, publish]
185 |     runs-on: macos-latest
186 |     steps:
187 |       - name: Set environment variables for use in other jobs
188 |         run: |
189 |           echo "COMPUTER_VERSION=${{ needs.prepare.outputs.computer_version }}" >> "$GITHUB_ENV"
190 |           echo "SOM_VERSION=${{ needs.prepare.outputs.som_version }}" >> "$GITHUB_ENV"
191 |           echo "CORE_VERSION=${{ needs.prepare.outputs.core_version }}" >> "$GITHUB_ENV"
192 | 
```
Page 8/28FirstPrevNextLast