#
tokens: 49740/50000 18/616 files (page 9/28)
lines: on (toggle) GitHub
raw markdown copy reset
This is page 9 of 28. Use http://codebase.md/trycua/cua?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .cursorignore
├── .dockerignore
├── .editorconfig
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── scripts
│   │   ├── get_pyproject_version.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── README.md
│   │       └── test_get_pyproject_version.py
│   └── workflows
│       ├── bump-version.yml
│       ├── ci-lume.yml
│       ├── docker-publish-cua-linux.yml
│       ├── docker-publish-cua-windows.yml
│       ├── docker-publish-kasm.yml
│       ├── docker-publish-xfce.yml
│       ├── docker-reusable-publish.yml
│       ├── link-check.yml
│       ├── lint.yml
│       ├── npm-publish-cli.yml
│       ├── npm-publish-computer.yml
│       ├── npm-publish-core.yml
│       ├── publish-lume.yml
│       ├── pypi-publish-agent.yml
│       ├── pypi-publish-computer-server.yml
│       ├── pypi-publish-computer.yml
│       ├── pypi-publish-core.yml
│       ├── pypi-publish-mcp-server.yml
│       ├── pypi-publish-som.yml
│       ├── pypi-reusable-publish.yml
│       ├── python-tests.yml
│       ├── test-cua-models.yml
│       └── test-validation-script.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .prettierignore
├── .prettierrc.yaml
├── .vscode
│   ├── docs.code-workspace
│   ├── extensions.json
│   ├── launch.json
│   ├── libs-ts.code-workspace
│   ├── lume.code-workspace
│   ├── lumier.code-workspace
│   ├── py.code-workspace
│   └── settings.json
├── blog
│   ├── app-use.md
│   ├── assets
│   │   ├── composite-agents.png
│   │   ├── docker-ubuntu-support.png
│   │   ├── hack-booth.png
│   │   ├── hack-closing-ceremony.jpg
│   │   ├── hack-cua-ollama-hud.jpeg
│   │   ├── hack-leaderboard.png
│   │   ├── hack-the-north.png
│   │   ├── hack-winners.jpeg
│   │   ├── hack-workshop.jpeg
│   │   ├── hud-agent-evals.png
│   │   └── trajectory-viewer.jpeg
│   ├── bringing-computer-use-to-the-web.md
│   ├── build-your-own-operator-on-macos-1.md
│   ├── build-your-own-operator-on-macos-2.md
│   ├── cloud-windows-ga-macos-preview.md
│   ├── composite-agents.md
│   ├── computer-use-agents-for-growth-hacking.md
│   ├── cua-hackathon.md
│   ├── cua-playground-preview.md
│   ├── cua-vlm-router.md
│   ├── hack-the-north.md
│   ├── hud-agent-evals.md
│   ├── human-in-the-loop.md
│   ├── introducing-cua-cli.md
│   ├── introducing-cua-cloud-containers.md
│   ├── lume-to-containerization.md
│   ├── neurips-2025-cua-papers.md
│   ├── sandboxed-python-execution.md
│   ├── training-computer-use-models-trajectories-1.md
│   ├── trajectory-viewer.md
│   ├── ubuntu-docker-support.md
│   └── windows-sandbox.md
├── CONTRIBUTING.md
├── Development.md
├── Dockerfile
├── docs
│   ├── .env.example
│   ├── .gitignore
│   ├── content
│   │   └── docs
│   │       ├── agent-sdk
│   │       │   ├── agent-loops.mdx
│   │       │   ├── benchmarks
│   │       │   │   ├── index.mdx
│   │       │   │   ├── interactive.mdx
│   │       │   │   ├── introduction.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── osworld-verified.mdx
│   │       │   │   ├── screenspot-pro.mdx
│   │       │   │   └── screenspot-v2.mdx
│   │       │   ├── callbacks
│   │       │   │   ├── agent-lifecycle.mdx
│   │       │   │   ├── cost-saving.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── logging.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── pii-anonymization.mdx
│   │       │   │   └── trajectories.mdx
│   │       │   ├── chat-history.mdx
│   │       │   ├── custom-tools.mdx
│   │       │   ├── customizing-computeragent.mdx
│   │       │   ├── integrations
│   │       │   │   ├── hud.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── observability.mdx
│   │       │   ├── mcp-server
│   │       │   │   ├── client-integrations.mdx
│   │       │   │   ├── configuration.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── llm-integrations.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── tools.mdx
│   │       │   │   └── usage.mdx
│   │       │   ├── message-format.mdx
│   │       │   ├── meta.json
│   │       │   ├── migration-guide.mdx
│   │       │   ├── prompt-caching.mdx
│   │       │   ├── supported-agents
│   │       │   │   ├── composed-agents.mdx
│   │       │   │   ├── computer-use-agents.mdx
│   │       │   │   ├── grounding-models.mdx
│   │       │   │   ├── human-in-the-loop.mdx
│   │       │   │   └── meta.json
│   │       │   ├── supported-model-providers
│   │       │   │   ├── cua-vlm-router.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   └── local-models.mdx
│   │       │   ├── telemetry.mdx
│   │       │   └── usage-tracking.mdx
│   │       ├── cli-playbook
│   │       │   ├── commands.mdx
│   │       │   ├── index.mdx
│   │       │   └── meta.json
│   │       ├── computer-sdk
│   │       │   ├── cloud-vm-management.mdx
│   │       │   ├── commands.mdx
│   │       │   ├── computer-server
│   │       │   │   ├── Commands.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── REST-API.mdx
│   │       │   │   └── WebSocket-API.mdx
│   │       │   ├── computer-ui.mdx
│   │       │   ├── computers.mdx
│   │       │   ├── custom-computer-handlers.mdx
│   │       │   ├── meta.json
│   │       │   ├── sandboxed-python.mdx
│   │       │   └── tracing-api.mdx
│   │       ├── example-usecases
│   │       │   ├── form-filling.mdx
│   │       │   ├── gemini-complex-ui-navigation.mdx
│   │       │   ├── meta.json
│   │       │   ├── post-event-contact-export.mdx
│   │       │   └── windows-app-behind-vpn.mdx
│   │       ├── get-started
│   │       │   ├── meta.json
│   │       │   └── quickstart.mdx
│   │       ├── index.mdx
│   │       ├── macos-vm-cli-playbook
│   │       │   ├── lume
│   │       │   │   ├── cli-reference.mdx
│   │       │   │   ├── faq.md
│   │       │   │   ├── http-api.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── prebuilt-images.mdx
│   │       │   ├── lumier
│   │       │   │   ├── building-lumier.mdx
│   │       │   │   ├── docker-compose.mdx
│   │       │   │   ├── docker.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   └── meta.json
│   │       │   └── meta.json
│   │       └── meta.json
│   ├── next.config.mjs
│   ├── package-lock.json
│   ├── package.json
│   ├── pnpm-lock.yaml
│   ├── postcss.config.mjs
│   ├── public
│   │   └── img
│   │       ├── agent_gradio_ui.png
│   │       ├── agent.png
│   │       ├── bg-dark.jpg
│   │       ├── bg-light.jpg
│   │       ├── cli.png
│   │       ├── computer.png
│   │       ├── grounding-with-gemini3.gif
│   │       ├── hero.png
│   │       ├── laminar_trace_example.png
│   │       ├── som_box_threshold.png
│   │       └── som_iou_threshold.png
│   ├── README.md
│   ├── source.config.ts
│   ├── src
│   │   ├── app
│   │   │   ├── (home)
│   │   │   │   ├── [[...slug]]
│   │   │   │   │   └── page.tsx
│   │   │   │   └── layout.tsx
│   │   │   ├── api
│   │   │   │   ├── posthog
│   │   │   │   │   └── [...path]
│   │   │   │   │       └── route.ts
│   │   │   │   └── search
│   │   │   │       └── route.ts
│   │   │   ├── favicon.ico
│   │   │   ├── global.css
│   │   │   ├── layout.config.tsx
│   │   │   ├── layout.tsx
│   │   │   ├── llms.mdx
│   │   │   │   └── [[...slug]]
│   │   │   │       └── route.ts
│   │   │   ├── llms.txt
│   │   │   │   └── route.ts
│   │   │   ├── robots.ts
│   │   │   └── sitemap.ts
│   │   ├── assets
│   │   │   ├── discord-black.svg
│   │   │   ├── discord-white.svg
│   │   │   ├── logo-black.svg
│   │   │   └── logo-white.svg
│   │   ├── components
│   │   │   ├── analytics-tracker.tsx
│   │   │   ├── cookie-consent.tsx
│   │   │   ├── doc-actions-menu.tsx
│   │   │   ├── editable-code-block.tsx
│   │   │   ├── footer.tsx
│   │   │   ├── hero.tsx
│   │   │   ├── iou.tsx
│   │   │   ├── mermaid.tsx
│   │   │   └── page-feedback.tsx
│   │   ├── lib
│   │   │   ├── llms.ts
│   │   │   └── source.ts
│   │   ├── mdx-components.tsx
│   │   └── providers
│   │       └── posthog-provider.tsx
│   └── tsconfig.json
├── examples
│   ├── agent_examples.py
│   ├── agent_ui_examples.py
│   ├── browser_tool_example.py
│   ├── cloud_api_examples.py
│   ├── computer_examples_windows.py
│   ├── computer_examples.py
│   ├── computer_ui_examples.py
│   ├── computer-example-ts
│   │   ├── .env.example
│   │   ├── .gitignore
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── README.md
│   │   ├── src
│   │   │   ├── helpers.ts
│   │   │   └── index.ts
│   │   └── tsconfig.json
│   ├── docker_examples.py
│   ├── evals
│   │   ├── hud_eval_examples.py
│   │   └── wikipedia_most_linked.txt
│   ├── pylume_examples.py
│   ├── sandboxed_functions_examples.py
│   ├── som_examples.py
│   ├── tracing_examples.py
│   ├── utils.py
│   └── winsandbox_example.py
├── img
│   ├── agent_gradio_ui.png
│   ├── agent.png
│   ├── cli.png
│   ├── computer.png
│   ├── logo_black.png
│   └── logo_white.png
├── libs
│   ├── kasm
│   │   ├── Dockerfile
│   │   ├── LICENSE
│   │   ├── README.md
│   │   └── src
│   │       └── ubuntu
│   │           └── install
│   │               └── firefox
│   │                   ├── custom_startup.sh
│   │                   ├── firefox.desktop
│   │                   └── install_firefox.sh
│   ├── lume
│   │   ├── .cursorignore
│   │   ├── CONTRIBUTING.md
│   │   ├── Development.md
│   │   ├── img
│   │   │   └── cli.png
│   │   ├── Package.resolved
│   │   ├── Package.swift
│   │   ├── README.md
│   │   ├── resources
│   │   │   └── lume.entitlements
│   │   ├── scripts
│   │   │   ├── build
│   │   │   │   ├── build-debug.sh
│   │   │   │   ├── build-release-notarized.sh
│   │   │   │   └── build-release.sh
│   │   │   └── install.sh
│   │   ├── src
│   │   │   ├── Commands
│   │   │   │   ├── Clone.swift
│   │   │   │   ├── Config.swift
│   │   │   │   ├── Create.swift
│   │   │   │   ├── Delete.swift
│   │   │   │   ├── Get.swift
│   │   │   │   ├── Images.swift
│   │   │   │   ├── IPSW.swift
│   │   │   │   ├── List.swift
│   │   │   │   ├── Logs.swift
│   │   │   │   ├── Options
│   │   │   │   │   └── FormatOption.swift
│   │   │   │   ├── Prune.swift
│   │   │   │   ├── Pull.swift
│   │   │   │   ├── Push.swift
│   │   │   │   ├── Run.swift
│   │   │   │   ├── Serve.swift
│   │   │   │   ├── Set.swift
│   │   │   │   └── Stop.swift
│   │   │   ├── ContainerRegistry
│   │   │   │   ├── ImageContainerRegistry.swift
│   │   │   │   ├── ImageList.swift
│   │   │   │   └── ImagesPrinter.swift
│   │   │   ├── Errors
│   │   │   │   └── Errors.swift
│   │   │   ├── FileSystem
│   │   │   │   ├── Home.swift
│   │   │   │   ├── Settings.swift
│   │   │   │   ├── VMConfig.swift
│   │   │   │   ├── VMDirectory.swift
│   │   │   │   └── VMLocation.swift
│   │   │   ├── LumeController.swift
│   │   │   ├── Main.swift
│   │   │   ├── Server
│   │   │   │   ├── Handlers.swift
│   │   │   │   ├── HTTP.swift
│   │   │   │   ├── Requests.swift
│   │   │   │   ├── Responses.swift
│   │   │   │   └── Server.swift
│   │   │   ├── Utils
│   │   │   │   ├── CommandRegistry.swift
│   │   │   │   ├── CommandUtils.swift
│   │   │   │   ├── Logger.swift
│   │   │   │   ├── NetworkUtils.swift
│   │   │   │   ├── Path.swift
│   │   │   │   ├── ProcessRunner.swift
│   │   │   │   ├── ProgressLogger.swift
│   │   │   │   ├── String.swift
│   │   │   │   └── Utils.swift
│   │   │   ├── Virtualization
│   │   │   │   ├── DarwinImageLoader.swift
│   │   │   │   ├── DHCPLeaseParser.swift
│   │   │   │   ├── ImageLoaderFactory.swift
│   │   │   │   └── VMVirtualizationService.swift
│   │   │   ├── VM
│   │   │   │   ├── DarwinVM.swift
│   │   │   │   ├── LinuxVM.swift
│   │   │   │   ├── VM.swift
│   │   │   │   ├── VMDetails.swift
│   │   │   │   ├── VMDetailsPrinter.swift
│   │   │   │   ├── VMDisplayResolution.swift
│   │   │   │   └── VMFactory.swift
│   │   │   └── VNC
│   │   │       ├── PassphraseGenerator.swift
│   │   │       └── VNCService.swift
│   │   └── tests
│   │       ├── Mocks
│   │       │   ├── MockVM.swift
│   │       │   ├── MockVMVirtualizationService.swift
│   │       │   └── MockVNCService.swift
│   │       ├── VM
│   │       │   └── VMDetailsPrinterTests.swift
│   │       ├── VMTests.swift
│   │       ├── VMVirtualizationServiceTests.swift
│   │       └── VNCServiceTests.swift
│   ├── lumier
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   └── src
│   │       ├── bin
│   │       │   └── entry.sh
│   │       ├── config
│   │       │   └── constants.sh
│   │       ├── hooks
│   │       │   └── on-logon.sh
│   │       └── lib
│   │           ├── utils.sh
│   │           └── vm.sh
│   ├── python
│   │   ├── agent
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── agent
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── adapters
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cua_adapter.py
│   │   │   │   │   ├── huggingfacelocal_adapter.py
│   │   │   │   │   ├── human_adapter.py
│   │   │   │   │   ├── mlxvlm_adapter.py
│   │   │   │   │   └── models
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── generic.py
│   │   │   │   │       ├── internvl.py
│   │   │   │   │       ├── opencua.py
│   │   │   │   │       └── qwen2_5_vl.py
│   │   │   │   ├── agent.py
│   │   │   │   ├── callbacks
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── budget_manager.py
│   │   │   │   │   ├── image_retention.py
│   │   │   │   │   ├── logging.py
│   │   │   │   │   ├── operator_validator.py
│   │   │   │   │   ├── pii_anonymization.py
│   │   │   │   │   ├── prompt_instructions.py
│   │   │   │   │   ├── telemetry.py
│   │   │   │   │   └── trajectory_saver.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── computers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cua.py
│   │   │   │   │   └── custom.py
│   │   │   │   ├── decorators.py
│   │   │   │   ├── human_tool
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   ├── server.py
│   │   │   │   │   └── ui.py
│   │   │   │   ├── integrations
│   │   │   │   │   └── hud
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── agent.py
│   │   │   │   │       └── proxy.py
│   │   │   │   ├── loops
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── anthropic.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── composed_grounded.py
│   │   │   │   │   ├── gelato.py
│   │   │   │   │   ├── gemini.py
│   │   │   │   │   ├── generic_vlm.py
│   │   │   │   │   ├── glm45v.py
│   │   │   │   │   ├── gta1.py
│   │   │   │   │   ├── holo.py
│   │   │   │   │   ├── internvl.py
│   │   │   │   │   ├── model_types.csv
│   │   │   │   │   ├── moondream3.py
│   │   │   │   │   ├── omniparser.py
│   │   │   │   │   ├── openai.py
│   │   │   │   │   ├── opencua.py
│   │   │   │   │   ├── uiins.py
│   │   │   │   │   ├── uitars.py
│   │   │   │   │   └── uitars2.py
│   │   │   │   ├── proxy
│   │   │   │   │   ├── examples.py
│   │   │   │   │   └── handlers.py
│   │   │   │   ├── responses.py
│   │   │   │   ├── tools
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── browser_tool.py
│   │   │   │   ├── types.py
│   │   │   │   └── ui
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __main__.py
│   │   │   │       └── gradio
│   │   │   │           ├── __init__.py
│   │   │   │           ├── app.py
│   │   │   │           └── ui_components.py
│   │   │   ├── benchmarks
│   │   │   │   ├── .gitignore
│   │   │   │   ├── contrib.md
│   │   │   │   ├── interactive.py
│   │   │   │   ├── models
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   └── gta1.py
│   │   │   │   ├── README.md
│   │   │   │   ├── ss-pro.py
│   │   │   │   ├── ss-v2.py
│   │   │   │   └── utils.py
│   │   │   ├── example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_computer_agent.py
│   │   ├── bench-ui
│   │   │   ├── bench_ui
│   │   │   │   ├── __init__.py
│   │   │   │   ├── api.py
│   │   │   │   └── child.py
│   │   │   ├── examples
│   │   │   │   ├── folder_example.py
│   │   │   │   ├── gui
│   │   │   │   │   ├── index.html
│   │   │   │   │   ├── logo.svg
│   │   │   │   │   └── styles.css
│   │   │   │   ├── output_overlay.png
│   │   │   │   └── simple_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       └── test_port_detection.py
│   │   ├── computer
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer
│   │   │   │   ├── __init__.py
│   │   │   │   ├── computer.py
│   │   │   │   ├── diorama_computer.py
│   │   │   │   ├── helpers.py
│   │   │   │   ├── interface
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── logger.py
│   │   │   │   ├── models.py
│   │   │   │   ├── providers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cloud
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── docker
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── lume
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── lume_api.py
│   │   │   │   │   ├── lumier
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── types.py
│   │   │   │   │   └── winsandbox
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── provider.py
│   │   │   │   │       └── setup_script.ps1
│   │   │   │   ├── tracing_wrapper.py
│   │   │   │   ├── tracing.py
│   │   │   │   ├── ui
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   └── gradio
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── app.py
│   │   │   │   └── utils.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_computer.py
│   │   ├── computer-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── browser.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── diorama
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── diorama_computer.py
│   │   │   │   │   ├── diorama.py
│   │   │   │   │   ├── draw.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── safezone.py
│   │   │   │   ├── handlers
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── main.py
│   │   │   │   ├── server.py
│   │   │   │   ├── utils
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── wallpaper.py
│   │   │   │   └── watchdog.py
│   │   │   ├── examples
│   │   │   │   ├── __init__.py
│   │   │   │   └── usage_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   ├── run_server.py
│   │   │   ├── test_connection.py
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_server.py
│   │   ├── core
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── core
│   │   │   │   ├── __init__.py
│   │   │   │   └── telemetry
│   │   │   │       ├── __init__.py
│   │   │   │       └── posthog.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_telemetry.py
│   │   ├── mcp-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── build-extension.py
│   │   │   ├── CONCURRENT_SESSIONS.md
│   │   │   ├── desktop-extension
│   │   │   │   ├── cua-extension.mcpb
│   │   │   │   ├── desktop_extension.png
│   │   │   │   ├── manifest.json
│   │   │   │   ├── README.md
│   │   │   │   ├── requirements.txt
│   │   │   │   ├── run_server.sh
│   │   │   │   └── setup.py
│   │   │   ├── mcp_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── server.py
│   │   │   │   └── session_manager.py
│   │   │   ├── pdm.lock
│   │   │   ├── pyproject.toml
│   │   │   ├── QUICK_TEST_COMMANDS.sh
│   │   │   ├── quick_test_local_option.py
│   │   │   ├── README.md
│   │   │   ├── scripts
│   │   │   │   ├── install_mcp_server.sh
│   │   │   │   └── start_mcp_server.sh
│   │   │   ├── test_mcp_server_local_option.py
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_mcp_server.py
│   │   ├── pylume
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_pylume.py
│   │   └── som
│   │       ├── .bumpversion.cfg
│   │       ├── LICENSE
│   │       ├── poetry.toml
│   │       ├── pyproject.toml
│   │       ├── README.md
│   │       ├── som
│   │       │   ├── __init__.py
│   │       │   ├── detect.py
│   │       │   ├── detection.py
│   │       │   ├── models.py
│   │       │   ├── ocr.py
│   │       │   ├── util
│   │       │   │   └── utils.py
│   │       │   └── visualization.py
│   │       └── tests
│   │           ├── conftest.py
│   │           └── test_omniparser.py
│   ├── qemu-docker
│   │   ├── linux
│   │   │   ├── Dockerfile
│   │   │   ├── README.md
│   │   │   └── src
│   │   │       ├── entry.sh
│   │   │       └── vm
│   │   │           ├── image
│   │   │           │   └── README.md
│   │   │           └── setup
│   │   │               ├── install.sh
│   │   │               ├── setup-cua-server.sh
│   │   │               └── setup.sh
│   │   ├── README.md
│   │   └── windows
│   │       ├── Dockerfile
│   │       ├── README.md
│   │       └── src
│   │           ├── entry.sh
│   │           └── vm
│   │               ├── image
│   │               │   └── README.md
│   │               └── setup
│   │                   ├── install.bat
│   │                   ├── on-logon.ps1
│   │                   ├── setup-cua-server.ps1
│   │                   ├── setup-utils.psm1
│   │                   └── setup.ps1
│   ├── typescript
│   │   ├── .gitignore
│   │   ├── .nvmrc
│   │   ├── agent
│   │   │   ├── examples
│   │   │   │   ├── playground-example.html
│   │   │   │   └── README.md
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── client.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   └── client.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── computer
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── computer
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── providers
│   │   │   │   │   │   ├── base.ts
│   │   │   │   │   │   ├── cloud.ts
│   │   │   │   │   │   └── index.ts
│   │   │   │   │   └── types.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── base.ts
│   │   │   │   │   ├── factory.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── linux.ts
│   │   │   │   │   ├── macos.ts
│   │   │   │   │   └── windows.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   ├── computer
│   │   │   │   │   └── cloud.test.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── factory.test.ts
│   │   │   │   │   ├── index.test.ts
│   │   │   │   │   ├── linux.test.ts
│   │   │   │   │   ├── macos.test.ts
│   │   │   │   │   └── windows.test.ts
│   │   │   │   └── setup.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── core
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── index.ts
│   │   │   │   └── telemetry
│   │   │   │       ├── clients
│   │   │   │       │   ├── index.ts
│   │   │   │       │   └── posthog.ts
│   │   │   │       └── index.ts
│   │   │   ├── tests
│   │   │   │   └── telemetry.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── cua-cli
│   │   │   ├── .gitignore
│   │   │   ├── .prettierrc
│   │   │   ├── bun.lock
│   │   │   ├── CLAUDE.md
│   │   │   ├── index.ts
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── auth.ts
│   │   │   │   ├── cli.ts
│   │   │   │   ├── commands
│   │   │   │   │   ├── auth.ts
│   │   │   │   │   └── sandbox.ts
│   │   │   │   ├── config.ts
│   │   │   │   ├── http.ts
│   │   │   │   ├── storage.ts
│   │   │   │   └── util.ts
│   │   │   └── tsconfig.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── pnpm-workspace.yaml
│   │   └── README.md
│   └── xfce
│       ├── .dockerignore
│       ├── .gitignore
│       ├── Development.md
│       ├── Dockerfile
│       ├── Dockerfile.dev
│       ├── README.md
│       └── src
│           ├── scripts
│           │   ├── resize-display.sh
│           │   ├── start-computer-server.sh
│           │   ├── start-novnc.sh
│           │   ├── start-vnc.sh
│           │   └── xstartup.sh
│           ├── supervisor
│           │   └── supervisord.conf
│           └── xfce-config
│               ├── helpers.rc
│               ├── xfce4-power-manager.xml
│               └── xfce4-session.xml
├── LICENSE.md
├── Makefile
├── notebooks
│   ├── agent_nb.ipynb
│   ├── blog
│   │   ├── build-your-own-operator-on-macos-1.ipynb
│   │   └── build-your-own-operator-on-macos-2.ipynb
│   ├── composite_agents_docker_nb.ipynb
│   ├── computer_nb.ipynb
│   ├── computer_server_nb.ipynb
│   ├── customizing_computeragent.ipynb
│   ├── eval_osworld.ipynb
│   ├── ollama_nb.ipynb
│   ├── README.md
│   ├── sota_hackathon_cloud.ipynb
│   └── sota_hackathon.ipynb
├── package-lock.json
├── package.json
├── pnpm-lock.yaml
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── scripts
│   ├── install-cli.ps1
│   ├── install-cli.sh
│   ├── playground-docker.sh
│   ├── playground.sh
│   ├── run-docker-dev.sh
│   └── typescript-typecheck.js
├── TESTING.md
├── tests
│   ├── agent_loop_testing
│   │   ├── agent_test.py
│   │   └── README.md
│   ├── pytest.ini
│   ├── shell_cmd.py
│   ├── test_files.py
│   ├── test_mcp_server_session_management.py
│   ├── test_mcp_server_streaming.py
│   ├── test_shell_bash.py
│   ├── test_telemetry.py
│   ├── test_tracing.py
│   ├── test_venv.py
│   └── test_watchdog.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/libs/python/agent/agent/adapters/huggingfacelocal_adapter.py:
--------------------------------------------------------------------------------

```python
  1 | import asyncio
  2 | import functools
  3 | import warnings
  4 | from concurrent.futures import ThreadPoolExecutor
  5 | from typing import Any, AsyncIterator, Dict, Iterator, List, Optional
  6 | 
  7 | from litellm import acompletion, completion
  8 | from litellm.llms.custom_llm import CustomLLM
  9 | from litellm.types.utils import GenericStreamingChunk, ModelResponse
 10 | 
 11 | # Try to import HuggingFace dependencies
 12 | try:
 13 |     import torch
 14 |     from transformers import AutoModelForImageTextToText, AutoProcessor
 15 | 
 16 |     HF_AVAILABLE = True
 17 | except ImportError:
 18 |     HF_AVAILABLE = False
 19 | 
 20 | from .models import load_model as load_model_handler
 21 | 
 22 | 
 23 | class HuggingFaceLocalAdapter(CustomLLM):
 24 |     """HuggingFace Local Adapter for running vision-language models locally."""
 25 | 
 26 |     def __init__(self, device: str = "auto", trust_remote_code: bool = False, **kwargs):
 27 |         """Initialize the adapter.
 28 | 
 29 |         Args:
 30 |             device: Device to load model on ("auto", "cuda", "cpu", etc.)
 31 |             trust_remote_code: Whether to trust remote code
 32 |             **kwargs: Additional arguments
 33 |         """
 34 |         super().__init__()
 35 |         self.device = device
 36 |         self.trust_remote_code = trust_remote_code
 37 |         # Cache for model handlers keyed by model_name
 38 |         self._handlers: Dict[str, Any] = {}
 39 |         self._executor = ThreadPoolExecutor(max_workers=1)  # Single thread pool
 40 | 
 41 |     def _get_handler(self, model_name: str):
 42 |         """Get or create a model handler for the given model name."""
 43 |         if model_name not in self._handlers:
 44 |             self._handlers[model_name] = load_model_handler(
 45 |                 model_name=model_name, device=self.device, trust_remote_code=self.trust_remote_code
 46 |             )
 47 |         return self._handlers[model_name]
 48 | 
 49 |     def _convert_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 50 |         """Convert OpenAI format messages to HuggingFace format.
 51 | 
 52 |         Args:
 53 |             messages: Messages in OpenAI format
 54 | 
 55 |         Returns:
 56 |             Messages in HuggingFace format
 57 |         """
 58 |         converted_messages = []
 59 | 
 60 |         for message in messages:
 61 |             converted_message = {"role": message["role"], "content": []}
 62 | 
 63 |             content = message.get("content", [])
 64 |             if isinstance(content, str):
 65 |                 # Simple text content
 66 |                 converted_message["content"].append({"type": "text", "text": content})
 67 |             elif isinstance(content, list):
 68 |                 # Multi-modal content
 69 |                 for item in content:
 70 |                     if item.get("type") == "text":
 71 |                         converted_message["content"].append(
 72 |                             {"type": "text", "text": item.get("text", "")}
 73 |                         )
 74 |                     elif item.get("type") == "image_url":
 75 |                         # Convert image_url format to image format
 76 |                         image_url = item.get("image_url", {}).get("url", "")
 77 |                         converted_message["content"].append({"type": "image", "image": image_url})
 78 | 
 79 |             converted_messages.append(converted_message)
 80 | 
 81 |         return converted_messages
 82 | 
 83 |     def _generate(self, **kwargs) -> str:
 84 |         """Generate response using the local HuggingFace model.
 85 | 
 86 |         Args:
 87 |             **kwargs: Keyword arguments containing messages and model info
 88 | 
 89 |         Returns:
 90 |             Generated text response
 91 |         """
 92 |         if not HF_AVAILABLE:
 93 |             raise ImportError(
 94 |                 "HuggingFace transformers dependencies not found. "
 95 |                 'Please install with: pip install "cua-agent[uitars-hf]"'
 96 |             )
 97 | 
 98 |         # Extract messages and model from kwargs
 99 |         messages = kwargs.get("messages", [])
100 |         model_name = kwargs.get("model", "ByteDance-Seed/UI-TARS-1.5-7B")
101 |         max_new_tokens = kwargs.get("max_tokens", 128)
102 | 
103 |         # Warn about ignored kwargs
104 |         ignored_kwargs = set(kwargs.keys()) - {"messages", "model", "max_tokens"}
105 |         if ignored_kwargs:
106 |             warnings.warn(f"Ignoring unsupported kwargs: {ignored_kwargs}")
107 | 
108 |         # Convert messages to HuggingFace format
109 |         hf_messages = self._convert_messages(messages)
110 | 
111 |         # Delegate to model handler
112 |         handler = self._get_handler(model_name)
113 |         generated_text = handler.generate(hf_messages, max_new_tokens=max_new_tokens)
114 |         return generated_text
115 | 
116 |     def completion(self, *args, **kwargs) -> ModelResponse:
117 |         """Synchronous completion method.
118 | 
119 |         Returns:
120 |             ModelResponse with generated text
121 |         """
122 |         generated_text = self._generate(**kwargs)
123 | 
124 |         return completion(
125 |             model=f"huggingface-local/{kwargs['model']}",
126 |             mock_response=generated_text,
127 |         )
128 | 
129 |     async def acompletion(self, *args, **kwargs) -> ModelResponse:
130 |         """Asynchronous completion method.
131 | 
132 |         Returns:
133 |             ModelResponse with generated text
134 |         """
135 |         # Run _generate in thread pool to avoid blocking
136 |         loop = asyncio.get_event_loop()
137 |         generated_text = await loop.run_in_executor(
138 |             self._executor, functools.partial(self._generate, **kwargs)
139 |         )
140 | 
141 |         return await acompletion(
142 |             model=f"huggingface-local/{kwargs['model']}",
143 |             mock_response=generated_text,
144 |         )
145 | 
146 |     def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
147 |         """Synchronous streaming method.
148 | 
149 |         Returns:
150 |             Iterator of GenericStreamingChunk
151 |         """
152 |         generated_text = self._generate(**kwargs)
153 | 
154 |         generic_streaming_chunk: GenericStreamingChunk = {
155 |             "finish_reason": "stop",
156 |             "index": 0,
157 |             "is_finished": True,
158 |             "text": generated_text,
159 |             "tool_use": None,
160 |             "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0},
161 |         }
162 | 
163 |         yield generic_streaming_chunk
164 | 
165 |     async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]:
166 |         """Asynchronous streaming method.
167 | 
168 |         Returns:
169 |             AsyncIterator of GenericStreamingChunk
170 |         """
171 |         # Run _generate in thread pool to avoid blocking
172 |         loop = asyncio.get_event_loop()
173 |         generated_text = await loop.run_in_executor(
174 |             self._executor, functools.partial(self._generate, **kwargs)
175 |         )
176 | 
177 |         generic_streaming_chunk: GenericStreamingChunk = {
178 |             "finish_reason": "stop",
179 |             "index": 0,
180 |             "is_finished": True,
181 |             "text": generated_text,
182 |             "tool_use": None,
183 |             "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0},
184 |         }
185 | 
186 |         yield generic_streaming_chunk
187 | 
```

--------------------------------------------------------------------------------
/blog/cua-vlm-router.md:
--------------------------------------------------------------------------------

```markdown
  1 | # Cua VLM Router: One Provider for All Your Computer-Use Models
  2 | 
  3 | If you've been building computer-use agents, you know the reality: every model provider has its own specification and deployment process. Anthropic has one API format, OpenAI another, Google something else entirely. Want to try a Hugging Face model? That's a completely different setup. Self-hosting? Even more complexity. Each provider requires learning their specific API, managing their credentials, and adapting your code to their particular requirements.
  4 | 
  5 | Today we're launching the **Cua VLM Router**: a managed inference API that gives you unified access to multiple vision-language model providers through a single API key. We're starting with Anthropic's Claude models (Sonnet 4.5 and Haiku 4.5)—some of the most loved and widely-used computer-use models in the Cua ecosystem—with more providers coming soon.
  6 | 
  7 | ![Cua VLM Router Banner](https://github.com/user-attachments/assets/1b978f62-2cae-4cf7-932a-55ac8c8f2e06)
  8 | 
  9 | ## What You Get
 10 | 
 11 | The Cua VLM Router handles the infrastructure so you can focus on building:
 12 | 
 13 | **Single API Key**
 14 | 
 15 | - One key for all model providers (no juggling multiple credentials)
 16 | - Works for both model inference and sandbox access
 17 | - Manage everything from one dashboard at cua.ai
 18 | 
 19 | **Smart Routing**
 20 | 
 21 | - Automatic provider selection for optimal availability and performance
 22 | - For Anthropic models, we route to the best provider (Anthropic, AWS Bedrock, or Microsoft Foundry)
 23 | - No configuration needed—just specify the model and we handle the rest
 24 | 
 25 | **Cost Tracking & Optimization**
 26 | 
 27 | - Unified usage dashboard across all models
 28 | - Real-time credit balance tracking
 29 | - Detailed cost breakdown per request (gateway cost + upstream cost)
 30 | 
 31 | **Production-Ready**
 32 | 
 33 | - OpenAI-compatible API (drop-in replacement for existing code)
 34 | - Full streaming support with Server-Sent Events
 35 | - Metadata about routing decisions in every response
 36 | 
 37 | ## Available Models (Launch)
 38 | 
 39 | We're starting with Anthropic's latest Claude models:
 40 | 
 41 | | Model                             | Best For                           |
 42 | | --------------------------------- | ---------------------------------- |
 43 | | `cua/anthropic/claude-sonnet-4.5` | General-purpose tasks, recommended |
 44 | | `cua/anthropic/claude-haiku-4.5`  | Fast responses, cost-effective     |
 45 | 
 46 | ## How It Works
 47 | 
 48 | When you request an Anthropic model through Cua, we automatically route to the best available provider—whether that's Anthropic directly, AWS Bedrock, or Microsoft Foundry. You just specify `cua/anthropic/claude-sonnet-4.5`, and we handle the provider selection, failover, and optimization behind the scenes. No need to manage multiple accounts or implement fallback logic yourself.
 49 | 
 50 | ## Getting Started
 51 | 
 52 | Sign up at [cua.ai/signin](https://cua.ai/signin) and create your API key from **Dashboard > API Keys > New API Key** (save it immediately—you won't see it again).
 53 | 
 54 | Use it with the Agent SDK (make sure to set your environment variable):
 55 | 
 56 | ```python
 57 | import asyncio
 58 | from agent import ComputerAgent
 59 | from computer import Computer
 60 | 
 61 | async def main():
 62 |   # Initialize cloud computer
 63 |   computer = Computer(
 64 |     os_type="linux",
 65 |     provider_type="cloud",
 66 |     name="your-container-name",
 67 |     api_key="your-cua-api-key"
 68 |   )
 69 | 
 70 |   # Initialize agent with Claude Sonnet 4.5
 71 |   agent = ComputerAgent(
 72 |     tools=[computer],
 73 |     model="cua/anthropic/claude-sonnet-4.5",
 74 |     api_key="your-cua-api-key",
 75 |     instructions="You are a helpful assistant that can control computers",
 76 |     only_n_most_recent_images=3
 77 |   )
 78 | 
 79 |   # Run a task
 80 |   async for result in agent.run("Open a browser and search for Python tutorials"):
 81 |     print(result)
 82 | 
 83 | if __name__ == "__main__":
 84 |   asyncio.run(main())
 85 | ```
 86 | 
 87 | ## Migration is Simple
 88 | 
 89 | Already using Anthropic directly? Just add the `cua/` prefix:
 90 | 
 91 | **Before:**
 92 | 
 93 | ```python
 94 | # In your shell: export ANTHROPIC_API_KEY="sk-ant-..."
 95 | agent = ComputerAgent(model="anthropic/claude-sonnet-4-5-20250929")
 96 | ```
 97 | 
 98 | **After:**
 99 | 
100 | ```python
101 | # In your shell: export CUA_API_KEY="sk_cua-api01_..."
102 | agent = ComputerAgent(model="cua/anthropic/claude-sonnet-4.5")
103 | ```
104 | 
105 | Same code structure. No other changes needed.
106 | 
107 | ## Direct API Access
108 | 
109 | The router exposes an OpenAI-compatible API at `https://inference.cua.ai/v1`:
110 | 
111 | ```bash
112 | curl -X POST https://inference.cua.ai/v1/chat/completions \
113 |   -H "Authorization: Bearer ${CUA_API_KEY}" \
114 |   -H "Content-Type: application/json" \
115 |   -d '{
116 |     "model": "anthropic/claude-sonnet-4.5",
117 |     "messages": [{"role": "user", "content": "Hello!"}],
118 |     "stream": true
119 |   }'
120 | ```
121 | 
122 | Works with any OpenAI-compatible client library.
123 | 
124 | ## FAQs
125 | 
126 | <details>
127 | <summary><strong>Do I still need provider API keys?</strong></summary>
128 | 
129 | No. Cua manages all provider API keys and infrastructure. You only need one Cua API key for everything—model inference and sandbox access.
130 | 
131 | </details>
132 | 
133 | <details>
134 | <summary><strong>How does pricing work?</strong></summary>
135 | 
136 | Requests are billed in credits, deducted from your Cua account balance. Every response includes both the Cua gateway cost and the actual upstream API cost for transparency.
137 | 
138 | </details>
139 | 
140 | <details>
141 | <summary><strong>Can I still use my own Anthropic key (BYOK)?</strong></summary>
142 | 
143 | Yes. The agent SDK still supports direct provider access. Just use `anthropic/claude-sonnet-4-5-20250929` instead of the `cua/` prefix and set your `ANTHROPIC_API_KEY`. See [Supported Model Providers](https://cua.ai/docs/agent-sdk/supported-model-providers/) for details.
144 | 
145 | </details>
146 | 
147 | <details>
148 | <summary><strong>What about other providers?</strong></summary>
149 | 
150 | We're starting with Anthropic and adding more providers based on what people actually use. Request access to specific models in [Discord](https://discord.gg/cua-ai).
151 | 
152 | </details>
153 | 
154 | <details>
155 | <summary><strong>Does streaming work?</strong></summary>
156 | 
157 | Yes. Set `"stream": true` in your request to receive Server-Sent Events. Works identically to OpenAI's streaming API.
158 | 
159 | </details>
160 | 
161 | ## What's Next
162 | 
163 | This is just the beginning. We're actively iterating based on feedback:
164 | 
165 | - Additional model providers
166 | - Custom model routing rules
167 | - Usage alerts and budget controls
168 | - Team collaboration features
169 | 
170 | If there's a model or feature you need, let us know in [Discord](https://discord.gg/cua-ai).
171 | 
172 | ## Need Help?
173 | 
174 | - **Documentation**: [cua.ai/docs/agent-sdk/supported-model-providers/cua-vlm-router](https://cua.ai/docs/agent-sdk/supported-model-providers/cua-vlm-router)
175 | - **Quickstart Guide**: [cua.ai/docs/get-started/quickstart](https://cua.ai/docs/get-started/quickstart)
176 | - **Discord Community**: [discord.gg/cua-ai](https://discord.gg/cua-ai)
177 | 
178 | ---
179 | 
180 | Get started at [cua.ai](https://cua.ai) or check out the [VLM Router docs](https://cua.ai/docs/agent-sdk/supported-model-providers/cua-vlm-router).
181 | 
```

--------------------------------------------------------------------------------
/tests/test_venv.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Virtual Environment Testing Module
  3 | This module tests the ability to execute python code in a virtual environment within Cua Containers.
  4 | 
  5 | Required environment variables:
  6 | - CUA_API_KEY: API key for Cua cloud provider
  7 | - CUA_CONTAINER_NAME: Name of the container to use
  8 | """
  9 | 
 10 | import asyncio
 11 | import os
 12 | import sys
 13 | import traceback
 14 | from pathlib import Path
 15 | 
 16 | import pytest
 17 | 
# Load environment variables from .env file
# (resolved relative to the repository root, one level above tests/)
project_root = Path(__file__).parent.parent
env_file = project_root / ".env"
print(f"Loading environment from: {env_file}")
from dotenv import load_dotenv

load_dotenv(env_file)

# Add paths to sys.path if needed
# PYTHONPATH entries are prepended so local packages win over installed ones.
pythonpath = os.environ.get("PYTHONPATH", "")
for path in pythonpath.split(":"):
    if path and path not in sys.path:
        sys.path.insert(0, path)  # Insert at beginning to prioritize
        print(f"Added to sys.path: {path}")
 32 | 
 33 | from computer import Computer, VMProviderType
 34 | from computer.helpers import sandboxed, set_default_computer
 35 | 
 36 | 
@pytest.fixture(scope="session")
async def computer():
    """Shared Computer instance for all test cases.

    Connects a cloud Linux Computer (Cua provider) configured via the
    CUA_API_KEY and CUA_CONTAINER_NAME environment variables, yields it to
    the whole test session, and disconnects it afterwards.
    """
    # Create a remote Linux computer with Cua
    computer = Computer(
        os_type="linux",
        api_key=os.getenv("CUA_API_KEY"),
        name=str(os.getenv("CUA_CONTAINER_NAME")),
        provider_type=VMProviderType.CLOUD,
    )

    # # Create a local macOS computer with Cua
    # computer = Computer()

    try:
        await computer.run()
        yield computer
    finally:
        # Always tear down the connection, even when a test fails mid-session.
        await computer.disconnect()
 56 | 
 57 | 
 58 | # Sample test cases
 59 | @pytest.mark.asyncio(loop_scope="session")
 60 | async def test_venv_install(computer):
 61 |     """Test virtual environment creation and package installation."""
 62 |     # Create a test virtual environment and install requests
 63 |     stdout, _ = await computer.venv_install("test_env", ["requests"])
 64 | 
 65 |     # Check that installation was successful (no major errors)
 66 |     assert "Successfully installed" in stdout or "Requirement already satisfied" in stdout
 67 | 
 68 | 
 69 | @pytest.mark.asyncio(loop_scope="session")
 70 | async def test_venv_cmd(computer):
 71 |     """Test executing shell commands in virtual environment."""
 72 |     # Test Python version check
 73 |     stdout, _ = await computer.venv_cmd("test_env", "python --version")
 74 | 
 75 |     assert "Python" in stdout
 76 | 
 77 | 
 78 | @pytest.mark.asyncio(loop_scope="session")
 79 | async def test_venv_exec(computer):
 80 |     """Test executing Python functions in virtual environment."""
 81 | 
 82 |     def test_function(message="Hello World"):
 83 |         import sys
 84 | 
 85 |         return f"Python {sys.version_info.major}.{sys.version_info.minor}: {message}"
 86 | 
 87 |     result = await computer.venv_exec("test_env", test_function, message="Test successful!")
 88 | 
 89 |     assert "Python" in result
 90 |     assert "Test successful!" in result
 91 | 
 92 | 
 93 | @pytest.mark.asyncio(loop_scope="session")
 94 | async def test_venv_exec_with_package(computer):
 95 |     """Test executing Python functions that use installed packages."""
 96 | 
 97 |     def test_requests():
 98 |         import requests
 99 | 
100 |         return f"requests version: {requests.__version__}"
101 | 
102 |     result = await computer.venv_exec("test_env", test_requests)
103 | 
104 |     assert "requests version:" in result
105 | 
106 | 
@pytest.mark.asyncio(loop_scope="session")
async def test_venv_exec_error_handling(computer):
    """Test error handling in venv_exec."""

    def test_error():
        raise ValueError("This is a test error")

    # The remote exception must be re-raised locally with its message intact.
    with pytest.raises(ValueError, match="This is a test error"):
        await computer.venv_exec("test_env", test_error)
116 | 
117 | 
@pytest.mark.asyncio(loop_scope="session")
async def test_venv_exec_with_args_kwargs(computer):
    """Test executing Python functions with args and kwargs that return an object."""

    def create_data_object(name, age, *hobbies, **metadata):
        return {
            "name": name,
            "age": age,
            "hobbies": list(hobbies),
            "metadata": metadata,
            "status": "active",
        }

    # Mix positional, variadic, and keyword arguments in a single remote call.
    result = await computer.venv_exec(
        "test_env",
        create_data_object,
        "Alice",
        25,
        "reading",
        "coding",
        location="New York",
        department="Engineering",
    )

    expected_subset = {
        "name": "Alice",
        "age": 25,
        "hobbies": ["reading", "coding"],
        "status": "active",
    }
    for key, value in expected_subset.items():
        assert result[key] == value
    assert result["metadata"]["location"] == "New York"
141 | 
142 | 
@pytest.mark.asyncio(loop_scope="session")
async def test_venv_exec_stdout_capture(computer, capfd):
    """Test capturing stdout from Python functions executed in virtual environment."""

    def hello_world_function():
        print("Hello World!")
        return "Function completed"

    # Run remotely; the sandbox relays the function's stdout to this process.
    outcome = await computer.venv_exec("test_env", hello_world_function)

    captured_out, _ = capfd.readouterr()

    # The remote print plus the relay yields a trailing blank line.
    assert captured_out == "Hello World!\n\n"
    assert outcome == "Function completed"
160 | 
161 | 
@pytest.mark.asyncio(loop_scope="session")
async def test_remote_decorator(computer):
    """Test the remote decorator from computer.helpers module.

    Registers the shared Computer as the default target, then runs a
    @sandboxed function and verifies it executed inside the virtual
    environment.
    """
    # Set the computer as default for the remote decorator
    set_default_computer(computer)

    # Define a function with the remote decorator
    @sandboxed("test_env")
    def get_package_version():
        import platform
        import sys

        return {"python_version": sys.version, "platform": platform.platform(), "success": True}

    # Call the decorated function
    result = await get_package_version()

    # Verify the function executed in the virtual environment
    assert "python_version" in result
    assert "platform" in result
    # Identity check: `is True` instead of the `== True` comparison (E712).
    assert result["success"] is True
183 | 
184 | 
@pytest.mark.asyncio(loop_scope="session")
async def test_remote_decorator_with_custom_computer(computer):
    """Test the remote decorator with explicitly specified computer instance."""

    # Bind the sandbox explicitly instead of relying on the default computer.
    @sandboxed("test_env", computer=computer)
    def get_system_info():
        import os
        import sys

        return {
            "python_version": sys.version,
            "environment_vars": dict(os.environ),
            "working_directory": os.getcwd(),
        }

    info = await get_system_info()

    # All expected keys should be present in the remote result.
    for key in ("python_version", "environment_vars", "working_directory"):
        assert key in info

    # The sandbox runs elsewhere, so its working directory should differ
    # from the one used by this test process.
    assert info["working_directory"] != os.getcwd()
211 | 
212 | 
if __name__ == "__main__":
    # Run tests directly (verbose mode) when executed as a script
    pytest.main([__file__, "-v"])
216 | 
```

--------------------------------------------------------------------------------
/libs/python/mcp-server/quick_test_local_option.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | Quick test to verify the local desktop option logic without full setup.
  4 | 
  5 | This script tests the environment variable parsing and logic flow
  6 | without requiring VMs, computer-server, or MCP clients to be running.
  7 | """
  8 | 
  9 | import os
 10 | import sys
 11 | 
 12 | 
 13 | def test_env_var_parsing():
 14 |     """Test that environment variable is parsed correctly."""
 15 |     print("Testing CUA_USE_HOST_COMPUTER_SERVER environment variable parsing...")
 16 |     print("-" * 60)
 17 | 
 18 |     test_cases = [
 19 |         # (env_value, expected_result, description)
 20 |         ("true", True, "lowercase 'true'"),
 21 |         ("True", True, "capitalized 'True'"),
 22 |         ("TRUE", True, "uppercase 'TRUE'"),
 23 |         ("1", True, "numeric '1'"),
 24 |         ("yes", True, "lowercase 'yes'"),
 25 |         ("Yes", True, "capitalized 'Yes'"),
 26 |         ("false", False, "lowercase 'false'"),
 27 |         ("False", False, "capitalized 'False'"),
 28 |         ("FALSE", False, "uppercase 'FALSE'"),
 29 |         ("0", False, "numeric '0'"),
 30 |         ("no", False, "lowercase 'no'"),
 31 |         ("", False, "empty string"),
 32 |         ("random", False, "random value"),
 33 |         (None, False, "not set (None)"),
 34 |     ]
 35 | 
 36 |     passed = 0
 37 |     failed = 0
 38 | 
 39 |     for env_value, expected, description in test_cases:
 40 |         # Simulate the logic from session_manager.py line 59
 41 |         if env_value is None:
 42 |             actual = os.getenv("CUA_USE_HOST_COMPUTER_SERVER", "false").lower() in (
 43 |                 "true",
 44 |                 "1",
 45 |                 "yes",
 46 |             )
 47 |         else:
 48 |             os.environ["CUA_USE_HOST_COMPUTER_SERVER"] = env_value
 49 |             actual = os.getenv("CUA_USE_HOST_COMPUTER_SERVER", "false").lower() in (
 50 |                 "true",
 51 |                 "1",
 52 |                 "yes",
 53 |             )
 54 | 
 55 |         status = "✓ PASS" if actual == expected else "✗ FAIL"
 56 |         if actual == expected:
 57 |             passed += 1
 58 |         else:
 59 |             failed += 1
 60 | 
 61 |         print(
 62 |             f"{status} | Value: {env_value!r:15} | Expected: {expected!s:5} | Got: {actual!s:5} | {description}"
 63 |         )
 64 | 
 65 |     # Clean up
 66 |     os.environ.pop("CUA_USE_HOST_COMPUTER_SERVER", None)
 67 | 
 68 |     print("-" * 60)
 69 |     print(f"Results: {passed} passed, {failed} failed")
 70 |     return failed == 0
 71 | 
 72 | 
 73 | def test_session_manager_logic():
 74 |     """Test the logic flow in session_manager.py without actual Computer creation."""
 75 |     print("\nTesting session_manager.py logic flow...")
 76 |     print("-" * 60)
 77 | 
 78 |     # Read the actual session_manager.py to verify the logic
 79 |     import pathlib
 80 | 
 81 |     session_manager_path = (
 82 |         pathlib.Path(__file__).parent.parent
 83 |         / "libs"
 84 |         / "python"
 85 |         / "mcp-server"
 86 |         / "mcp_server"
 87 |         / "session_manager.py"
 88 |     )
 89 | 
 90 |     if not session_manager_path.exists():
 91 |         print(f"✗ FAIL | session_manager.py not found at {session_manager_path}")
 92 |         return False
 93 | 
 94 |     content = session_manager_path.read_text()
 95 | 
 96 |     # Check for the key logic
 97 |     checks = [
 98 |         ('os.getenv("CUA_USE_HOST_COMPUTER_SERVER"', "Environment variable check present"),
 99 |         ("use_host_computer_server=use_host", "use_host_computer_server parameter passed"),
100 |         ("Computer(", "Computer instantiation present"),
101 |     ]
102 | 
103 |     all_checks_passed = True
104 |     for check_str, description in checks:
105 |         if check_str in content:
106 |             print(f"✓ PASS | {description}")
107 |         else:
108 |             print(f"✗ FAIL | {description} - not found")
109 |             all_checks_passed = False
110 | 
111 |     print("-" * 60)
112 |     return all_checks_passed
113 | 
114 | 
def test_documentation_consistency():
    """Verify documentation mentions the new feature.

    Returns:
        True when every expected doc file exists and contains its marker text.
    """
    print("\nTesting documentation consistency...")
    print("-" * 60)

    import pathlib

    docs_to_check = [
        ("configuration.mdx", "CUA_USE_HOST_COMPUTER_SERVER"),
        ("usage.mdx", "Targeting Your Local Desktop"),
    ]

    # BUG FIX: the docs tree hangs off the repository root. This script lives
    # at libs/python/mcp-server/, so the root is three directories above the
    # script's own directory; the old parent.parent resolved to the
    # non-existent libs/python/docs/... location.
    # TODO(review): re-confirm this climb if the script is ever relocated.
    repo_root = pathlib.Path(__file__).resolve().parent
    for _ in range(3):
        repo_root = repo_root.parent

    docs_path = repo_root / "docs" / "content" / "docs" / "libraries" / "mcp-server"

    all_docs_ok = True
    for doc_file, expected_content in docs_to_check:
        doc_path = docs_path / doc_file
        if not doc_path.exists():
            print(f"✗ FAIL | {doc_file} not found")
            all_docs_ok = False
            continue

        content = doc_path.read_text()
        if expected_content in content:
            print(f"✓ PASS | {doc_file} contains '{expected_content}'")
        else:
            print(f"✗ FAIL | {doc_file} missing '{expected_content}'")
            all_docs_ok = False

    print("-" * 60)
    return all_docs_ok
153 | 
154 | 
def print_usage_examples():
    """Print MCP-client configuration examples for VM and local-desktop modes."""
    banner = "=" * 60
    divider = "-" * 60

    print("\n" + banner)
    print("USAGE EXAMPLES")
    print(banner)

    # (section title, verbatim example text) pairs, printed in order.
    sections = [
        (
            "\n1. DEFAULT MODE (VM):",
            """
{
  "mcpServers": {
    "cua-agent": {
      "command": "/bin/bash",
      "args": ["~/.cua/start_mcp_server.sh"],
      "env": {
        "CUA_MODEL_NAME": "anthropic/claude-sonnet-4-5-20250929"
      }
    }
  }
}

Note: CUA_USE_HOST_COMPUTER_SERVER is not set, so VM mode is used (safe).
""",
        ),
        (
            "\n2. LOCAL DESKTOP MODE:",
            """
Step 1: Start computer-server locally:
    python -m computer_server

Step 2: Configure MCP client:
{
  "mcpServers": {
    "cua-agent": {
      "command": "/bin/bash",
      "args": ["~/.cua/start_mcp_server.sh"],
      "env": {
        "CUA_MODEL_NAME": "anthropic/claude-sonnet-4-5-20250929",
        "CUA_USE_HOST_COMPUTER_SERVER": "true"
      }
    }
  }
}

⚠️  WARNING: AI will have direct access to your desktop!
""",
        ),
    ]

    for title, example in sections:
        print(title)
        print(divider)
        print(example)
205 | 
206 | 
def main():
    """Run all quick tests.

    Returns:
        0 when every quick test passed, 1 otherwise (used as exit code).
    """
    banner = "=" * 60
    print(banner)
    print("QUICK TEST: MCP Server Local Desktop Option")
    print(banner)
    print()

    # Each entry pairs a display name with the test's boolean outcome.
    results = [
        ("Environment Variable Parsing", test_env_var_parsing()),
        ("Session Manager Logic", test_session_manager_logic()),
        ("Documentation Consistency", test_documentation_consistency()),
    ]

    print("\n" + banner)
    print("SUMMARY")
    print(banner)
    for test_name, ok in results:
        print(f"{'✓ PASSED' if ok else '✗ FAILED'} | {test_name}")

    if not all(ok for _, ok in results):
        print("\n❌ Some tests failed. Please review the output above.")
        return 1

    print("\n🎉 All quick tests passed!")
    print_usage_examples()
    print("\nNext steps:")
    print("1. Run full automated tests: pytest tests/test_mcp_server_local_option.py")
    print("2. Follow manual testing guide: tests/MANUAL_TEST_LOCAL_OPTION.md")
    return 0
241 | 
242 | 
if __name__ == "__main__":
    # Propagate the aggregate result (0 = all passed) as the process exit code.
    sys.exit(main())
245 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/agent-loops.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Agent Loops
  3 | description: Supported computer-using agent loops and models
  4 | ---
  5 | 
  6 | <Callout>
  7 |   A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/agent_nb.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.
  8 | </Callout>
  9 | 
 10 | An agent can be thought of as a loop - it generates actions, executes them, and repeats until done:
 11 | 
 12 | 1. **Generate**: Your `model` generates `output_text`, `computer_call`, `function_call`
 13 | 2. **Execute**: The `computer` safely executes those items
 14 | 3. **Complete**: If the model has no more calls, it's done!
 15 | 
 16 | To run an agent loop simply do:
 17 | 
 18 | ```python
 19 | from agent import ComputerAgent
 20 | import asyncio
 21 | from computer import Computer
 22 | 
 23 | 
 24 | async def take_screenshot():
 25 |     async with Computer(
 26 |         os_type="linux",
 27 |         provider_type="cloud",
 28 |         name="your-sandbox-name",
 29 |         api_key="your-api-key"
 30 |     ) as computer:
 31 | 
 32 |         agent = ComputerAgent(
 33 |             model="anthropic/claude-sonnet-4-5-20250929",
 34 |             tools=[computer],
 35 |             max_trajectory_budget=5.0
 36 |         )
 37 | 
 38 |         messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}]
 39 | 
 40 |         async for result in agent.run(messages):
 41 |             for item in result["output"]:
 42 |                 if item["type"] == "message":
 43 |                     print(item["content"][0]["text"])
 44 | 
 45 | 
 46 | if __name__ == "__main__":
 47 |     asyncio.run(take_screenshot())
 48 | ```
 49 | 
 50 | For a list of supported models and configurations, see the [Supported Agents](./supported-agents/computer-use-agents) page.
 51 | 
 52 | ### Response Format
 53 | 
 54 | ```python
 55 | {
 56 |     "output": [
 57 |         {
 58 |             "type": "message",
 59 |             "role": "assistant",
 60 |             "content": [{"type": "output_text", "text": "I can see..."}]
 61 |         },
 62 |         {
 63 |             "type": "computer_call",
 64 |             "action": {"type": "screenshot"},
 65 |             "call_id": "call_123"
 66 |         },
 67 |         {
 68 |             "type": "computer_call_output",
 69 |             "call_id": "call_123",
 70 |             "output": {"image_url": "data:image/png;base64,..."}
 71 |         }
 72 |     ],
 73 |     "usage": {
 74 |         "prompt_tokens": 150,
 75 |         "completion_tokens": 75,
 76 |         "total_tokens": 225,
 77 |         "response_cost": 0.01,
 78 |     }
 79 | }
 80 | ```
 81 | 
 82 | ### Environment Variables
 83 | 
 84 | Use the following environment variables to configure the agent and its access to cloud computers and LLM providers:
 85 | 
 86 | ```bash
 87 | # Computer instance (cloud)
 88 | export CUA_SANDBOX_NAME="your-sandbox-name"
 89 | export CUA_API_KEY="your-cua-api-key"
 90 | 
 91 | # LLM API keys
 92 | export ANTHROPIC_API_KEY="your-anthropic-key"
 93 | export OPENAI_API_KEY="your-openai-key"
 94 | ```
 95 | 
 96 | ### Input and output
 97 | 
 98 | The input prompt passed to `Agent.run` can either be a string or a list of message dictionaries:
 99 | 
100 | ```python
101 | messages = [
102 |     {
103 |         "role": "user",
104 |         "content": "Take a screenshot and describe what you see"
105 |     },
106 |     {
107 |         "role": "assistant",
108 |         "content": "I'll take a screenshot for you."
109 |     }
110 | ]
111 | ```
112 | 
113 | The output is an AsyncGenerator that yields response chunks.
114 | 
115 | ### Parameters
116 | 
117 | The `ComputerAgent` constructor provides a wide range of options for customizing agent behavior, tool integration, callbacks, resource management, and more.
118 | 
119 | - `model` (`str`): Default: **required**
120 |   The LLM or agent model to use. Determines which agent loop is selected unless `custom_loop` is provided. (e.g., "claude-sonnet-4-5-20250929", "computer-use-preview", "omni+vertex_ai/gemini-pro")
121 | - `tools` (`List[Any]`):
122 |   List of tools the agent can use (e.g., `Computer`, sandboxed Python functions, etc.).
123 | - `custom_loop` (`Callable`):
124 |   Optional custom agent loop function. If provided, overrides automatic loop selection.
125 | - `only_n_most_recent_images` (`int`):
126 |   If set, only the N most recent images are kept in the message history. Useful for limiting memory usage. Automatically adds `ImageRetentionCallback`.
127 | - `callbacks` (`List[Any]`):
128 |   List of callback instances for advanced preprocessing, postprocessing, logging, or custom hooks. See [Callbacks & Extensibility](#callbacks--extensibility).
129 | - `verbosity` (`int`):
130 |   Logging level (e.g., `logging.INFO`). If set, adds a logging callback.
131 | - `trajectory_dir` (`str`):
132 |   Directory path to save full trajectory data, including screenshots and responses. Adds `TrajectorySaverCallback`.
133 | - `max_retries` (`int`): Default: `3`
134 |   Maximum number of retries for failed API calls (default: 3).
135 | - `screenshot_delay` (`float` | `int`): Default: `0.5`
136 |   Delay (in seconds) before taking screenshots (default: 0.5).
137 | - `use_prompt_caching` (`bool`): Default: `False`
138 |   Enables prompt caching for repeated prompts (mainly for Anthropic models).
139 | - `max_trajectory_budget` (`float` | `dict`):
140 |   If set (float or dict), adds a budget manager callback that tracks usage costs and stops execution if the budget is exceeded. Dict allows advanced options (e.g., `{ "max_budget": 5.0, "raise_error": True }`).
141 | - `instructions` (`str` | `list[str]`):
142 |   System instructions for the agent. Can be a single string or multiple strings in a tuple/list for readability; they are concatenated into one system prompt.
143 | - `api_key` (`str`):
144 |   Optional API key override for the model provider.
145 | - `api_base` (`str`):
146 |   Optional API base URL override for the model provider.
147 | - `**additional_generation_kwargs` (`any`):
148 |   Any additional keyword arguments are passed through to the agent loop or model provider.
149 | 
150 | **Example with advanced options:**
151 | 
152 | ```python
153 | from agent import ComputerAgent
154 | from computer import Computer
155 | from agent.callbacks import ImageRetentionCallback
156 | 
157 | agent = ComputerAgent(
158 |     model="anthropic/claude-sonnet-4-5-20250929",
159 |     tools=[Computer(...)],
160 |     only_n_most_recent_images=3,
161 |     callbacks=[ImageRetentionCallback(only_n_most_recent_images=3)],
162 |     verbosity=logging.INFO,
163 |     trajectory_dir="trajectories",
164 |     max_retries=5,
165 |     screenshot_delay=1.0,
166 |     use_prompt_caching=True,
167 |     max_trajectory_budget={"max_budget": 5.0, "raise_error": True},
168 |     instructions=(
169 |         "You are a helpful computer-using agent",
170 |         "Output computer calls until you complete the given task",
171 |     ),
172 |     api_key="your-api-key",
173 |     api_base="https://your-api-base.com/v1",
174 | )
175 | ```
176 | 
177 | ### Streaming Responses
178 | 
179 | ```python
180 | async for result in agent.run(messages, stream=True):
181 |     # Process streaming chunks
182 |     for item in result["output"]:
183 |         if item["type"] == "message":
184 |             print(item["content"][0]["text"], end="", flush=True)
185 |         elif item["type"] == "computer_call":
186 |             action = item["action"]
187 |             print(f"\n[Action: {action['type']}]")
188 | ```
189 | 
190 | ### Error Handling
191 | 
192 | ```python
193 | try:
194 |     async for result in agent.run(messages):
195 |         # Process results
196 |         pass
197 | except BudgetExceededException:
198 |     print("Budget limit exceeded")
199 | except Exception as e:
200 |     print(f"Agent error: {e}")
201 | ```
202 | 
```

--------------------------------------------------------------------------------
/scripts/install-cli.sh:
--------------------------------------------------------------------------------

```bash
  1 | #!/bin/bash
  2 | set -e
  3 | 
  4 | # CUA CLI Installation Script for macOS/Linux
  5 | echo "🚀 Installing CUA CLI..."
  6 | 
  7 | # Function to print success message
  8 | print_success() {
  9 |     local bin_path="$1"
 10 |     local version="$2"
 11 |     local config_file="$3"
 12 |     
 13 |     printf "\033[32m✅  CUA CLI %s was installed successfully to %s\033[0m\n" "$version" "$bin_path"
 14 |     printf "\033[90mAdded \"%s\" to \$PATH in \"%s\"\033[0m\n" "$bin_path" "$config_file"
 15 |     printf "\n\033[90mTo get started, run:\033[0m\n"
 16 |     printf "  source %s\n" "$config_file"
 17 |     printf "  cua --help\n"
 18 |     printf "\033[90m📚 For more help, visit: https://docs.cua.ai/libraries/cua-cli\033[0m\n"
 19 | }
 20 | 
 21 | # Function to install with bun as fallback
 22 | install_with_bun() {
 23 |     echo "📦 Installing CUA CLI using Bun..."
 24 |     
 25 |     # Check if bun is already installed
 26 |     if ! command -v bun &> /dev/null; then
 27 |         echo "📦 Installing Bun..."
 28 |         curl -fsSL https://bun.sh/install | bash
 29 |         
 30 |         # Source the shell profile to make bun available
 31 |         if [ -f "$HOME/.bashrc" ]; then
 32 |             source "$HOME/.bashrc"
 33 |         elif [ -f "$HOME/.zshrc" ]; then
 34 |             source "$HOME/.zshrc"
 35 |         fi
 36 |         
 37 |         # Add bun to PATH for this session
 38 |         export PATH="$HOME/.bun/bin:$PATH"
 39 |     fi
 40 | 
 41 |     # Verify bun installation
 42 |     if ! command -v bun &> /dev/null; then
 43 |         echo "❌ Failed to install Bun. Please install manually from https://bun.sh"
 44 |         exit 1
 45 |     fi
 46 | 
 47 |     echo "📦 Installing CUA CLI..."
 48 |     if ! bun add -g @trycua/cli; then
 49 |         echo "❌ Failed to install with Bun, trying npm..."
 50 |         if ! npm install -g @trycua/cli; then
 51 |             echo "❌ Installation failed. Please try installing manually:"
 52 |             echo "   npm install -g @trycua/cli"
 53 |             exit 1
 54 |         fi
 55 |     fi
 56 | 
 57 |     # Verify installation
 58 |     if command -v cua &> /dev/null; then
 59 |         # Determine which config file was updated
 60 |         local config_file="$HOME/.bashrc"
 61 |         if [ -f "$HOME/.zshrc" ]; then
 62 |             config_file="$HOME/.zshrc"
 63 |         elif [ -f "$HOME/.profile" ]; then
 64 |             config_file="$HOME/.profile"
 65 |         fi
 66 |         # Determine installed version via npm registry (fallback to unknown)
 67 |         local VERSION_BUN
 68 |         VERSION_BUN=$(npm view @trycua/cli version 2>/dev/null || echo "unknown")
 69 |         # Write version file to ~/.cua/bin/.version
 70 |         local INSTALL_DIR="$HOME/.cua/bin"
 71 |         mkdir -p "$INSTALL_DIR"
 72 |         echo "$VERSION_BUN" > "$INSTALL_DIR/.version"
 73 |         # Print success and exit
 74 |         print_success "$(command -v cua)" "$VERSION_BUN" "$config_file"
 75 |         exit 0
 76 |     else
 77 |         echo "❌ Installation failed. Please try installing manually:"
 78 |         echo "   npm install -g @trycua/cli"
 79 |         exit 1
 80 |     fi
 81 | }
 82 | 
 83 | # Determine OS and architecture
 84 | OS=$(uname -s | tr '[:upper:]' '[:lower:]')
 85 | ARCH=$(uname -m)
 86 | 
 87 | # Map architecture to the format used in release assets
 88 | case "$ARCH" in
 89 |     x86_64) ARCH="x64" ;;
 90 |     aarch64) ARCH="arm64" ;;
 91 |     arm64) ARCH="arm64" ;;
 92 |     *) ARCH="$ARCH" ;;
 93 | esac
 94 | 
 95 | # Determine the binary name
 96 | BINARY_NAME="cua-${OS}-${ARCH}"
 97 | if [ "$OS" = "darwin" ] && [ "$ARCH" = "arm64" ]; then
 98 |     BINARY_NAME="cua-darwin-arm64"
 99 | elif [ "$OS" = "darwin" ] && [ "$ARCH" = "x64" ]; then
100 |     BINARY_NAME="cua-darwin-x64"
101 | elif [ "$OS" = "linux" ] && [ "$ARCH" = "x64" ]; then
102 |     BINARY_NAME="cua-linux-x64"
103 | else
104 |     echo "⚠️  Pre-built binary not available for ${OS}-${ARCH}, falling back to Bun installation"
105 |     install_with_bun
106 |     exit 0
107 | fi
108 | 
109 | # Get the latest release version
110 | LATEST_RELEASE=$(curl -s https://api.github.com/repos/trycua/cua/releases/latest)
111 | if [ -z "$LATEST_RELEASE" ]; then
112 |     echo "⚠️  Could not fetch latest release, falling back to Bun installation"
113 |     install_with_bun
114 |     exit 0
115 | fi
116 | 
117 | # Extract version number (remove 'cua-v' prefix)
118 | TAG_NAME=$(echo "$LATEST_RELEASE" | grep 'tag_name' | cut -d '"' -f 4)
119 | VERSION=${TAG_NAME#cua-v}
120 | 
121 | # Find the binary URL in the release assets
122 | BINARY_URL=$(echo "$LATEST_RELEASE" | grep -o 'https://.*/download/[^"]*/'${BINARY_NAME}'"' | head -1)
123 | BINARY_URL="${BINARY_URL%\"}"
124 | printf "\033[90mBINARY_URL: %s\033[0m\n" "$BINARY_URL"
125 | 
126 | if [ -z "$BINARY_URL" ]; then
127 |     echo "⚠️  Could not find ${BINARY_NAME} in release assets, falling back to Bun installation"
128 |     install_with_bun
129 |     exit 0
130 | fi
131 | 
132 | # Create ~/.cua/bin directory if it doesn't exist
133 | INSTALL_DIR="$HOME/.cua/bin"
134 | mkdir -p "$INSTALL_DIR"
135 | 
136 | # Download the binary
137 | echo "📥 Downloading CUA CLI $VERSION for ${OS}-${ARCH}..."
138 | echo "📍 Downloading from: $BINARY_URL"
139 | 
140 | # Download with progress bar and proper error handling
141 | if ! curl -L --progress-bar --fail "$BINARY_URL" -o "$INSTALL_DIR/cua"; then
142 |     echo "❌ Failed to download pre-built binary from $BINARY_URL"
143 |     echo "⚠️  Falling back to Bun installation"
144 |     install_with_bun
145 |     exit 0
146 | fi
147 | 
148 | # Verify the downloaded file exists and has content
149 | if [ ! -f "$INSTALL_DIR/cua" ] || [ ! -s "$INSTALL_DIR/cua" ]; then
150 |     echo "❌ Downloaded file is missing or empty"
151 |     echo "⚠️  Falling back to Bun installation"
152 |     rm -f "$INSTALL_DIR/cua"
153 |     install_with_bun
154 |     exit 0
155 | fi
156 | 
157 | # Check if the downloaded file looks like a binary (not HTML error page)
158 | if file "$INSTALL_DIR/cua" | grep -q "HTML\|text"; then
159 |     echo "❌ Downloaded file appears to be corrupted (HTML/text instead of binary)"
160 |     echo "⚠️  Falling back to Bun installation"
161 |     rm -f "$INSTALL_DIR/cua"
162 |     install_with_bun
163 |     exit 0
164 | fi
165 | 
166 | # Make the binary executable
167 | chmod +x "$INSTALL_DIR/cua"
168 | 
169 | # Write version file
170 | echo "$VERSION" > "$INSTALL_DIR/.version"
171 | 
172 | # Add ~/.cua/bin to PATH if not already in PATH
173 | if [[ ":$PATH:" != *":$INSTALL_DIR:"* ]]; then
174 |     # Add to .bashrc, .zshrc, or .profile
175 |     if [ -f "$HOME/.bashrc" ]; then
176 |         echo "export PATH=\"$INSTALL_DIR:\$PATH\"" >> "$HOME/.bashrc"
177 |         echo "Added $INSTALL_DIR to PATH in ~/.bashrc"
178 |     fi
179 |     
180 |     if [ -f "$HOME/.zshrc" ]; then
181 |         echo "export PATH=\"$INSTALL_DIR:\$PATH\"" >> "$HOME/.zshrc"
182 |         echo "Added $INSTALL_DIR to PATH in ~/.zshrc"
183 |     fi
184 |     
185 |     if [ -f "$HOME/.profile" ] && [ ! -f "$HOME/.bashrc" ] && [ ! -f "$HOME/.zshrc" ]; then
186 |         echo "export PATH=\"$INSTALL_DIR:\$PATH\"" >> "$HOME/.profile"
187 |         echo "Added $INSTALL_DIR to PATH in ~/.profile"
188 |     fi
189 |     
190 |     # Add to current session
191 |     export PATH="$INSTALL_DIR:$PATH"
192 | fi
193 | 
194 | # Verify installation
195 | if command -v cua &> /dev/null; then
196 |     # Determine which config file was updated
197 |     config_file="$HOME/.bashrc"
198 |     if [ -f "$HOME/.zshrc" ]; then
199 |         config_file="$HOME/.zshrc"
200 |     elif [ -f "$HOME/.profile" ]; then
201 |         config_file="$HOME/.profile"
202 |     fi
203 |     
204 |     print_success "$(which cua)" "$VERSION" "$config_file"
205 |     exit 0
206 | else
207 |     echo "❌ Installation failed. Please try installing manually:"
208 |     echo "   curl -fsSL https://cua.ai/install.sh | sh"
209 |     exit 1
210 | fi
211 | 
```

--------------------------------------------------------------------------------
/libs/qemu-docker/windows/src/vm/setup/setup-cua-server.ps1:
--------------------------------------------------------------------------------

```powershell
  1 | # Setup CUA Computer Server on Windows 11
  2 | # Creates a scheduled task to run computer server in background
  3 | 
  4 | Set-StrictMode -Version Latest
  5 | $ErrorActionPreference = 'Continue'
  6 | 
  7 | # Import shared utilities
  8 | $scriptFolder = "C:\OEM"
  9 | Import-Module (Join-Path $scriptFolder -ChildPath "setup-utils.psm1")
 10 | 
 11 | # --- Logging ---
 12 | $LogDir = "C:\Windows\Temp"
 13 | if (!(Test-Path $LogDir)) { New-Item -ItemType Directory -Force -Path $LogDir | Out-Null }
 14 | $RunId = (Get-Date -Format 'yyyyMMdd_HHmmss') + "_" + $PID
 15 | $script:LogFile = Join-Path $LogDir ("setup_cua_server_" + $RunId + ".log")
 16 | 
 17 | Write-Log -LogFile $script:LogFile -Message "=== Installing CUA Computer Server ==="
 18 | 
 19 | # Ensure Chocolatey and Python 3.12 are present
 20 | try {
 21 |   $ChocoExe = Resolve-ChocoPath
 22 |   if ($ChocoExe) {
 23 |     Write-Log -LogFile $script:LogFile -Message "Installing Python 3.12 via Chocolatey"
 24 |     try {
 25 |       & $ChocoExe install -y python312 | Out-Null
 26 |     } catch {
 27 |       Write-Log -LogFile $script:LogFile -Message "Python 3.12 install warning: $($_.Exception.Message)"
 28 |     }
 29 |   } else {
 30 |     Write-Log -LogFile $script:LogFile -Message "Chocolatey not available; skipping python312 install"
 31 |   }
 32 | } catch {
 33 |   Write-Log -LogFile $script:LogFile -Message "Chocolatey bootstrap warning: $($_.Exception.Message)"
 34 | }
 35 | 
 36 | # Create venv
 37 | $HomeDir = $env:USERPROFILE
 38 | $CuaDir  = Join-Path $HomeDir '.cua-server'
 39 | $VenvDir = Join-Path $CuaDir 'venv'
 40 | New-Item -ItemType Directory -Force -Path $CuaDir | Out-Null
 41 | 
 42 | Write-Log -LogFile $script:LogFile -Message "Creating Python virtual environment at $VenvDir"
 43 | $ExistingVenvPython = Join-Path $VenvDir 'Scripts\python.exe'
 44 | if (Test-Path -LiteralPath $ExistingVenvPython) {
 45 |   Write-Log -LogFile $script:LogFile -Message "Existing venv detected; skipping creation"
 46 | } else {
 47 |   try {
 48 |     & py -m venv $VenvDir
 49 |     Write-Log -LogFile $script:LogFile -Message "Virtual environment created successfully"
 50 |   } catch {
 51 |     Write-Log -LogFile $script:LogFile -Message "venv creation error: $($_.Exception.Message)"
 52 |     throw
 53 |   }
 54 | }
 55 | 
 56 | $PyExe  = Join-Path $VenvDir 'Scripts\python.exe'
 57 | $PipExe = Join-Path $VenvDir 'Scripts\pip.exe'
 58 | $ActivateScript = Join-Path $VenvDir 'Scripts\Activate.ps1'
 59 | 
 60 | Write-Log -LogFile $script:LogFile -Message "Activating virtual environment"
 61 | & $ActivateScript
 62 | 
 63 | Write-Log -LogFile $script:LogFile -Message "Upgrading pip, setuptools, and wheel"
 64 | try {
 65 |   & $PipExe install --upgrade pip setuptools wheel 2>&1 | Tee-Object -FilePath $script:LogFile -Append | Out-Null
 66 | } catch {
 67 |   Write-Log -LogFile $script:LogFile -Message "pip bootstrap warning: $($_.Exception.Message)"
 68 | }
 69 | 
 70 | Write-Log -LogFile $script:LogFile -Message "Installing cua-computer-server"
 71 | try {
 72 |   & $PipExe install --upgrade cua-computer-server 2>&1 | Tee-Object -FilePath $script:LogFile -Append | Out-Null
 73 |   Write-Log -LogFile $script:LogFile -Message "cua-computer-server installed successfully"
 74 | } catch {
 75 |   Write-Log -LogFile $script:LogFile -Message "Server install error: $($_.Exception.Message)"
 76 |   throw
 77 | }
 78 | 
 79 | # Open firewall for port 5000
 80 | Write-Log -LogFile $script:LogFile -Message "Opening firewall for port 5000"
 81 | try {
 82 |   netsh advfirewall firewall add rule name="CUA Computer Server 5000" dir=in action=allow protocol=TCP localport=5000 | Out-Null
 83 |   Write-Log -LogFile $script:LogFile -Message "Firewall rule added successfully"
 84 | } catch {
 85 |   Write-Log -LogFile $script:LogFile -Message "Firewall rule warning: $($_.Exception.Message)"
 86 | }
 87 | 
 88 | # Create start script with auto-restart
 89 | $StartScript = Join-Path $CuaDir 'start-server.ps1'
 90 | $StartScriptContent = @"
 91 | param()
 92 | 
 93 | `$env:PYTHONUNBUFFERED = '1'
 94 | 
 95 | `$LogFile = Join-Path '$CuaDir' 'server.log'
 96 | `$ActivateScript = '$ActivateScript'
 97 | `$PipExe = '$PipExe'
 98 | `$Python = '$PyExe'
 99 | 
100 | function Start-Server {
101 |     Write-Output "Activating virtual environment and updating cua-computer-server..." | Out-File -FilePath `$LogFile -Append
102 |     & `$ActivateScript
103 |     & `$PipExe install --upgrade cua-computer-server 2>&1 | Out-File -FilePath `$LogFile -Append
104 | 
105 |     Write-Output "Starting CUA Computer Server on port 5000..." | Out-File -FilePath `$LogFile -Append
106 |     & `$Python -m computer_server --port 5000 2>&1 | Out-File -FilePath `$LogFile -Append
107 |     return `$LASTEXITCODE
108 | }
109 | 
110 | while (`$true) {
111 |     Start-Server
112 |     `$code = `$LASTEXITCODE
113 |     Write-Output "Server exited with code: `$code. Restarting in 5s..." | Out-File -FilePath `$LogFile -Append
114 |     Start-Sleep -Seconds 5
115 | }
116 | "@
117 | 
118 | Set-Content -Path $StartScript -Value $StartScriptContent -Encoding UTF8
119 | Write-Log -LogFile $script:LogFile -Message "Start script created at $StartScript"
120 | 
121 | # Create VBScript wrapper to launch PowerShell hidden
122 | $VbsWrapper = Join-Path $CuaDir 'start-server-hidden.vbs'
123 | $VbsContent = @"
124 | Set objShell = CreateObject("WScript.Shell")
125 | objShell.Run "powershell.exe -NoProfile -ExecutionPolicy Bypass -File ""$StartScript""", 0, False
126 | "@
127 | Set-Content -Path $VbsWrapper -Value $VbsContent -Encoding ASCII
128 | Write-Log -LogFile $script:LogFile -Message "VBScript wrapper created at $VbsWrapper"
129 | 
130 | # Create scheduled task to run at logon
131 | try {
132 |   $TaskName = 'CUA-Computer-Server'
133 |   $Username = 'Docker'  # Default user for Dockur Windows
134 | 
135 |   # Remove existing task if present
136 |   $existingTask = Get-ScheduledTask -TaskName $TaskName -ErrorAction SilentlyContinue
137 |   if ($existingTask) {
138 |     Write-Log -LogFile $script:LogFile -Message "Removing existing scheduled task: $TaskName"
139 |     Unregister-ScheduledTask -TaskName $TaskName -Confirm:$false
140 |   }
141 | 
142 |   # Create action to run VBScript wrapper (hidden)
143 |   $Action = New-ScheduledTaskAction -Execute 'wscript.exe' -Argument "`"$VbsWrapper`""
144 | 
145 |   # Trigger: At logon of user
146 |   $UserId = "$env:COMPUTERNAME\$Username"
147 |   $Trigger = New-ScheduledTaskTrigger -AtLogOn -User $UserId
148 | 
149 |   # Principal: Run in background without window (S4U = Service For User)
150 |   $Principal = New-ScheduledTaskPrincipal -UserId $UserId -LogonType S4U -RunLevel Highest
151 | 
152 |   # Task settings - hide window
153 |   $Settings = New-ScheduledTaskSettingsSet `
154 |     -AllowStartIfOnBatteries `
155 |     -DontStopIfGoingOnBatteries `
156 |     -StartWhenAvailable `
157 |     -RestartCount 999 `
158 |     -RestartInterval (New-TimeSpan -Minutes 1) `
159 |     -ExecutionTimeLimit (New-TimeSpan -Days 365) `
160 |     -Hidden
161 | 
162 |   # Register the task
163 |   Write-Log -LogFile $script:LogFile -Message "Registering scheduled task '$TaskName' to run as $Username at logon (hidden)"
164 |   Register-ScheduledTask `
165 |     -TaskName $TaskName `
166 |     -Action $Action `
167 |     -Trigger $Trigger `
168 |     -Principal $Principal `
169 |     -Settings $Settings `
170 |     -Force | Out-Null
171 | 
172 |   Write-Log -LogFile $script:LogFile -Message "Scheduled task '$TaskName' registered successfully (runs hidden in background)"
173 | 
174 | } catch {
175 |   Write-Log -LogFile $script:LogFile -Message "Scheduled task setup error: $($_.Exception.Message)"
176 |   throw
177 | }
178 | 
179 | Write-Log -LogFile $script:LogFile -Message "=== CUA Computer Server setup completed ==="
180 | exit 0
181 | 
```

--------------------------------------------------------------------------------
/blog/introducing-cua-cli.md:
--------------------------------------------------------------------------------

```markdown
  1 | # Introducing the Cua CLI: Manage Cloud Sandboxes from Your Terminal
  2 | 
  3 | If you've been using our Cloud Sandboxes, you've probably been managing them through the web dashboard - clicking through forms to create instances, copying credentials, manually starting and stopping sandboxes. It works, but it's not exactly built for power users like you.
  4 | 
  5 | Today we're launching the **Cua CLI**: a command-line interface that brings the full power of our Cloud Sandbox platform to your terminal. Create, manage, and connect to Linux, Windows, or macOS sandboxes in seconds—all from a single command.
  6 | 
  7 | ![Cua CLI Banner](https://github.com/user-attachments/assets/f8358acf-9194-46ee-b9e3-50cfcff5e489)
  8 | 
  9 | ## What You Can Do
 10 | 
 11 | The Cua CLI handles everything you need to work with Cloud Sandboxes:
 12 | 
 13 | **Authentication**
 14 | 
 15 | - Browser-based OAuth login with automatic credential storage
 16 | - Direct API key support for CI/CD pipelines
 17 | - Export credentials to `.env` files for SDK integration
 18 | 
 19 | **Sandbox Management**
 20 | 
 21 | - Create sandboxes with your choice of OS, size, and region
 22 | - List all your sandboxes with status and connection details
 23 | - Start, stop, restart, and delete sandboxes
 24 | - Open remote desktop (VNC) connections directly in your browser
 25 | 
 26 | **Two Command Styles**
 27 | The CLI supports both flat and grouped command structures—use whichever fits your workflow:
 28 | 
 29 | ```bash
 30 | # Grouped style (explicit & clear)
 31 | cua sb ls
 32 | cua sb create --os linux --size small --region north-america
 33 | cua sb vnc my-sandbox
 34 | 
 35 | # Flat style (quick & concise)
 36 | cua ls
 37 | cua create --os linux --size small --region north-america
 38 | cua vnc my-sandbox
 39 | ```
 40 | 
 41 | Both styles work identically. The CLI shows grouped commands in help by default, but all flat commands remain available for backwards compatibility.
 42 | 
 43 | ## Installation
 44 | 
 45 | One command installs everything (includes Bun runtime + Cua CLI):
 46 | 
 47 | ```bash
 48 | # macOS/Linux
 49 | curl -LsSf https://cua.ai/cli/install.sh | sh
 50 | 
 51 | # Windows
 52 | powershell -ExecutionPolicy ByPass -c "irm https://cua.ai/cli/install.ps1 | iex"
 53 | ```
 54 | 
 55 | Or install via npm if you prefer:
 56 | 
 57 | ```bash
 58 | npm install -g @trycua/cli
 59 | ```
 60 | 
 61 | ## Getting Started
 62 | 
 63 | Authenticate with your Cua account:
 64 | 
 65 | ```bash
 66 | # Interactive browser login (recommended)
 67 | cua auth login
 68 | 
 69 | # Or provide your API key directly
 70 | cua auth login --api-key sk-your-api-key-here
 71 | ```
 72 | 
 73 | Create a sandbox:
 74 | 
 75 | ```bash
 76 | cua sb create --os linux --size small --region north-america
 77 | # Sandbox created and ready: my-sandbox-abc123
 78 | # Password: secure-password-here
 79 | # Host: my-sandbox-abc123.sandbox.cua.ai
 80 | ```
 81 | 
 82 | List your sandboxes:
 83 | 
 84 | ```bash
 85 | cua sb list
 86 | # NAME                 STATUS    HOST
 87 | # my-sandbox-abc123    running   my-sandbox-abc123.sandbox.cua.ai
 88 | # test-windows-456     stopped   test-windows-456.sandbox.cua.ai
 89 | ```
 90 | 
 91 | Open a remote desktop:
 92 | 
 93 | ```bash
 94 | cua sb vnc my-sandbox-abc123
 95 | # Opens your browser to the VNC interface with password pre-filled
 96 | ```
 97 | 
 98 | ## SDK Integration
 99 | 
100 | Export your API key to a `.env` file for seamless SDK integration:
101 | 
102 | ```bash
103 | cd my-project
104 | cua auth env
105 | # Wrote /path/to/my-project/.env
106 | ```
107 | 
108 | Then use it with our Python or TypeScript SDKs:
109 | 
110 | ```python
111 | from computer import Computer
112 | 
113 | computer = Computer(
114 |     os_type="linux",
115 |     provider_type="cloud",
116 |     name="my-sandbox-abc123",
117 |     api_key="your-api-key"  # Or load from .env
118 | )
119 | 
120 | await computer.run()
121 | ```
122 | 
123 | ## Sandbox Sizes & Regions
124 | 
125 | Create sandboxes in the size and region that fits your needs:
126 | 
127 | **Sizes:**
128 | 
129 | - `small` - 2 cores, 8 GB RAM, 128 GB SSD
130 | - `medium` - 4 cores, 16 GB RAM, 128 GB SSD
131 | - `large` - 8 cores, 32 GB RAM, 256 GB SSD
132 | 
133 | **Regions:**
134 | 
135 | - `north-america`
136 | - `europe`
137 | - `asia-pacific`
138 | - `south-america`
139 | 
140 | **OS Options:**
141 | 
142 | - `linux` - Ubuntu with XFCE desktop
143 | - `windows` - Windows 11 with Edge and Python
144 | - `macos` - macOS (preview access)
145 | 
146 | ## Example Workflows
147 | 
148 | **Quick Testing Environment**
149 | 
150 | ```bash
151 | # Spin up a sandbox, test something, tear it down
152 | cua sb create --os linux --size small --region north-america
153 | # ... do your testing ...
154 | cua sb delete my-sandbox-abc123
155 | ```
156 | 
157 | **Persistent Development Sandbox**
158 | 
159 | ```bash
160 | # Create a sandbox for long-term use
161 | cua sb create --os linux --size medium --region north-america
162 | 
163 | # Stop it when not in use (data persists)
164 | cua sb stop my-sandbox-abc123
165 | 
166 | # Start it again when needed
167 | cua sb start my-sandbox-abc123
168 | ```
169 | 
170 | **CI/CD Integration**
171 | 
172 | ```bash
173 | # Provision sandboxes in your pipeline
174 | export CUA_API_KEY="sk-your-api-key"
175 | cua auth login --api-key "$CUA_API_KEY"
176 | cua sb create --os linux --size large --region north-america
177 | 
178 | # Run your tests with the Cua Computer SDK
179 | python run_tests.py
180 | 
181 | # Clean up
182 | cua sb delete my-test-sandbox
183 | ```
184 | 
185 | ## Command Aliases
186 | 
187 | We've added aliases for common commands to speed up your workflow:
188 | 
189 | ```bash
190 | # List aliases
191 | cua list    # or: cua ls, cua ps, cua sb list
192 | 
193 | # VNC aliases
194 | cua vnc     # or: cua open, cua sb vnc
195 | ```
196 | 
197 | ## FAQs
198 | 
199 | <details>
200 | <summary><strong>Can I use this in scripts and CI/CD?</strong></summary>
201 | 
202 | Yes. All commands support non-interactive mode with `--api-key` flags, and the CLI exits with proper status codes for scripting. The flat command style (`cua list`, `cua create`) is particularly useful for quick scripts.
203 | 
204 | </details>
205 | 
206 | <details>
207 | <summary><strong>Where are my credentials stored?</strong></summary>
208 | 
209 | API keys are stored in `~/.cua/cli.sqlite` using a local SQLite database. They never leave your machine. Use `cua auth logout` to clear stored credentials.
210 | 
211 | </details>
212 | 
213 | <details>
214 | <summary><strong>What happens to passwords in the output?</strong></summary>
215 | 
216 | Passwords are hidden by default in `cua list` for security. Use `cua list --show-passwords` to display them when needed.
217 | 
218 | </details>
219 | 
220 | <details>
221 | <summary><strong>Can I manage sandboxes created through the web dashboard?</strong></summary>
222 | 
223 | Yes. The CLI and dashboard share the same API. Any sandbox you create in the dashboard will show up in `cua list`, and vice versa.
224 | 
225 | </details>
226 | 
227 | <details>
228 | <summary><strong>How do I update the CLI?</strong></summary>
229 | 
230 | If you installed via script:
231 | 
232 | ```bash
233 | curl -LsSf https://cua.ai/cli/install.sh | sh
234 | ```
235 | 
236 | If you installed via npm:
237 | 
238 | ```bash
239 | npm install -g @trycua/cli@latest
240 | ```
241 | 
242 | </details>
243 | 
244 | ## What's Next
245 | 
246 | We're actively iterating based on feedback. Planned features include:
247 | 
248 | - SSH key management for secure sandbox access
249 | - Template-based sandbox creation
250 | - Batch operations (start/stop multiple sandboxes)
251 | - Custom sandbox configurations
252 | - Snapshot management
253 | 
254 | If there's a feature you need, let us know in [Discord](https://discord.gg/cua-ai).
255 | 
256 | ## Need Help?
257 | 
258 | - **Documentation**: [https://cua.ai/docs/cli-playbook/commands](https://cua.ai/docs/cli-playbook/commands)
259 | - **Installation Guide**: [https://cua.ai/docs/cli-playbook](https://cua.ai/docs/cli-playbook)
260 | - **Discord Community**: [https://discord.gg/cua-ai](https://discord.gg/cua-ai)
261 | 
262 | ---
263 | 
264 | Get started at [cua.ai](https://cua.ai) or check out the [quickstart guide](https://cua.ai/docs/get-started/quickstart).
265 | 
```

--------------------------------------------------------------------------------
/libs/python/computer/computer/providers/factory.py:
--------------------------------------------------------------------------------

```python
  1 | """Factory for creating VM providers."""
  2 | 
  3 | import logging
  4 | from typing import Any, Dict, Optional, Type, Union
  5 | 
  6 | from .base import BaseVMProvider, VMProviderType
  7 | 
  8 | logger = logging.getLogger(__name__)
  9 | 
 10 | 
 11 | class VMProviderFactory:
 12 |     """Factory for creating VM providers based on provider type."""
 13 | 
 14 |     @staticmethod
 15 |     def create_provider(
 16 |         provider_type: Union[str, VMProviderType],
 17 |         provider_port: int = 7777,
 18 |         host: str = "localhost",
 19 |         bin_path: Optional[str] = None,
 20 |         storage: Optional[str] = None,
 21 |         shared_path: Optional[str] = None,
 22 |         image: Optional[str] = None,
 23 |         verbose: bool = False,
 24 |         ephemeral: bool = False,
 25 |         noVNC_port: Optional[int] = None,
 26 |         api_port: Optional[int] = None,
 27 |         **kwargs,
 28 |     ) -> BaseVMProvider:
 29 |         """Create a VM provider of the specified type.
 30 | 
 31 |         Args:
 32 |             provider_type: Type of VM provider to create
 33 |             provider_port: Port for the provider's API server
 34 |             host: Hostname for the API server
 35 |             bin_path: Path to provider binary if needed
 36 |             storage: Path for persistent VM storage
 37 |             shared_path: Path for shared folder between host and VM
 38 |             image: VM image to use (for Lumier provider)
 39 |             verbose: Enable verbose logging
 40 |             ephemeral: Use ephemeral (temporary) storage
 41 |             noVNC_port: Specific port for noVNC interface (for Lumier and Docker provider)
 42 |             api_port: Specific port for Computer API server (for Docker provider)
 43 | 
 44 |         Returns:
 45 |             An instance of the requested VM provider
 46 | 
 47 |         Raises:
 48 |             ImportError: If the required dependencies for the provider are not installed
 49 |             ValueError: If the provider type is not supported
 50 |         """
 51 |         # Convert string to enum if needed
 52 |         if isinstance(provider_type, str):
 53 |             try:
 54 |                 provider_type = VMProviderType(provider_type.lower())
 55 |             except ValueError:
 56 |                 provider_type = VMProviderType.UNKNOWN
 57 | 
 58 |         if provider_type == VMProviderType.LUME:
 59 |             try:
 60 |                 from .lume import HAS_LUME, LumeProvider
 61 | 
 62 |                 if not HAS_LUME:
 63 |                     raise ImportError(
 64 |                         "The pylume package is required for LumeProvider. "
 65 |                         "Please install it with 'pip install cua-computer[lume]'"
 66 |                     )
 67 |                 return LumeProvider(
 68 |                     provider_port=provider_port,
 69 |                     host=host,
 70 |                     storage=storage,
 71 |                     verbose=verbose,
 72 |                     ephemeral=ephemeral,
 73 |                 )
 74 |             except ImportError as e:
 75 |                 logger.error(f"Failed to import LumeProvider: {e}")
 76 |                 raise ImportError(
 77 |                     "The pylume package is required for LumeProvider. "
 78 |                     "Please install it with 'pip install cua-computer[lume]'"
 79 |                 ) from e
 80 |         elif provider_type == VMProviderType.LUMIER:
 81 |             try:
 82 |                 from .lumier import HAS_LUMIER, LumierProvider
 83 | 
 84 |                 if not HAS_LUMIER:
 85 |                     raise ImportError(
 86 |                         "Docker is required for LumierProvider. "
 87 |                         "Please install Docker for Apple Silicon and Lume CLI before using this provider."
 88 |                     )
 89 |                 return LumierProvider(
 90 |                     provider_port=provider_port,
 91 |                     host=host,
 92 |                     storage=storage,
 93 |                     shared_path=shared_path,
 94 |                     image=image or "macos-sequoia-cua:latest",
 95 |                     verbose=verbose,
 96 |                     ephemeral=ephemeral,
 97 |                     noVNC_port=noVNC_port,
 98 |                 )
 99 |             except ImportError as e:
100 |                 logger.error(f"Failed to import LumierProvider: {e}")
101 |                 raise ImportError(
102 |                     "Docker and Lume CLI are required for LumierProvider. "
103 |                     "Please install Docker for Apple Silicon and run the Lume installer script."
104 |                 ) from e
105 | 
106 |         elif provider_type == VMProviderType.CLOUD:
107 |             try:
108 |                 from .cloud import CloudProvider
109 | 
110 |                 return CloudProvider(
111 |                     verbose=verbose,
112 |                     **kwargs,
113 |                 )
114 |             except ImportError as e:
115 |                 logger.error(f"Failed to import CloudProvider: {e}")
116 |                 raise ImportError(
117 |                     "The CloudProvider is not fully implemented yet. "
118 |                     "Please use LUME or LUMIER provider instead."
119 |                 ) from e
120 |         elif provider_type == VMProviderType.WINSANDBOX:
121 |             try:
122 |                 from .winsandbox import HAS_WINSANDBOX, WinSandboxProvider
123 | 
124 |                 if not HAS_WINSANDBOX:
125 |                     raise ImportError(
126 |                         "pywinsandbox is required for WinSandboxProvider. "
127 |                         "Please install it with 'pip install -U git+https://github.com/karkason/pywinsandbox.git'"
128 |                     )
129 |                 return WinSandboxProvider(
130 |                     host=host,
131 |                     storage=storage,
132 |                     verbose=verbose,
133 |                     ephemeral=ephemeral,
134 |                     **kwargs,
135 |                 )
136 |             except ImportError as e:
137 |                 logger.error(f"Failed to import WinSandboxProvider: {e}")
138 |                 raise ImportError(
139 |                     "pywinsandbox is required for WinSandboxProvider. "
140 |                     "Please install it with 'pip install -U git+https://github.com/karkason/pywinsandbox.git'"
141 |                 ) from e
142 |         elif provider_type == VMProviderType.DOCKER:
143 |             try:
144 |                 from .docker import HAS_DOCKER, DockerProvider
145 | 
146 |                 if not HAS_DOCKER:
147 |                     raise ImportError(
148 |                         "Docker is required for DockerProvider. "
149 |                         "Please install Docker and ensure it is running."
150 |                     )
151 |                 return DockerProvider(
152 |                     host=host,
153 |                     storage=storage,
154 |                     shared_path=shared_path,
155 |                     image=image or "trycua/cua-ubuntu:latest",
156 |                     verbose=verbose,
157 |                     ephemeral=ephemeral,
158 |                     vnc_port=noVNC_port,
159 |                     api_port=api_port,
160 |                 )
161 |             except ImportError as e:
162 |                 logger.error(f"Failed to import DockerProvider: {e}")
163 |                 raise ImportError(
164 |                     "Docker is required for DockerProvider. "
165 |                     "Please install Docker and ensure it is running."
166 |                 ) from e
167 |         else:
168 |             raise ValueError(f"Unsupported provider type: {provider_type}")
169 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/telemetry.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Telemetry
  3 | description: How telemetry works in Cua and how to control it
  4 | ---
  5 | 
  6 | # Telemetry
  7 | 
  8 | Cua collects anonymized usage and error statistics. We follow [Posthog's ethical telemetry approach](https://posthog.com/blog/open-source-telemetry-ethical). To opt out, set `telemetry_enabled` to false.
  9 | 
 10 | ## What we collect
 11 | 
 12 | ### Enabled by default (opt-out)
 13 | 
 14 | - System info: OS, OS version, Python version
 15 | - Module initialization: When modules are imported and their versions
 16 | - Performance: Agent run durations, step counts, token usage, API costs
 17 | - Session tracking: Anonymous session IDs and run IDs
 18 | 
 19 | ### Disabled by default (opt-in)
 20 | 
 21 | **Trajectory logging** captures full conversation history:
 22 | 
 23 | - User messages and agent responses
 24 | - Computer actions and outputs
 25 | - Agent reasoning traces
 26 | 
 27 | Must be explicitly enabled.
 28 | 
 29 | ### We don't collect
 30 | 
 31 | - Personal information or user identifiers
 32 | - API keys or credentials
 33 | - File contents or application data
 34 | - Files being accessed
 35 | - Screenshots or screen contents (unless trajectory logging is enabled)
 36 | - Text being typed, user inputs, model outputs, computer outputs, or tool call outputs (unless trajectory logging is enabled)
 37 | 
 38 | ## How to disable
 39 | 
 40 | ### Environment variable (global)
 41 | 
 42 | Set `CUA_TELEMETRY_ENABLED` to a falsy value (`0`, `false`, `no`, or `off`):
 43 | 
 44 | ```bash
 45 | export CUA_TELEMETRY_ENABLED=false
 46 | ```
 47 | 
 48 | Or in Python:
 49 | 
 50 | ```python
 51 | import os
 52 | os.environ["CUA_TELEMETRY_ENABLED"] = "false"
 53 | ```
 54 | 
 55 | <Callout type="info">
 56 |   **Deprecated environment variables:** The environment variables `CUA_TELEMETRY` and
 57 |   `CUA_TELEMETRY_DISABLED` are deprecated and no longer have any effect. Use `CUA_TELEMETRY_ENABLED`
 58 |   instead.
 59 | </Callout>
 60 | 
 61 | ### Per instance
 62 | 
 63 | **Computer SDK:**
 64 | 
 65 | ```python
 66 | from computer import Computer
 67 | 
 68 | computer = Computer(telemetry_enabled=False)
 69 | ```
 70 | 
 71 | **Agent SDK:**
 72 | 
 73 | ```python
 74 | from agent import ComputerAgent
 75 | import os
 76 | 
 77 | # Basic telemetry - performance metrics only (opt-out, enabled by default)
 78 | agent = ComputerAgent(
 79 |     model="claude-sonnet-4-5-20250929",
 80 |     telemetry_enabled=True  # Default is True
 81 | )
 82 | 
 83 | # Enable telemetry with full conversation trajectory logging (opt-in)
 84 | agent = ComputerAgent(
 85 |     model="claude-sonnet-4-5-20250929",
 86 |     telemetry_enabled={
 87 |         "log_trajectory": True  # Logs full conversation items
 88 |     }
 89 | )
 90 | 
 91 | # Disable completely
 92 | agent = ComputerAgent(
 93 |     model="claude-sonnet-4-5-20250929",
 94 |     telemetry_enabled=False
 95 | )
 96 | 
# Shorthand: equivalent trajectory-logging configuration (opt-in)
 98 | agent = ComputerAgent(
 99 |     model="claude-sonnet-4-5-20250929",
100 |     telemetry_enabled={"log_trajectory": True}
101 | )
102 | ```
103 | 
104 | Check status:
105 | 
106 | ```python
107 | print(computer.telemetry_enabled)  # True or False
108 | print(agent.telemetry_enabled)     # True, False, or dict
109 | ```
110 | 
111 | Telemetry settings are configured at initialization and can't be changed afterward.
112 | 
113 | ## Events collected
114 | 
115 | ### Computer SDK
116 | 
117 | | Event Name               | Data Collected                                                                                                                        | Trigger Notes                                                           |
118 | | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------- |
119 | | **computer_initialized** | • `os`: Operating system (e.g., 'windows', 'darwin', 'linux')<br />• `os_version`: OS version<br />• `python_version`: Python version | Triggered when a Computer instance is created                           |
120 | | **module_init**          | • `module`: "computer"<br />• `version`: Package version<br />• `python_version`: Full Python version string                          | Triggered once when the computer package is imported for the first time |
121 | 
122 | ### Agent SDK
123 | 
124 | | Event Name              | Data Collected                                                                                                                                                                                                                                                                                                        | Trigger Notes                                                         |
125 | | ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------- |
126 | | **module_init**         | • `module`: "agent"<br />• `version`: Package version<br />• `python_version`: Full Python version string                                                                                                                                                                                                             | Triggered once when the agent package is imported for the first time  |
127 | | **agent_session_start** | • `session_id`: Unique UUID for this agent instance<br />• `agent_type`: Class name (e.g., "ComputerAgent")<br />• `model`: Model name (e.g., "claude-sonnet-4-5")<br />• `os`: Operating system<br />• `os_version`: OS version<br />• `python_version`: Python version                                              | Triggered when TelemetryCallback is initialized (agent instantiation) |
128 | | **agent_run_start**     | • `session_id`: Agent session UUID<br />• `run_id`: Unique UUID for this run<br />• `start_time`: Unix timestamp<br />• `input_context_size`: Character count of input messages<br />• `num_existing_messages`: Count of existing messages<br />• `uploaded_trajectory`: Full conversation items (opt-in)             | Triggered at the start of each agent.run() call                       |
129 | | **agent_run_end**       | • `session_id`: Agent session UUID<br />• `run_id`: Run UUID<br />• `end_time`: Unix timestamp<br />• `duration_seconds`: Total run duration<br />• `num_steps`: Total steps taken in this run<br />• `total_usage`: Accumulated token usage and costs<br />• `uploaded_trajectory`: Full conversation items (opt-in) | Triggered at the end of each agent.run() call                         |
130 | | **agent_step**          | • `session_id`: Agent session UUID<br />• `run_id`: Run UUID<br />• `step`: Step number (incremental)<br />• `timestamp`: Unix timestamp<br />• `duration_seconds`: Duration of previous step                                                                                                                         | Triggered on each agent response/step during a run                    |
131 | | **agent_usage**         | • `session_id`: Agent session UUID<br />• `run_id`: Run UUID<br />• `step`: Current step number<br />• `prompt_tokens`: Tokens in prompt<br />• `completion_tokens`: Tokens in response<br />• `total_tokens`: Total tokens used<br />• `response_cost`: Cost of this API call                                        | Triggered whenever usage information is received from LLM API         |
132 | 
133 | ## Questions
134 | 
135 | Questions about telemetry? Open an issue on our [GitHub repository](https://github.com/trycua/cua).
136 | 
```

--------------------------------------------------------------------------------
/libs/python/som/som/util/utils.py:
--------------------------------------------------------------------------------

```python
  1 | import logging
  2 | import signal
  3 | import time
  4 | from contextlib import contextmanager
  5 | from typing import Any, List, Optional, Sequence, Tuple, Union, cast
  6 | 
  7 | import cv2
  8 | import easyocr
  9 | import matplotlib.pyplot as plt
 10 | import numpy as np
 11 | from PIL import Image
 12 | 
 13 | logger = logging.getLogger(__name__)
 14 | 
 15 | 
 16 | class TimeoutException(Exception):
 17 |     pass
 18 | 
 19 | 
 20 | @contextmanager
 21 | def timeout(seconds):
 22 |     def timeout_handler(signum, frame):
 23 |         logger.warning(f"OCR process timed out after {seconds} seconds")
 24 |         raise TimeoutException("OCR processing timed out")
 25 | 
 26 |     # Register the signal handler
 27 |     original_handler = signal.signal(signal.SIGALRM, timeout_handler)
 28 |     signal.alarm(seconds)
 29 | 
 30 |     try:
 31 |         yield
 32 |     finally:
 33 |         signal.alarm(0)
 34 |         signal.signal(signal.SIGALRM, original_handler)
 35 | 
 36 | 
# Initialize EasyOCR with optimized settings.
# NOTE: this runs at import time; with download_enabled=True the first import
# may fetch detector/recognizer models, so importing this module can be slow
# and may require network access — TODO confirm acceptable for all callers.
logger.info("Initializing EasyOCR with optimized settings...")
reader = easyocr.Reader(
    ["en"],  # English-only recognition
    gpu=True,  # Use GPU if available
    model_storage_directory=None,  # Use default directory
    download_enabled=True,  # Fetch missing models on first use
    detector=True,  # Enable text detection
    recognizer=True,  # Enable text recognition
    verbose=False,  # Disable verbose output
    quantize=True,  # Enable quantization for faster inference
    cudnn_benchmark=True,  # Enable cuDNN benchmarking
)
logger.info("EasyOCR initialization complete")
 51 | 
 52 | 
 53 | def check_ocr_box(
 54 |     image_source: Union[str, Image.Image],
 55 |     display_img=True,
 56 |     output_bb_format="xywh",
 57 |     goal_filtering=None,
 58 |     easyocr_args=None,
 59 |     use_paddleocr=False,
 60 | ) -> Tuple[Tuple[List[str], List[Tuple[float, float, float, float]]], Optional[Any]]:
 61 |     """Check OCR box using EasyOCR with optimized settings.
 62 | 
 63 |     Args:
 64 |         image_source: Either a file path or PIL Image
 65 |         display_img: Whether to display the annotated image
 66 |         output_bb_format: Format for bounding boxes ('xywh' or 'xyxy')
 67 |         goal_filtering: Optional filtering of results
 68 |         easyocr_args: Arguments for EasyOCR
 69 |         use_paddleocr: Ignored (kept for backward compatibility)
 70 | 
 71 |     Returns:
 72 |         Tuple containing:
 73 |         - Tuple of (text_list, bounding_boxes)
 74 |         - goal_filtering value
 75 |     """
 76 |     logger.info("Starting OCR processing...")
 77 |     start_time = time.time()
 78 | 
 79 |     if isinstance(image_source, str):
 80 |         logger.info(f"Loading image from path: {image_source}")
 81 |         image_source = Image.open(image_source)
 82 |     if image_source.mode == "RGBA":
 83 |         logger.info("Converting RGBA image to RGB")
 84 |         image_source = image_source.convert("RGB")
 85 |     image_np = np.array(image_source)
 86 |     w, h = image_source.size
 87 |     logger.info(f"Image size: {w}x{h}")
 88 | 
 89 |     # Default EasyOCR arguments optimized for speed
 90 |     default_args = {
 91 |         "paragraph": False,  # Disable paragraph detection
 92 |         "text_threshold": 0.5,  # Confidence threshold
 93 |         "link_threshold": 0.4,  # Text link threshold
 94 |         "canvas_size": 2560,  # Max image size
 95 |         "mag_ratio": 1.0,  # Magnification ratio
 96 |         "slope_ths": 0.1,  # Slope threshold
 97 |         "ycenter_ths": 0.5,  # Y-center threshold
 98 |         "height_ths": 0.5,  # Height threshold
 99 |         "width_ths": 0.5,  # Width threshold
100 |         "add_margin": 0.1,  # Margin around text
101 |         "min_size": 20,  # Minimum text size
102 |     }
103 | 
104 |     # Update with user-provided arguments
105 |     if easyocr_args:
106 |         logger.info(f"Using custom EasyOCR arguments: {easyocr_args}")
107 |         default_args.update(easyocr_args)
108 | 
109 |     try:
110 |         # Use EasyOCR with timeout
111 |         logger.info("Starting EasyOCR detection with 5 second timeout...")
112 |         with timeout(5):  # 5 second timeout
113 |             # EasyOCR's readtext returns a list of tuples, where each tuple is (bbox, text, confidence)
114 |             raw_result = reader.readtext(image_np, **default_args)
115 |             result = cast(Sequence[Tuple[List[Tuple[float, float]], str, float]], raw_result)
116 |             coord = [item[0] for item in result]  # item[0] is the bbox coordinates
117 |             text = [item[1] for item in result]  # item[1] is the text content
118 |             logger.info(f"OCR completed successfully. Found {len(text)} text regions")
119 |             logger.info(f"Detected text: {text}")
120 | 
121 |     except TimeoutException:
122 |         logger.error("OCR processing timed out after 5 seconds")
123 |         coord = []
124 |         text = []
125 |     except Exception as e:
126 |         logger.error(f"OCR processing failed with error: {str(e)}")
127 |         coord = []
128 |         text = []
129 | 
130 |     processing_time = time.time() - start_time
131 |     logger.info(f"Total OCR processing time: {processing_time:.2f} seconds")
132 | 
133 |     if display_img:
134 |         logger.info("Creating visualization of OCR results...")
135 |         opencv_img = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
136 |         bb = []
137 |         for item in coord:
138 |             x, y, a, b = get_xywh(item)
139 |             bb.append((x, y, a, b))
140 |             # Convert float coordinates to integers for cv2.rectangle
141 |             x_val = cast(float, x)
142 |             y_val = cast(float, y)
143 |             a_val = cast(float, a)
144 |             b_val = cast(float, b)
145 |             x_int, y_int = int(x_val), int(y_val)
146 |             a_int, b_int = int(a_val), int(b_val)
147 |             cv2.rectangle(
148 |                 opencv_img, (x_int, y_int), (x_int + a_int, y_int + b_int), (0, 255, 0), 2
149 |             )
150 |         plt.imshow(cv2.cvtColor(opencv_img, cv2.COLOR_BGR2RGB))
151 |     else:
152 |         if output_bb_format == "xywh":
153 |             bb = [get_xywh(item) for item in coord]
154 |         elif output_bb_format == "xyxy":
155 |             bb = [get_xyxy(item) for item in coord]
156 | 
157 |     # Cast the bounding boxes to the expected type
158 |     bb = cast(List[Tuple[float, float, float, float]], bb)
159 | 
160 |     logger.info("OCR processing complete")
161 |     return (text, bb), goal_filtering
162 | 
163 | 
def get_xywh(box):
    """
    Convert a bounding box to xywh format (x, y, width, height).

    Args:
        box: Bounding box coordinates. Supported forms:
            - 4 scalars: (x1, y1, x2, y2) or (x, y, w, h) — disambiguated by a
              heuristic, see below
            - 2 corner points: [[x1, y1], [x2, y2]]
            - N corner points: e.g. the 4-point polygons EasyOCR returns

    Returns:
        Tuple of (x, y, width, height)
    """
    if len(box) == 4:
        if isinstance(box[0], (int, float)) and isinstance(box[2], (int, float)):
            # Heuristic: a valid xyxy box has x2 > x1 and y2 > y1, so treat it
            # as xyxy in that case. (The previous `<` comparison was inverted
            # and produced negative widths/heights.) Ambiguity remains when an
            # xywh box happens to satisfy w > x and h > y.
            if box[2] > box[0] and box[3] > box[1]:
                # Interpreted as xyxy; convert to xywh.
                x1, y1, x2, y2 = box
                return x1, y1, x2 - x1, y2 - y1
            else:
                # Cannot be a valid xyxy box; assume already xywh.
                return box
    elif len(box) == 2:
        # Format like [[x1,y1],[x2,y2]] from some OCR engines
        (x1, y1), (x2, y2) = box
        return x1, y1, x2 - x1, y2 - y1

    # Default case - treat as a list of corner points and take the extent.
    x_coords = [p[0] for p in box]
    y_coords = [p[1] for p in box]
    x1, y1 = min(x_coords), min(y_coords)
    width, height = max(x_coords) - x1, max(y_coords) - y1
    return x1, y1, width, height
196 | 
197 | 
def get_xyxy(box):
    """
    Convert a bounding box to xyxy format (x1, y1, x2, y2).

    Args:
        box: Bounding box coordinates (various formats supported)

    Returns:
        Tuple of (x1, y1, x2, y2)
    """
    # Normalize through xywh, then derive the bottom-right corner.
    left, top, width, height = get_xywh(box)
    return left, top, left + width, top + height
211 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/benchmarks/ss-v2.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | ScreenSpot-v2 Benchmark Script
  4 | 
  5 | Evaluates models on the ScreenSpot-v2 dataset for click prediction accuracy.
  6 | Supports both ComputerAgent model strings and custom model classes.
  7 | """
  8 | 
  9 | import argparse
 10 | import asyncio
 11 | import random
 12 | import statistics
 13 | import time
 14 | from typing import Optional
 15 | 
 16 | from datasets import load_dataset
 17 | from tqdm import tqdm
 18 | from utils import (
 19 |     ModelWrapper,
 20 |     get_available_models,
 21 |     get_gpu_memory,
 22 |     is_click_in_bbox,
 23 |     save_results_to_markdown,
 24 |     save_visualizations,
 25 | )
 26 | 
 27 | 
 28 | async def evaluate_model(
 29 |     model_wrapper: ModelWrapper, samples, max_samples: Optional[int] = None
 30 | ) -> dict:
 31 |     """
 32 |     Evaluate a model on any iterable of samples.
 33 | 
 34 |     Args:
 35 |         model_wrapper: ModelWrapper instance
 36 |         samples: Iterable of dicts with keys: image, bbox, instruction
 37 |         max_samples: Maximum number of samples to evaluate (None for all)
 38 | 
 39 |     Returns:
 40 |         Dictionary with evaluation results
 41 |     """
 42 |     print(f"\nEvaluating model: {model_wrapper.model_name}")
 43 | 
 44 |     # Load model
 45 |     await model_wrapper.load_model()
 46 | 
 47 |     # Convert to list if needed and limit samples
 48 |     if hasattr(samples, "__len__"):
 49 |         total_samples = len(samples)
 50 |         if max_samples is not None:
 51 |             total_samples = min(max_samples, total_samples)
 52 |         sample_list = list(samples)[:total_samples]
 53 |     else:
 54 |         # For iterators, take max_samples or all
 55 |         sample_list = list(samples)
 56 |         if max_samples is not None:
 57 |             sample_list = sample_list[:max_samples]
 58 |         total_samples = len(sample_list)
 59 | 
 60 |     correct_predictions = 0
 61 |     error_predictions = 0
 62 |     results = []
 63 | 
 64 |     for i, sample in enumerate(tqdm(sample_list, desc=f"Evaluating {model_wrapper.model_name}")):
 65 |         # Extract required data (only these 3 keys matter)
 66 |         image = sample["image"]
 67 |         instruction = sample["instruction"]
 68 |         bbox = sample["bbox"]  # [x1, y1, x2, y2]
 69 | 
 70 |         # Predict click coordinates with timing
 71 |         start_time = time.time()
 72 |         click_coords = await model_wrapper.predict_click(image, instruction)
 73 |         prediction_time = time.time() - start_time
 74 | 
 75 |         # Check if prediction is correct
 76 |         is_correct = is_click_in_bbox(click_coords, bbox)
 77 | 
 78 |         if is_correct:
 79 |             correct_predictions += 1
 80 | 
 81 |         results.append(
 82 |             {
 83 |                 "sample_idx": i,
 84 |                 "instruction": instruction,
 85 |                 "bbox": bbox,
 86 |                 "predicted_coords": click_coords,
 87 |                 "is_correct": is_correct,
 88 |                 "failed": False,
 89 |                 "prediction_time": prediction_time,
 90 |             }
 91 |         )
 92 | 
 93 |     # Unload model
 94 |     await model_wrapper.unload_model()
 95 | 
 96 |     # Calculate metrics
 97 |     accuracy = correct_predictions / total_samples if total_samples > 0 else 0.0
 98 |     error_rate = error_predictions / total_samples if total_samples > 0 else 0.0
 99 | 
100 |     # Calculate timing statistics
101 |     successful_times = [r["prediction_time"] for r in results if not r["failed"]]
102 |     avg_prediction_time = sum(successful_times) / len(successful_times) if successful_times else 0.0
103 |     median_prediction_time = statistics.median(successful_times) if successful_times else 0.0
104 |     min_prediction_time = min(successful_times) if successful_times else 0.0
105 |     max_prediction_time = max(successful_times) if successful_times else 0.0
106 | 
107 |     # Get VRAM statistics
108 |     vram_stats = model_wrapper.get_vram_stats()
109 | 
110 |     return {
111 |         "model_name": model_wrapper.model_name,
112 |         "total_samples": total_samples,
113 |         "correct_predictions": correct_predictions,
114 |         "failed_predictions": error_predictions,
115 |         "accuracy": accuracy,
116 |         "failure_rate": error_rate,
117 |         "avg_prediction_time": avg_prediction_time,
118 |         "median_prediction_time": median_prediction_time,
119 |         "min_prediction_time": min_prediction_time,
120 |         "max_prediction_time": max_prediction_time,
121 |         "vram_max_mb": vram_stats["max_mb"],
122 |         "vram_avg_mb": vram_stats["avg_mb"],
123 |         "results": results,
124 |     }
125 | 
126 | 
async def main():
    """Entry point: parse CLI args, load ScreenSpot-v2, and benchmark each model."""
    parser = argparse.ArgumentParser(description="ScreenSpot-v2 Benchmark Script")
    parser.add_argument(
        "--samples", type=int, default=500, help="Number of samples to evaluate (default: 500)"
    )
    parser.add_argument(
        "--seed", type=int, default=42, help="Random seed for shuffling (default: 42)"
    )
    args = parser.parse_args()

    # Deterministic shuffling across runs with the same seed.
    random.seed(args.seed)

    print("Loading ScreenSpot-v2 dataset...")
    ds = load_dataset("lmms-lab/ScreenSpot-v2")
    train_split = ds["train"]  # type: ignore

    # Reduce each record to the three keys the evaluator needs, converting the
    # dataset's [x, y, w, h] boxes into [x1, y1, x2, y2].
    samples = []
    for raw in train_split:
        record = dict(raw) if hasattr(raw, "keys") else raw
        left, top, width, height = record["bbox"]  # type: ignore
        samples.append(
            {
                "image": record["image"],  # type: ignore
                "instruction": record["instruction"],  # type: ignore
                "bbox": [left, top, left + width, top + height],
            }
        )
    print(f"Dataset loaded: {len(samples)} samples")

    random.shuffle(samples)
    print(f"Samples shuffled with seed {args.seed}")

    all_results = []
    for model in get_available_models():
        result = await evaluate_model(ModelWrapper(model), samples, args.samples)
        all_results.append(result)

        # Per-model summary.
        print(f"\n{result['model_name']} Results:")
        print(f"  Accuracy: {result['accuracy']*100:.2f}%")
        print(f"  Correct: {result['correct_predictions']}/{result['total_samples']}")
        print(f"  Errors: {result['failed_predictions']}")
        print(f"  Error Rate: {result['failure_rate']*100:.2f}%")
        print(f"  Avg Time: {result['avg_prediction_time']:.2f}s")
        print(f"  Median Time: {result['median_prediction_time']:.2f}s")
        print(
            f"  Time Range: {result['min_prediction_time']:.2f}s - {result['max_prediction_time']:.2f}s"
        )
        print(f"  VRAM Max: {result['vram_max_mb']:.1f}MB")
        print(f"  VRAM Avg: {result['vram_avg_mb']:.1f}MB")

        gpu_memory = get_gpu_memory()
        if gpu_memory and gpu_memory[0] > 0:
            print(f"  GPU Free Memory: {gpu_memory[0]:.1f}MB")

    if all_results:
        save_results_to_markdown(
            all_results, "screenspot_v2_results.md", title="ScreenSpot-v2 Benchmark Results"
        )
        save_visualizations(all_results, samples)
        print("\nBenchmark completed successfully!")
    else:
        print("\nNo successful evaluations completed.")
214 | 
215 | 
216 | if __name__ == "__main__":
217 |     asyncio.run(main())
218 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/telemetry.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Telemetry callback handler for Computer-Use Agent (cua-agent)
  3 | """
  4 | 
  5 | import platform
  6 | import time
  7 | import uuid
  8 | from typing import Any, Dict, List, Optional, Union
  9 | 
 10 | from core.telemetry import (
 11 |     is_telemetry_enabled,
 12 |     record_event,
 13 | )
 14 | 
 15 | from .base import AsyncCallbackHandler
 16 | 
# Captured once at import time and attached to every telemetry event.
SYSTEM_INFO = {
    "os": platform.system().lower(),
    "os_version": platform.release(),
    "python_version": platform.python_version(),
}
 22 | 
 23 | 
 24 | class TelemetryCallback(AsyncCallbackHandler):
 25 |     """
 26 |     Telemetry callback handler for Computer-Use Agent (cua-agent)
 27 | 
 28 |     Tracks agent usage, performance metrics, and optionally trajectory data.
 29 |     """
 30 | 
 31 |     def __init__(self, agent, log_trajectory: bool = False):
 32 |         """
 33 |         Initialize telemetry callback.
 34 | 
 35 |         Args:
 36 |             agent: The ComputerAgent instance
 37 |             log_trajectory: Whether to log full trajectory items (opt-in)
 38 |         """
 39 |         self.agent = agent
 40 |         self.log_trajectory = log_trajectory
 41 | 
 42 |         # Generate session/run IDs
 43 |         self.session_id = str(uuid.uuid4())  # one session per callback instance
 44 |         self.run_id: Optional[str] = None  # assigned per run in on_run_start
 45 | 
 46 |         # Track timing and metrics
 47 |         self.run_start_time: Optional[float] = None  # epoch seconds
 48 |         self.step_count = 0
 49 |         self.step_start_time: Optional[float] = None  # epoch seconds
 50 |         self.total_usage = {
 51 |             "prompt_tokens": 0,
 52 |             "completion_tokens": 0,
 53 |             "total_tokens": 0,
 54 |             "response_cost": 0.0,
 55 |         }
 56 | 
 57 |         # Record agent initialization
 58 |         if is_telemetry_enabled():
 59 |             self._record_agent_initialization()
 60 | 
 61 |     def _record_agent_initialization(self) -> None:
 62 |         """Record agent type/model and session initialization."""
 63 |         # Get the agent loop type (class name)
 64 |         agent_type = "unknown"
 65 |         if hasattr(self.agent, "agent_loop") and self.agent.agent_loop is not None:
 66 |             agent_type = type(self.agent.agent_loop).__name__
 67 | 
 68 |         agent_info = {
 69 |             "session_id": self.session_id,
 70 |             "agent_type": agent_type,
 71 |             "model": getattr(self.agent, "model", "unknown"),
 72 |             **SYSTEM_INFO,
 73 |         }
 74 | 
 75 |         record_event("agent_session_start", agent_info)
 76 | 
 77 |     async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
 78 |         """Called at the start of an agent run loop."""
 79 |         if not is_telemetry_enabled():
 80 |             return
 81 | 
 82 |         self.run_id = str(uuid.uuid4())  # fresh id for every run
 83 |         self.run_start_time = time.time()
 84 |         self.step_count = 0
 85 | 
 86 |         # Calculate input context size
 87 |         input_context_size = self._calculate_context_size(old_items)
 88 | 
 89 |         run_data = {
 90 |             "session_id": self.session_id,
 91 |             "run_id": self.run_id,
 92 |             "start_time": self.run_start_time,
 93 |             "input_context_size": input_context_size,
 94 |             "num_existing_messages": len(old_items),
 95 |         }
 96 | 
 97 |         # Log trajectory if opted in
 98 |         if self.log_trajectory:
 99 |             trajectory = self._extract_trajectory(old_items)
100 |             if trajectory:
101 |                 run_data["uploaded_trajectory"] = trajectory
102 | 
103 |         record_event("agent_run_start", run_data)
104 | 
105 |     async def on_run_end(
106 |         self,
107 |         kwargs: Dict[str, Any],
108 |         old_items: List[Dict[str, Any]],
109 |         new_items: List[Dict[str, Any]],
110 |     ) -> None:
111 |         """Called at the end of an agent run loop."""
112 |         if not is_telemetry_enabled() or not self.run_start_time:  # no-op if run never started
113 |             return
114 | 
115 |         run_duration = time.time() - self.run_start_time
116 | 
117 |         run_data = {
118 |             "session_id": self.session_id,
119 |             "run_id": self.run_id,
120 |             "end_time": time.time(),
121 |             "duration_seconds": run_duration,
122 |             "num_steps": self.step_count,
123 |             "total_usage": self.total_usage.copy(),
124 |         }
125 | 
126 |         # Log trajectory if opted in
127 |         if self.log_trajectory:
128 |             trajectory = self._extract_trajectory(new_items)
129 |             if trajectory:
130 |                 run_data["uploaded_trajectory"] = trajectory
131 | 
132 |         record_event("agent_run_end", run_data)
133 | 
134 |     async def on_usage(self, usage: Dict[str, Any]) -> None:
135 |         """Called when usage information is received."""
136 |         if not is_telemetry_enabled():
137 |             return
138 | 
139 |         # Accumulate usage stats
140 |         self.total_usage["prompt_tokens"] += usage.get("prompt_tokens", 0)
141 |         self.total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
142 |         self.total_usage["total_tokens"] += usage.get("total_tokens", 0)
143 |         self.total_usage["response_cost"] += usage.get("response_cost", 0.0)
144 | 
145 |         # Record individual usage event
146 |         usage_data = {
147 |             "session_id": self.session_id,
148 |             "run_id": self.run_id,
149 |             "step": self.step_count,
150 |             **usage,
151 |         }
152 | 
153 |         record_event("agent_usage", usage_data)
154 | 
155 |     async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
156 |         """Called when responses are received."""
157 |         if not is_telemetry_enabled():
158 |             return
159 | 
160 |         self.step_count += 1  # each batch of responses counts as one step
161 |         step_duration = None
162 | 
163 |         if self.step_start_time:
164 |             step_duration = time.time() - self.step_start_time
165 | 
166 |         self.step_start_time = time.time()
167 | 
168 |         step_data = {
169 |             "session_id": self.session_id,
170 |             "run_id": self.run_id,
171 |             "step": self.step_count,
172 |             "timestamp": self.step_start_time,
173 |         }
174 | 
175 |         if step_duration is not None:
176 |             step_data["duration_seconds"] = step_duration
177 | 
178 |         record_event("agent_step", step_data)
179 | 
180 |     def _calculate_context_size(self, items: List[Dict[str, Any]]) -> int:
181 |         """Approximate context size as a character count (rough proxy for tokens)."""
182 |         total_size = 0
183 | 
184 |         for item in items:
185 |             if item.get("type") == "message" and "content" in item:
186 |                 content = item["content"]
187 |                 if isinstance(content, str):
188 |                     total_size += len(content)
189 |                 elif isinstance(content, list):
190 |                     for part in content:
191 |                         if isinstance(part, dict) and "text" in part:
192 |                             total_size += len(part["text"])
193 |             elif "content" in item and isinstance(item["content"], str):  # non-message items with plain string content
194 |                 total_size += len(item["content"])
195 | 
196 |         return total_size
197 | 
198 |     def _extract_trajectory(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
199 |         """Extract trajectory items that should be logged."""
200 |         trajectory = []
201 | 
202 |         for item in items:
203 |             # Include user messages, assistant messages, reasoning, computer calls, and computer outputs
204 |             if (
205 |                 item.get("role") == "user"  # User inputs
206 |                 or (
207 |                     item.get("type") == "message" and item.get("role") == "assistant"
208 |                 )  # Model outputs
209 |                 or item.get("type") == "reasoning"  # Reasoning traces
210 |                 or item.get("type") == "computer_call"  # Computer actions
211 |                 or item.get("type") == "computer_call_output"  # Computer outputs
212 |             ):
213 |                 # Create a copy of the item with timestamp
214 |                 trajectory_item = item.copy()
215 |                 trajectory_item["logged_at"] = time.time()
216 |                 trajectory.append(trajectory_item)
217 | 
218 |         return trajectory
219 | 
```

--------------------------------------------------------------------------------
/blog/computer-use-agents-for-growth-hacking.md:
--------------------------------------------------------------------------------

```markdown
  1 | # Computer Use Agents for Growth Hacking: The Cua-la Strategy
  2 | 
  3 | _Published on January 16, 2025 by Sarina Li_
  4 | 
  5 | <img src="./assets/esther-and-sarina.JPG" alt="Esther and Sarina at DevFest Toronto">
  6 | 
  7 | Growing a developer-focused product is hard. Traditional marketing doesn't work. Booth rentals cost thousands. Sponsorships cost tens of thousands.
  8 | 
  9 | So we tried something different at Google DevFest Toronto: show up with backpacks full of cute cua-la keychains and see what happens.
 10 | 
 11 | This is the story of how two new hires, a growth engineer and a designer/artist, guerrilla marketed their way through a major tech conference with $200 worth of merch and a post-event automation pipeline.
 12 | 
 13 | ## Meet the Team
 14 | 
 15 | **Sarina** (Growth Engineering): Built the post-event automation pipeline that extracts LinkedIn connections and generates personalized messages while you sleep.
 16 | 
 17 | **Esther** (Design + Art): Hand-crafted every piece of artwork, giving life to Cua through illustrations, branding, and yes, extremely cute cua-la keychains.
 18 | 
 19 | The thesis: what if we could draw people in with irresistible physical merch, then use computer use agents to handle all the tedious follow-up work?
 20 | 
 21 | ## The cua-la Strategy
 22 | 
 23 | <img src="./assets/cua-at-devfest.JPG" alt="Guerrilla marketing at DevFest Toronto">
 24 | 
 25 | Google DevFest Toronto brought together hundreds of developers and AI enthusiasts. We didn't have a booth. We didn't have demos. We showed up with backpacks full of cua-la keychains with the cua.ai logo and started handing them out.
 26 | 
 27 | That's it. Pure guerrilla marketing — and the cua-las were absurdly effective.
 28 | 
 29 | People would literally crowd around us, not because they were interested in computer use (at first), but because they wanted a cua-la. We'd pitch Cua while handing out keychains, and suddenly we had an engaged audience!
 30 | 
 31 | <img src="./assets/devfest-image.JPG" alt="DevFest crowd">
 32 | 
 33 | ### The Magic Moment
 34 | 
 35 | A few people stuck the cua-las on their bags immediately. Then, throughout the event, we started getting approached:
 36 | 
 37 | "Wait, are you the Cua girls?"
 38 | 
 39 | They'd seen the cua-las on someone's bag, asked about it, and tracked us down! The keychains became walking advertisements.
 40 | 
 41 | <img src="./assets/htn-at-devfest.JPG" alt="Hack the North recognition at DevFest">
 42 | 
 43 | Even better: two attendees recognized Cua from Hack the North. Our previous event marketing was actually working. People remembered us.
 44 | 
 45 | ## Part 2: The Automation (Try It Yourself)
 46 | 
 47 | After DevFest, we had 20+ new LinkedIn connections. Normally, this means hours of:
 48 | 
 49 | - Manually copying names, roles, companies
 50 | - Opening each profile to find contact info
 51 | - Crafting personalized follow-up messages
 52 | - Updating your CRM
 53 | 
 54 | Sarina had a better idea: build the automation we wish existed, then open source it.
 55 | 
 56 | **The automation is live**: [Post-Event Contact Export cookbook](https://cua.ai/docs/example-usecases/post-event-contact-export)
 57 | 
 58 | ### How It Works
 59 | 
 60 | <video controls width="100%">
 61 |   <source src="./assets/linkedin-scraping.mp4" type="video/mp4">
 62 |   LinkedIn scraping automation in action
 63 | </video>
 64 | 
 65 | The agent navigates LinkedIn like a human would: click profile, extract info, navigate back, repeat. But it does it overnight while you sleep.
 66 | 
 67 | The secret sauce: **VM session persistence**. By logging into LinkedIn once through Cua's VM, the session stays alive. No captchas, no bot detection, just smooth automation.
 68 | 
 69 | <video controls width="100%">
 70 |   <source src="./assets/adding-row-csv.mp4" type="video/mp4">
 71 |   Automatic CSV generation
 72 | </video>
 73 | 
 74 | Wake up to a clean CSV with:
 75 | 
 76 | - First name, last name
 77 | - Current role and company
 78 | - LinkedIn profile URLs
 79 | - Pre-generated messaging links
 80 | 
 81 | Then use that data to craft personalized messages. Sarina wrote unique follow-ups for each person, mentioning specific conversations from DevFest.
 82 | 
 83 | **Works for any platform**: LinkedIn, X/Twitter, or wherever your connections are. The cookbook includes full setup instructions and customizable code.
 84 | 
 85 | ## The Results
 86 | 
 87 | **Cost Breakdown**
 88 | 
 89 | - Booth rental: $0 (didn't have one)
 90 | - Sponsorship: $0 (didn't buy one)
 91 | - cua-la keychains: ~$200
 92 | - Automation: Built by Sarina in a few hours post-event
 93 | - **Total spend: $200**
 94 | 
 95 | **What We Got**
 96 | 
 97 | - People crowding around us for cua-las
 98 | - Walking advertisements on bags throughout the event
 99 | - Instant brand recognition ("Are you the Cua girls?")
100 | - Two people who remembered us from Hack the North
101 | - 20+ quality connections extracted and messaged within 24 hours
102 | - Several demo requests from personalized follow-ups
103 | 
104 | **ROI**
105 | Traditional event marketing at this scale: $5-10K minimum for booth + sponsorship.
106 | 
107 | Our approach: $200 + scrappy execution.
108 | 
109 | The automation is reusable and will save hours of manual work, and the cua-las created more organic conversations than any booth could have.
110 | 
111 | ## What Didn't Work (Yet)
112 | 
113 | **cua-la Distribution**
114 | We ran out faster than expected! Next time: bigger bag, or limit to one per person.
115 | 
116 | **Automation Setup**
117 | The VM login step added friction. "Log in manually first, then run the script" confused some people who wanted to try it themselves. Need better first-run UX.
118 | 
119 | **Message Personalization**
120 | While the extraction was automated, Sarina still wrote each follow-up message manually. We're looking for ways to better enrich messages with context from the event, which is hard to automate.
121 | 
122 | ## What's Next: NeurIPS 2025
123 | 
124 | NeurIPS is the biggest AI conference of the year. Thousands of researchers, hundreds of companies.
125 | 
126 | **The good news**: We still have one giant bag of cua-las left. They're already packed and ready.
127 | 
128 | **The better news**: We're upgrading the automation.
129 | 
130 | ### The Hypothesis
131 | 
132 | The cua-las get people interested. The automation ensures we actually follow through.
133 | 
134 | Most event marketing fails at the follow-up stage. You collect business cards, connect on LinkedIn, and then... nothing. The moment passes. People forget.
135 | 
136 | With Cua handling the mechanical work (data organization, connection tracking, follow-up scheduling), we can focus on the human part: genuine conversations, valuable introductions, and actually helping people.
137 | 
138 | ## The Framework: Cute Merch + Smart Automation
139 | 
140 | Traditional event marketing: show up, pitch, collect cards.
141 | 
142 | Our approach: combine two forces that shouldn't work together but do.
143 | 
144 | **The Physical Hook**
145 | 
146 | - Make something people actually want (not another branded pen)
147 | - Hand-crafted, memorable, Instagram-worthy
148 | - Turns attendees into walking billboards
149 | - Creates natural conversation starters
150 | 
151 | **The Digital Follow-Through**
152 | 
153 | - Automate the tedious post-event work
154 | - Extract connections while you sleep
155 | - Personalize follow-ups with real context
156 | - Actually close the loop before the moment passes
157 | 
158 | **Why It Works**
159 | The cua-las get you in the door. The automation ensures you don't waste the opportunity.
160 | 
161 | Most companies nail one or the other:
162 | 
163 | - Great merch, terrible follow-up → missed opportunities
164 | - Amazing automation, boring presence → no one cares
165 | 
166 | Do both, and you create a flywheel: each event builds brand recognition for the next, while automation ensures maximum value from every connection.
167 | 
168 | See you at NeurIPS 2025!
169 | 
170 | ---
171 | 
172 | _Want to build your own growth hacking automations? Check out [Cua on GitHub](https://github.com/trycua/cua) or join our [Discord](https://discord.gg/cua) to share your experiments. cua-las not included (yet)._
173 | 
```

--------------------------------------------------------------------------------
/blog/ubuntu-docker-support.md:
--------------------------------------------------------------------------------

```markdown
  1 | # Ubuntu Docker Support in Cua with Kasm
  2 | 
  3 | _Published Aug 26, 2025 by Francesco Bonacci_
  4 | 
  5 | Today we’re shipping **Ubuntu Docker support** in Cua. You get a full Linux desktop inside a Docker container, viewable right in your browser—no VM spin-up, no extra clients. It behaves the same on macOS, Windows, and Linux.
  6 | 
  7 | <img src="./assets/docker-ubuntu-support.png" alt="Cua + KasmVNC Ubuntu container desktop">
  8 | 
  9 | ## Why we did this
 10 | 
 11 | If you build automation or RL workflows with Cua, you’ve probably run into the usual platform walls: macOS VMs (via Lume) are Apple-Silicon only; Windows Sandbox needs Pro/Enterprise; giving agents your host desktop is… exciting, but risky; and little OS quirks make “build once, run anywhere” harder than it should be.
 12 | 
 13 | We wanted something lightweight, isolated, and identical across machines. So we put a desktop in a container.
 14 | 
 15 | ## Why we didn’t use QEMU/KVM
 16 | 
 17 | Short answer: **portability, startup time, and ops friction.**
 18 | 
 19 | - **Runs everywhere, no hypervisor drama.** KVM needs Linux; Hyper-V/Virtualization.Framework setups vary by host and policy. Docker is ubiquitous across macOS/Windows/Linux and allowed in most CI runners—so your GUI env actually runs where your team works.
 20 | - **Faster boot & smaller footprints.** Containers cold-start in seconds and images are GB-scale; VMs tend to be minutes and tens of GB. That matters for parallel agents, CI, and local iteration.
 21 | - **Lower ops overhead.** No nested virt, kernel modules, or privileged host tweaks that many orgs (and cloud runners) block. Pull → run → browser.
 22 | - **Same image, everywhere.** One Docker image gives you an identical desktop on every dev laptop and in CI.
 23 | - **Web-first access out of the box.** KasmVNC serves the desktop over HTTP—no extra VNC/RDP clients or SPICE config.
 24 | 
 25 | **When we _do_ reach for QEMU/KVM:**
 26 | 
 27 | - You need **true OS isolation** or to run **non-Linux** guests.
 28 | - You want **kernel-level features** or **device/GPU passthrough** (VFIO).
 29 | - You’re optimizing for **hardware realism** over startup speed and density.
 30 | 
 31 | For this release, the goal was a **cross-platform Linux desktop that feels instant and identical** across local dev and CI. Containers + KasmVNC hit that sweet spot.
 32 | 
 33 | ## What we built
 34 | 
 35 | Under the hood it’s **KasmVNC + Ubuntu 22.04 (Xfce) in Docker**, pre-configured for computer-use automation. You get a proper GUI desktop served over HTTP (no VNC/RDP client), accessible from any modern browser. Cua’s Computer server boots automatically so your agents can connect immediately.
 36 | 
 37 | ### How it works (at a glance)
 38 | 
 39 | ```
 40 | Your System
 41 | └─ Docker Container
 42 |    └─ Xfce Desktop + KasmVNC → open in your browser
 43 | ```
 44 | 
 45 | ---
 46 | 
 47 | ## Quick start
 48 | 
 49 | 1. **Install Docker** — Docker Desktop (macOS/Windows) or Docker Engine (Linux).
 50 | 
 51 | 2. **Pull or build the image**
 52 | 
 53 | ```bash
 54 | # Pull (recommended)
 55 | docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest
 56 | 
 57 | # Or build locally
 58 | cd libs/kasm
 59 | docker build -t cua-ubuntu:latest .
 60 | ```
 61 | 
 62 | 3. **Run with Cua’s Computer SDK**
 63 | 
 64 | ```python
 65 | from computer import Computer
 66 | 
 67 | computer = Computer(
 68 |     os_type="linux",
 69 |     provider_type="docker",
 70 |     image="trycua/cua-ubuntu:latest",
 71 |     name="my-automation-container"
 72 | )
 73 | 
 74 | await computer.run()
 75 | ```
 76 | 
 77 | ### Make an agent that drives this desktop
 78 | 
 79 | ```python
 80 | from agent import ComputerAgent
 81 | 
 82 | # assumes `computer` is the instance created above
 83 | agent = ComputerAgent("openrouter/z-ai/glm-4.5v", tools=[computer])
 84 | 
 85 | async for _ in agent.run("Click on the search bar and type 'hello world'"):
 86 |     pass
 87 | ```
 88 | 
 89 | > Use any VLM with tool use; just make sure your OpenRouter creds are set.
 90 | 
 91 | By default you land on **Ubuntu 22.04 + Xfce** with a browser and desktop basics, the **Computer server** is running, the **web viewer** is available at `http://localhost:8006`, and common automation tools are preinstalled.
 92 | 
 93 | ---
 94 | 
 95 | ## What’s inside (in plain English)
 96 | 
 97 | A tidy Linux desktop with web access through **KasmVNC**, Python 3.11 and dev tools, plus utilities you’ll actually use for automation—`wmctrl` for windows, `xclip` for clipboard, `ffmpeg` for media, screenshot helpers, and so on. It starts as a **non-root `kasm-user`**, lives in an **isolated filesystem** (unless you mount volumes), and ships with **SSL off for local dev** so you terminate TLS upstream when you deploy.
 98 | 
 99 | ---
100 | 
101 | ## How it compares
102 | 
103 | | Feature          | KasmVNC Docker        | Lume (macOS VM)       | Windows Sandbox        |
104 | | ---------------- | --------------------- | --------------------- | ---------------------- |
105 | | Platform support | macOS, Windows, Linux | macOS (Apple Silicon) | Windows Pro/Enterprise |
106 | | Resource usage   | Low (container)       | Medium (full VM)      | Medium (full VM)       |
107 | | Setup time       | \~30s                 | 2–5 min               | 1–2 min                |
108 | | GUI desktop      | Linux                 | macOS                 | Windows                |
109 | | Web access       | Browser (no client)   | Typically VNC client  | Typically RDP client   |
110 | | Consistency      | Same everywhere       | Hardware-dependent    | OS-dependent           |
111 | 
112 | **Use KasmVNC Docker when…** you want the **same GUI env across devs/CI/platforms**, you’re doing **RL or end-to-end GUI tests**, or you need **many isolated desktops on one machine**.
113 | **Use alternatives when…** you need native **macOS** (→ Lume) or native **Windows** (→ Windows Sandbox).
114 | 
115 | ---
116 | 
117 | ## Using the Agent Framework (parallel example)
118 | 
119 | A compact pattern for running multiple desktops and agents side-by-side:
120 | 
121 | ```python
122 | import asyncio
123 | from computer import Computer
124 | from agent import ComputerAgent
125 | 
126 | # Create multiple computer instances (each gets its own desktop)
127 | computers = []
128 | for i in range(3):
129 |     c = Computer(
130 |         os_type="linux",
131 |         provider_type="docker",
132 |         image="trycua/cua-ubuntu:latest",
133 |         name=f"parallel-desktop-{i}"
134 |     )
135 |     computers.append(c)
136 |     await c.run()
137 | 
138 | # Pair each desktop with a task
139 | tasks = [
140 |     "open github and search for 'trycua/cua'",
141 |     "open a text editor and write 'hello world'",
142 |     "open the browser and go to google.com",
143 | ]
144 | 
145 | agents = [
146 |     ComputerAgent(model="openrouter/z-ai/glm-4.5v", tools=[c])
147 |     for c in computers
148 | ]
149 | 
150 | async def run_agent(agent, task):
151 |     async for _ in agent.run(task):
152 |         pass
153 | 
154 | await asyncio.gather(*[run_agent(a, t) for a, t in zip(agents, tasks)])
155 | ```
156 | 
157 | ---
158 | 
159 | ## What’s next
160 | 
161 | We’re polishing a **CLI to push/scale these containers on Cua Cloud**, exploring **GPU acceleration** for in-container inference, and publishing **prebuilt images** for Playwright, Selenium, and friends.
162 | 
163 | ---
164 | 
165 | ## Try it
166 | 
167 | ```python
168 | from computer import Computer
169 | computer = Computer(os_type="linux", provider_type="docker", image="trycua/cua-ubuntu:latest")
170 | await computer.run()
171 | ```
172 | 
173 | ---
174 | 
175 | ## Links
176 | 
177 | - **Docker Provider Docs:** [https://cua.ai/docs/computers/docker](https://cua.ai/docs/computer-sdk/computers#linux-on-docker)
178 | - **KasmVNC:** [https://github.com/kasmtech/KasmVNC](https://github.com/kasmtech/KasmVNC)
179 | - **Container Source:** [https://github.com/trycua/cua/tree/main/libs/kasm](https://github.com/trycua/cua/tree/main/libs/kasm)
180 | - **Computer SDK:** [https://cua.ai/docs/computer-sdk/computers](https://cua.ai/docs/computer-sdk/computers)
181 | - **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai)
182 | 
183 | Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build.
184 | 
```

--------------------------------------------------------------------------------
/libs/python/bench-ui/bench_ui/child.py:
--------------------------------------------------------------------------------

```python
  1 | import asyncio
  2 | import json
  3 | import os
  4 | import random
  5 | import socket
  6 | import sys
  7 | import threading
  8 | from pathlib import Path
  9 | from typing import Optional
 10 | 
 11 | import webview
 12 | from aiohttp import web
 13 | 
 14 | 
 15 | def _get_free_port() -> int:  # ask the OS for an unused localhost TCP port
 16 |     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
 17 |         s.bind(("127.0.0.1", 0))  # port 0 -> kernel assigns a free ephemeral port
 18 |         return s.getsockname()[1]  # small race: port may be taken before it is reused
 19 | 
 20 | 
 21 | def _start_http_server(
 22 |     window: webview.Window,
 23 |     port: int,
 24 |     ready_event: threading.Event,  # signals that the page has finished loading
 25 |     html_content: str | None = None,
 26 |     folder_path: str | None = None,
 27 | ):  # serves control endpoints (/rect, /eval) on 127.0.0.1:port
 28 |     async def rect_handler(request: web.Request):  # POST /rect: bounding rect of a CSS selector
 29 |         try:
 30 |             data = await request.json()
 31 |         except Exception:
 32 |             return web.json_response({"error": "invalid_json"}, status=400)
 33 |         selector = data.get("selector")
 34 |         space = data.get("space", "window")  # "window" (viewport) or "screen" coordinates
 35 |         if not isinstance(selector, str):
 36 |             return web.json_response({"error": "selector_required"}, status=400)
 37 | 
 38 |         # Ensure window content is loaded
 39 |         if not ready_event.is_set():
 40 |             # give it a short chance to finish loading
 41 |             ready_event.wait(timeout=2.0)
 42 |         if not ready_event.is_set():
 43 |             return web.json_response({"error": "window_not_ready"}, status=409)
 44 | 
 45 |         # Safely embed selector into JS
 46 |         selector_js = json.dumps(selector)
 47 |         if space == "screen":
 48 |             # Compute approximate screen coordinates using window metrics
 49 |             js = (
 50 |                 "(function(){"
 51 |                 f"const s = {selector_js};"
 52 |                 "const el = document.querySelector(s);"
 53 |                 "if(!el){return null;}"
 54 |                 "const r = el.getBoundingClientRect();"
 55 |                 "const sx = (window.screenX ?? window.screenLeft ?? 0);"
 56 |                 "const syRaw = (window.screenY ?? window.screenTop ?? 0);"
 57 |                 "const frameH = (window.outerHeight - window.innerHeight) || 0;"  # title bar / frame height
 58 |                 "const sy = syRaw + frameH;"
 59 |                 "return {x:sx + r.left, y:sy + r.top, width:r.width, height:r.height};"
 60 |                 "})()"
 61 |             )
 62 |         else:
 63 |             js = (
 64 |                 "(function(){"
 65 |                 f"const s = {selector_js};"
 66 |                 "const el = document.querySelector(s);"
 67 |                 "if(!el){return null;}"
 68 |                 "const r = el.getBoundingClientRect();"
 69 |                 "return {x:r.left,y:r.top,width:r.width,height:r.height};"
 70 |                 "})()"
 71 |             )
 72 |         try:
 73 |             # Evaluate JS on the target window; this call is thread-safe in pywebview
 74 |             result = window.evaluate_js(js)
 75 |         except Exception as e:
 76 |             return web.json_response({"error": str(e)}, status=500)
 77 |         return web.json_response({"rect": result})  # rect is null when the selector matched nothing
 78 | 
 79 |     async def eval_handler(request: web.Request):  # POST /eval: run arbitrary JS in the page
 80 |         try:
 81 |             data = await request.json()
 82 |         except Exception:
 83 |             return web.json_response({"error": "invalid_json"}, status=400)
 84 |         code = data.get("javascript") or data.get("code")  # accept either key
 85 |         if not isinstance(code, str):
 86 |             return web.json_response({"error": "javascript_required"}, status=400)
 87 | 
 88 |         if not ready_event.is_set():
 89 |             ready_event.wait(timeout=2.0)
 90 |         if not ready_event.is_set():
 91 |             return web.json_response({"error": "window_not_ready"}, status=409)
 92 | 
 93 |         try:
 94 |             result = window.evaluate_js(code)
 95 |         except Exception as e:
 96 |             return web.json_response({"error": str(e)}, status=500)
 97 |         return web.json_response({"result": result})
 98 | 
 99 |     async def index_handler(request: web.Request):  # GET /: inline HTML page or a status stub
100 |         if html_content is None:
101 |             return web.json_response({"status": "ok", "message": "bench-ui control server"})
102 |         return web.Response(text=html_content, content_type="text/html")
103 | 
104 |     app = web.Application()
105 | 
106 |     # If serving a folder, add static file routes
107 |     if folder_path:
108 |         app.router.add_static("/", folder_path, show_index=True)
109 |     else:
110 |         app.router.add_get("/", index_handler)
111 | 
112 |     app.router.add_post("/rect", rect_handler)
113 |     app.router.add_post("/eval", eval_handler)
114 | 
115 |     loop = asyncio.new_event_loop()  # server gets its own event loop on a daemon thread
116 | 
117 |     def run_loop():
118 |         asyncio.set_event_loop(loop)
119 |         runner = web.AppRunner(app)
120 |         loop.run_until_complete(runner.setup())
121 |         site = web.TCPSite(runner, "127.0.0.1", port)
122 |         loop.run_until_complete(site.start())
123 |         loop.run_forever()
124 | 
125 |     t = threading.Thread(target=run_loop, daemon=True)
126 |     t.start()
127 | 
128 | 
129 | def main():
130 |     if len(sys.argv) < 2:
131 |         print("Usage: python -m bench_ui.child <config.json>", file=sys.stderr)
132 |         sys.exit(2)
133 | 
134 |     cfg_path = Path(sys.argv[1])
135 |     cfg = json.loads(cfg_path.read_text(encoding="utf-8"))
136 | 
137 |     html: Optional[str] = cfg.get("html") or ""  # missing/None collapses to ""
138 |     url: Optional[str] = cfg.get("url")
139 |     folder: Optional[str] = cfg.get("folder")
140 |     title: str = cfg.get("title", "Window")
141 |     x: Optional[int] = cfg.get("x")
142 |     y: Optional[int] = cfg.get("y")
143 |     width: int = int(cfg.get("width", 600))
144 |     height: int = int(cfg.get("height", 400))
145 |     icon: Optional[str] = cfg.get("icon")  # NOTE(review): parsed but never used below — confirm intended
146 |     use_inner_size: bool = bool(cfg.get("use_inner_size", False))  # NOTE(review): unused below
147 |     title_bar_style: str = cfg.get("title_bar_style", "default")  # NOTE(review): unused below
148 | 
149 |     # Choose port early so we can point the window to it when serving inline HTML or folder
150 |     port = _get_free_port()
151 | 
152 |     # Create window
153 |     if url:  # explicit URL takes precedence over folder/inline HTML
154 |         window = webview.create_window(
155 |             title,
156 |             url=url,
157 |             width=width,
158 |             height=height,
159 |             x=x,
160 |             y=y,
161 |             confirm_close=False,
162 |             text_select=True,
163 |             background_color="#FFFFFF",
164 |         )
165 |         html_for_server = None
166 |         folder_for_server = None
167 |     elif folder:
168 |         # Serve static folder at control server root and point window to index.html
169 |         resolved_url = f"http://127.0.0.1:{port}/index.html"
170 |         window = webview.create_window(
171 |             title,
172 |             url=resolved_url,
173 |             width=width,
174 |             height=height,
175 |             x=x,
176 |             y=y,
177 |             confirm_close=False,
178 |             text_select=True,
179 |             background_color="#FFFFFF",
180 |         )
181 |         html_for_server = None
182 |         folder_for_server = folder
183 |     else:
184 |         # Serve inline HTML at control server root and point window to it
185 |         resolved_url = f"http://127.0.0.1:{port}/"
186 |         window = webview.create_window(
187 |             title,
188 |             url=resolved_url,
189 |             width=width,
190 |             height=height,
191 |             x=x,
192 |             y=y,
193 |             confirm_close=False,
194 |             text_select=True,
195 |             background_color="#FFFFFF",
196 |         )
197 |         html_for_server = html
198 |         folder_for_server = None
199 | 
200 |     # Track when the page is loaded so JS execution succeeds
201 |     window_ready = threading.Event()
202 | 
203 |     def _on_loaded():
204 |         window_ready.set()
205 | 
206 |     window.events.loaded += _on_loaded  # type: ignore[attr-defined]
207 | 
208 |     # Start HTTP server for control (and optionally serve inline HTML or static folder)
209 |     _start_http_server(
210 |         window, port, window_ready, html_content=html_for_server, folder_path=folder_for_server
211 |     )
212 | 
213 |     # Print startup info for parent to read
214 |     print(json.dumps({"pid": os.getpid(), "port": port}), flush=True)
215 | 
216 |     # Start GUI (blocking)
217 |     webview.start(debug=os.environ.get("CUA_BENCH_UI_DEBUG", "false").lower() in ("true", "1"))
218 | 
219 | 
# Script entry point: the module is launched as `python -m bench_ui.child <config.json>`.
if __name__ == "__main__":
    main()
222 | 
```

--------------------------------------------------------------------------------
/libs/lume/src/Commands/Config.swift:
--------------------------------------------------------------------------------

```swift
  1 | import ArgumentParser
  2 | import Foundation
  3 | 
  4 | struct Config: ParsableCommand {
  5 |     static let configuration = CommandConfiguration(
  6 |         commandName: "config",
  7 |         abstract: "Get or set lume configuration",
  8 |         subcommands: [Get.self, Storage.self, Cache.self, Caching.self],
  9 |         defaultSubcommand: Get.self
 10 |     )
 11 | 
 12 |     // MARK: - Basic Configuration Subcommands
 13 | 
 14 |     struct Get: ParsableCommand {
 15 |         static let configuration = CommandConfiguration(
 16 |             commandName: "get",
 17 |             abstract: "Get current configuration"
 18 |         )
 19 | 
 20 |         func run() throws {
 21 |             let controller = LumeController()
 22 |             let settings = controller.getSettings()
 23 | 
 24 |             // Display default location
 25 |             print(
 26 |                 "Default VM storage: \(settings.defaultLocationName) (\(settings.defaultLocation?.path ?? "not set"))"
 27 |             )
 28 | 
 29 |             // Display cache directory
 30 |             print("Cache directory: \(settings.cacheDirectory)")
 31 | 
 32 |             // Display caching enabled status
 33 |             print("Caching enabled: \(settings.cachingEnabled)")
 34 | 
 35 |             // Display all locations
 36 |             if !settings.vmLocations.isEmpty {
 37 |                 print("\nConfigured VM storage locations:")
 38 |                 for location in settings.sortedLocations {
 39 |                     let isDefault = location.name == settings.defaultLocationName
 40 |                     let defaultMark = isDefault ? " (default)" : ""
 41 |                     print("  - \(location.name): \(location.path)\(defaultMark)")
 42 |                 }
 43 |             }
 44 |         }
 45 |     }
 46 | 
 47 |     // MARK: - Debug Command
 48 | 
 49 |     struct Debug: ParsableCommand {
 50 |         static let configuration = CommandConfiguration(
 51 |             commandName: "debug",
 52 |             abstract: "Output detailed debug information about current configuration",
 53 |             shouldDisplay: false
 54 |         )
 55 | 
 56 |         func run() throws {
 57 |             let debugInfo = SettingsManager.shared.debugSettings()
 58 |             print(debugInfo)
 59 |         }
 60 |     }
 61 | 
 62 |     // MARK: - Caching Management Subcommands
 63 | 
 64 |     struct Caching: ParsableCommand {
 65 |         static let configuration = CommandConfiguration(
 66 |             commandName: "caching",
 67 |             abstract: "Manage image caching settings",
 68 |             subcommands: [GetCaching.self, SetCaching.self]
 69 |         )
 70 | 
 71 |         struct GetCaching: ParsableCommand {
 72 |             static let configuration = CommandConfiguration(
 73 |                 commandName: "get",
 74 |                 abstract: "Show current caching status"
 75 |             )
 76 | 
 77 |             func run() throws {
 78 |                 let controller = LumeController()
 79 |                 let cachingEnabled = controller.isCachingEnabled()
 80 |                 print("Caching enabled: \(cachingEnabled)")
 81 |             }
 82 |         }
 83 | 
 84 |         struct SetCaching: ParsableCommand {
 85 |             static let configuration = CommandConfiguration(
 86 |                 commandName: "set",
 87 |                 abstract: "Enable or disable image caching"
 88 |             )
 89 | 
 90 |             @Argument(help: "Enable or disable caching (true/false)")
 91 |             var enabled: Bool
 92 | 
 93 |             func run() throws {
 94 |                 let controller = LumeController()
 95 |                 try controller.setCachingEnabled(enabled)
 96 |                 print("Caching \(enabled ? "enabled" : "disabled")")
 97 |             }
 98 |         }
 99 |     }
100 | 
101 |     // MARK: - Cache Management Subcommands
102 | 
103 |     struct Cache: ParsableCommand {
104 |         static let configuration = CommandConfiguration(
105 |             commandName: "cache",
106 |             abstract: "Manage cache settings",
107 |             subcommands: [GetCache.self, SetCache.self]
108 |         )
109 | 
110 |         struct GetCache: ParsableCommand {
111 |             static let configuration = CommandConfiguration(
112 |                 commandName: "get",
113 |                 abstract: "Get current cache directory"
114 |             )
115 | 
116 |             func run() throws {
117 |                 let controller = LumeController()
118 |                 let cacheDir = controller.getCacheDirectory()
119 |                 print("Cache directory: \(cacheDir)")
120 |             }
121 |         }
122 | 
123 |         struct SetCache: ParsableCommand {
124 |             static let configuration = CommandConfiguration(
125 |                 commandName: "set",
126 |                 abstract: "Set cache directory"
127 |             )
128 | 
129 |             @Argument(help: "Path to cache directory")
130 |             var path: String
131 | 
132 |             func run() throws {
133 |                 let controller = LumeController()
134 |                 try controller.setCacheDirectory(path: path)
135 |                 print("Cache directory set to: \(path)")
136 |             }
137 |         }
138 |     }
139 | 
140 |     // MARK: - Storage Management Subcommands
141 | 
142 |     struct Storage: ParsableCommand {
143 |         static let configuration = CommandConfiguration(
144 |             commandName: "storage",
145 |             abstract: "Manage VM storage locations",
146 |             subcommands: [Add.self, Remove.self, List.self, Default.self]
147 |         )
148 | 
149 |         struct Add: ParsableCommand {
150 |             static let configuration = CommandConfiguration(
151 |                 commandName: "add",
152 |                 abstract: "Add a new VM storage location"
153 |             )
154 | 
155 |             @Argument(help: "Storage name (alphanumeric with dashes/underscores)")
156 |             var name: String
157 | 
158 |             @Argument(help: "Path to VM storage directory")
159 |             var path: String
160 | 
161 |             func run() throws {
162 |                 let controller = LumeController()
163 |                 try controller.addLocation(name: name, path: path)
164 |                 print("Added VM storage location: \(name) at \(path)")
165 |             }
166 |         }
167 | 
168 |         struct Remove: ParsableCommand {
169 |             static let configuration = CommandConfiguration(
170 |                 commandName: "remove",
171 |                 abstract: "Remove a VM storage location"
172 |             )
173 | 
174 |             @Argument(help: "Storage name to remove")
175 |             var name: String
176 | 
177 |             func run() throws {
178 |                 let controller = LumeController()
179 |                 try controller.removeLocation(name: name)
180 |                 print("Removed VM storage location: \(name)")
181 |             }
182 |         }
183 | 
184 |         struct List: ParsableCommand {
185 |             static let configuration = CommandConfiguration(
186 |                 commandName: "list",
187 |                 abstract: "List all VM storage locations"
188 |             )
189 | 
190 |             func run() throws {
191 |                 let controller = LumeController()
192 |                 let settings = controller.getSettings()
193 | 
194 |                 if settings.vmLocations.isEmpty {
195 |                     print("No VM storage locations configured")
196 |                     return
197 |                 }
198 | 
199 |                 print("VM Storage Locations:")
200 |                 for location in settings.sortedLocations {
201 |                     let isDefault = location.name == settings.defaultLocationName
202 |                     let defaultMark = isDefault ? " (default)" : ""
203 |                     print("  - \(location.name): \(location.path)\(defaultMark)")
204 |                 }
205 |             }
206 |         }
207 | 
208 |         struct Default: ParsableCommand {
209 |             static let configuration = CommandConfiguration(
210 |                 commandName: "default",
211 |                 abstract: "Set the default VM storage location"
212 |             )
213 | 
214 |             @Argument(help: "Storage name to set as default")
215 |             var name: String
216 | 
217 |             func run() throws {
218 |                 let controller = LumeController()
219 |                 try controller.setDefaultLocation(name: name)
220 |                 print("Set default VM storage location to: \(name)")
221 |             }
222 |         }
223 |     }
224 | }
225 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/loops/holo.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Holo 1.5 agent loop implementation for click prediction using litellm.acompletion.
  3 | 
  4 | Implements the Holo1.5 grounding behavior:
  5 | - Prompt asks for absolute pixel coordinates in JSON: {"action":"click_absolute","x":int,"y":int}
  6 | - Optionally resizes the image using Qwen2-VL smart_resize parameters (via transformers AutoProcessor)
  7 | - If resized, maps predicted coordinates back to the original screenshot resolution
  8 | 
  9 | Note: We do NOT manually load the model; acompletions (via HuggingFaceLocalAdapter)
 10 | will handle loading based on the provided model name.
 11 | """
 12 | 
 13 | from __future__ import annotations
 14 | 
 15 | import base64
 16 | import json
 17 | from io import BytesIO
 18 | from typing import Any, Dict, List, Optional, Tuple
 19 | 
 20 | import litellm
 21 | from PIL import Image
 22 | 
 23 | from ..decorators import register_agent
 24 | from ..types import AgentCapability
 25 | from .base import AsyncAgentConfig
 26 | 
 27 | 
 28 | def _strip_hf_prefix(model: str) -> str:
 29 |     """Strip provider prefixes like 'huggingface-local/' from model names for HF processor load."""
 30 |     if "/" in model and model.lower().startswith("huggingface-local/"):
 31 |         return model.split("/", 1)[1]
 32 |     return model
 33 | 
 34 | 
def _maybe_smart_resize(image: Image.Image, model: str) -> Tuple[Image.Image, Tuple[int, int]]:
    """
    Try to compute Qwen2-VL smart_resize output size using transformers AutoProcessor.

    Args:
        image: Source screenshot as a PIL image.
        model: Model name; a 'huggingface-local/' prefix is stripped before
            loading the processor.

    Returns (processed_image, (orig_w, orig_h)). If transformers or processor unavailable,
    returns the original image and size without resizing.
    """
    orig_w, orig_h = image.size
    try:
        # Import lazily to avoid hard dependency if not installed
        from transformers import AutoProcessor  # type: ignore
        from transformers.models.qwen2_vl.image_processing_qwen2_vl import (  # type: ignore
            smart_resize,
        )

        processor_name = _strip_hf_prefix(model)
        processor = AutoProcessor.from_pretrained(processor_name)
        image_processor = getattr(processor, "image_processor", None)
        if image_processor is None:
            # Processor has no image component; nothing to resize against.
            return image, (orig_w, orig_h)

        # Resize factor = patch_size * merge_size read from the processor.
        # NOTE(review): the fallback merge_size of 1 only applies when the
        # attribute is missing; Qwen2-VL processors typically define
        # merge_size=2 — confirm the fallback is intentional.
        factor = getattr(image_processor, "patch_size", 14) * getattr(
            image_processor, "merge_size", 1
        )
        min_pixels = getattr(image_processor, "min_pixels", 256 * 256)
        max_pixels = getattr(image_processor, "max_pixels", 1536 * 1536)

        # smart_resize is called and unpacked in (height, width) order here.
        resized_h, resized_w = smart_resize(
            orig_h,
            orig_w,
            factor=factor,
            min_pixels=min_pixels,
            max_pixels=max_pixels,
        )

        # No-op resize: return the original object untouched.
        if (resized_w, resized_h) == (orig_w, orig_h):
            return image, (orig_w, orig_h)

        processed = image.resize((resized_w, resized_h), resample=Image.Resampling.LANCZOS)
        return processed, (orig_w, orig_h)
    except Exception:
        # If any failure (no transformers, processor load error), fall back to original
        return image, (orig_w, orig_h)
 78 | 
 79 | 
 80 | def _build_holo_prompt(instruction: str) -> str:
 81 |     """Construct the Holo1.5 grounding prompt."""
 82 |     # Keep it close to the cookbook while avoiding heavy schema generation
 83 |     schema_hint = '{"action": "click_absolute", "x": <int>, "y": <int>}'
 84 |     return (
 85 |         "Localize an element on the GUI image according to the provided target and output a click position. "
 86 |         f"You must output a valid JSON following the format: {schema_hint} "
 87 |         f"Your target is: {instruction}"
 88 |     )
 89 | 
 90 | 
 91 | def _parse_click_json(output_text: str) -> Optional[Tuple[int, int]]:
 92 |     """
 93 |     Parse JSON from model output and extract x, y ints.
 94 |     Tries to find the first JSON object substring if extra text is present.
 95 |     """
 96 |     try:
 97 |         # Fast path: direct JSON
 98 |         data = json.loads(output_text)
 99 |     except Exception:
100 |         # Try to locate a JSON object within the text
101 |         start = output_text.find("{")
102 |         end = output_text.rfind("}")
103 |         if start == -1 or end == -1 or end <= start:
104 |             return None
105 |         try:
106 |             data = json.loads(output_text[start : end + 1])
107 |         except Exception:
108 |             return None
109 | 
110 |     try:
111 |         x = int(data.get("x"))
112 |         y = int(data.get("y"))
113 |         return x, y
114 |     except Exception:
115 |         return None
116 | 
117 | 
@register_agent(models=r"(?i).*(Holo1\.5|Hcompany/Holo1\.5).*")
class HoloConfig(AsyncAgentConfig):
    """Holo is a family of UI grounding models from H Company.

    Registered for any model name containing 'Holo1.5' (case-insensitive).
    Supports only click prediction; full agent stepping is unsupported.
    """

    async def predict_step(
        self,
        messages: List[Dict[str, Any]],
        model: str,
        tools: Optional[List[Dict[str, Any]]] = None,
        max_retries: Optional[int] = None,
        stream: bool = False,
        computer_handler=None,
        _on_api_start=None,
        _on_api_end=None,
        _on_usage=None,
        _on_screenshot=None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Unsupported: always raises NotImplementedError."""
        # Holo models are only trained on UI localization tasks, not all-in-one agent
        raise NotImplementedError()

    async def predict_click(
        self,
        model: str,
        image_b64: str,
        instruction: str,
        **kwargs,
    ) -> Optional[Tuple[int, int]]:
        """
        Predict click coordinates using Holo1.5 via litellm.acompletion.

        - Optionally smart-resizes the image using Qwen2-VL rules if transformers are available
        - Prompts for JSON with absolute pixel coordinates
        - Parses x,y and maps back to original screenshot size if resized

        Args:
            model: Model identifier passed straight to litellm.acompletion.
            image_b64: Base64-encoded screenshot (decoded with PIL below).
            instruction: Target description inserted into the grounding prompt.
            **kwargs: Optional 'max_tokens' / 'temperature' overrides.

        Returns:
            (x, y) pixel coordinates clamped to the original image bounds,
            or None if the image or the model output cannot be parsed.
        """
        try:
            img_bytes = base64.b64decode(image_b64)
            original_img = Image.open(BytesIO(img_bytes))
        except Exception:
            # Bad base64 or unreadable image data — nothing to ground against.
            return None

        # Optional preprocessing (falls back to the original image on failure)
        processed_img, (orig_w, orig_h) = _maybe_smart_resize(original_img, model)

        # If we resized, send the resized image; otherwise send original
        img_to_send = processed_img
        buf = BytesIO()
        img_to_send.save(buf, format="PNG")
        processed_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")

        prompt = _build_holo_prompt(instruction)

        # Single user turn: image first, then the grounding prompt text.
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{processed_b64}"},
                    },
                    {"type": "text", "text": prompt},
                ],
            }
        ]

        api_kwargs = {
            "model": model,
            "messages": messages,
            # Deterministic, small output
            "max_tokens": kwargs.get("max_tokens", 256),
            "temperature": kwargs.get("temperature", 0.0),
        }

        response = await litellm.acompletion(**api_kwargs)
        output_text = (response.choices[0].message.content or "").strip()  # type: ignore

        coords = _parse_click_json(output_text)
        if coords is None:
            return None

        x, y = coords

        # Map back to original size if we resized
        proc_w, proc_h = img_to_send.size
        if (proc_w, proc_h) != (orig_w, orig_h):
            try:
                sx = orig_w / float(proc_w)
                sy = orig_h / float(proc_h)
                x = int(round(x * sx))
                y = int(round(y * sy))
            except Exception:
                # Fallback: clamp within original bounds
                pass

        # Clamp to original image bounds
        x = max(0, min(orig_w - 1, x))
        y = max(0, min(orig_h - 1, y))
        return x, y

    def get_capabilities(self) -> List[AgentCapability]:
        """Advertise click-only capability for this agent family."""
        return ["click"]
219 | 
```
Page 9/28FirstPrevNextLast