This is page 7 of 16. Use http://codebase.md/trycua/cua?page={x} to view the full context. # Directory Structure ``` ├── .all-contributorsrc ├── .cursorignore ├── .devcontainer │ ├── devcontainer.json │ ├── post-install.sh │ └── README.md ├── .dockerignore ├── .gitattributes ├── .github │ ├── FUNDING.yml │ ├── scripts │ │ ├── get_pyproject_version.py │ │ └── tests │ │ ├── __init__.py │ │ ├── README.md │ │ └── test_get_pyproject_version.py │ └── workflows │ ├── ci-lume.yml │ ├── docker-publish-kasm.yml │ ├── docker-publish-xfce.yml │ ├── docker-reusable-publish.yml │ ├── npm-publish-computer.yml │ ├── npm-publish-core.yml │ ├── publish-lume.yml │ ├── pypi-publish-agent.yml │ ├── pypi-publish-computer-server.yml │ ├── pypi-publish-computer.yml │ ├── pypi-publish-core.yml │ ├── pypi-publish-mcp-server.yml │ ├── pypi-publish-pylume.yml │ ├── pypi-publish-som.yml │ ├── pypi-reusable-publish.yml │ └── test-validation-script.yml ├── .gitignore ├── .vscode │ ├── docs.code-workspace │ ├── launch.json │ ├── libs-ts.code-workspace │ ├── lume.code-workspace │ ├── lumier.code-workspace │ ├── py.code-workspace │ └── settings.json ├── blog │ ├── app-use.md │ ├── assets │ │ ├── composite-agents.png │ │ ├── docker-ubuntu-support.png │ │ ├── hack-booth.png │ │ ├── hack-closing-ceremony.jpg │ │ ├── hack-cua-ollama-hud.jpeg │ │ ├── hack-leaderboard.png │ │ ├── hack-the-north.png │ │ ├── hack-winners.jpeg │ │ ├── hack-workshop.jpeg │ │ ├── hud-agent-evals.png │ │ └── trajectory-viewer.jpeg │ ├── bringing-computer-use-to-the-web.md │ ├── build-your-own-operator-on-macos-1.md │ ├── build-your-own-operator-on-macos-2.md │ ├── composite-agents.md │ ├── cua-hackathon.md │ ├── hack-the-north.md │ ├── hud-agent-evals.md │ ├── human-in-the-loop.md │ ├── introducing-cua-cloud-containers.md │ ├── lume-to-containerization.md │ ├── sandboxed-python-execution.md │ ├── training-computer-use-models-trajectories-1.md │ ├── trajectory-viewer.md │ ├── ubuntu-docker-support.md │ └── windows-sandbox.md 
├── CONTRIBUTING.md ├── Development.md ├── Dockerfile ├── docs │ ├── .gitignore │ ├── .prettierrc │ ├── content │ │ └── docs │ │ ├── agent-sdk │ │ │ ├── agent-loops.mdx │ │ │ ├── benchmarks │ │ │ │ ├── index.mdx │ │ │ │ ├── interactive.mdx │ │ │ │ ├── introduction.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── osworld-verified.mdx │ │ │ │ ├── screenspot-pro.mdx │ │ │ │ └── screenspot-v2.mdx │ │ │ ├── callbacks │ │ │ │ ├── agent-lifecycle.mdx │ │ │ │ ├── cost-saving.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── logging.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── pii-anonymization.mdx │ │ │ │ └── trajectories.mdx │ │ │ ├── chat-history.mdx │ │ │ ├── custom-computer-handlers.mdx │ │ │ ├── custom-tools.mdx │ │ │ ├── customizing-computeragent.mdx │ │ │ ├── integrations │ │ │ │ ├── hud.mdx │ │ │ │ └── meta.json │ │ │ ├── message-format.mdx │ │ │ ├── meta.json │ │ │ ├── migration-guide.mdx │ │ │ ├── prompt-caching.mdx │ │ │ ├── supported-agents │ │ │ │ ├── composed-agents.mdx │ │ │ │ ├── computer-use-agents.mdx │ │ │ │ ├── grounding-models.mdx │ │ │ │ ├── human-in-the-loop.mdx │ │ │ │ └── meta.json │ │ │ ├── supported-model-providers │ │ │ │ ├── index.mdx │ │ │ │ └── local-models.mdx │ │ │ └── usage-tracking.mdx │ │ ├── computer-sdk │ │ │ ├── cloud-vm-management.mdx │ │ │ ├── commands.mdx │ │ │ ├── computer-ui.mdx │ │ │ ├── computers.mdx │ │ │ ├── meta.json │ │ │ └── sandboxed-python.mdx │ │ ├── index.mdx │ │ ├── libraries │ │ │ ├── agent │ │ │ │ └── index.mdx │ │ │ ├── computer │ │ │ │ └── index.mdx │ │ │ ├── computer-server │ │ │ │ ├── Commands.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── REST-API.mdx │ │ │ │ └── WebSocket-API.mdx │ │ │ ├── core │ │ │ │ └── index.mdx │ │ │ ├── lume │ │ │ │ ├── cli-reference.mdx │ │ │ │ ├── faq.md │ │ │ │ ├── http-api.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── meta.json │ │ │ │ └── prebuilt-images.mdx │ │ │ ├── lumier │ │ │ │ ├── building-lumier.mdx │ │ │ │ ├── docker-compose.mdx │ │ │ │ ├── docker.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── 
installation.mdx │ │ │ │ └── meta.json │ │ │ ├── mcp-server │ │ │ │ ├── client-integrations.mdx │ │ │ │ ├── configuration.mdx │ │ │ │ ├── index.mdx │ │ │ │ ├── installation.mdx │ │ │ │ ├── llm-integrations.mdx │ │ │ │ ├── meta.json │ │ │ │ ├── tools.mdx │ │ │ │ └── usage.mdx │ │ │ └── som │ │ │ ├── configuration.mdx │ │ │ └── index.mdx │ │ ├── meta.json │ │ ├── quickstart-cli.mdx │ │ ├── quickstart-devs.mdx │ │ └── telemetry.mdx │ ├── next.config.mjs │ ├── package-lock.json │ ├── package.json │ ├── pnpm-lock.yaml │ ├── postcss.config.mjs │ ├── public │ │ └── img │ │ ├── agent_gradio_ui.png │ │ ├── agent.png │ │ ├── cli.png │ │ ├── computer.png │ │ ├── som_box_threshold.png │ │ └── som_iou_threshold.png │ ├── README.md │ ├── source.config.ts │ ├── src │ │ ├── app │ │ │ ├── (home) │ │ │ │ ├── [[...slug]] │ │ │ │ │ └── page.tsx │ │ │ │ └── layout.tsx │ │ │ ├── api │ │ │ │ └── search │ │ │ │ └── route.ts │ │ │ ├── favicon.ico │ │ │ ├── global.css │ │ │ ├── layout.config.tsx │ │ │ ├── layout.tsx │ │ │ ├── llms.mdx │ │ │ │ └── [[...slug]] │ │ │ │ └── route.ts │ │ │ └── llms.txt │ │ │ └── route.ts │ │ ├── assets │ │ │ ├── discord-black.svg │ │ │ ├── discord-white.svg │ │ │ ├── logo-black.svg │ │ │ └── logo-white.svg │ │ ├── components │ │ │ ├── iou.tsx │ │ │ └── mermaid.tsx │ │ ├── lib │ │ │ ├── llms.ts │ │ │ └── source.ts │ │ └── mdx-components.tsx │ └── tsconfig.json ├── examples │ ├── agent_examples.py │ ├── agent_ui_examples.py │ ├── cloud_api_examples.py │ ├── computer_examples_windows.py │ ├── computer_examples.py │ ├── computer_ui_examples.py │ ├── computer-example-ts │ │ ├── .env.example │ │ ├── .gitignore │ │ ├── .prettierrc │ │ ├── package-lock.json │ │ ├── package.json │ │ ├── pnpm-lock.yaml │ │ ├── README.md │ │ ├── src │ │ │ ├── helpers.ts │ │ │ └── index.ts │ │ └── tsconfig.json │ ├── docker_examples.py │ ├── evals │ │ ├── hud_eval_examples.py │ │ └── wikipedia_most_linked.txt │ ├── pylume_examples.py │ ├── sandboxed_functions_examples.py │ ├── 
som_examples.py │ ├── utils.py │ └── winsandbox_example.py ├── img │ ├── agent_gradio_ui.png │ ├── agent.png │ ├── cli.png │ ├── computer.png │ ├── logo_black.png │ └── logo_white.png ├── libs │ ├── kasm │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ └── src │ │ └── ubuntu │ │ └── install │ │ └── firefox │ │ ├── custom_startup.sh │ │ ├── firefox.desktop │ │ └── install_firefox.sh │ ├── lume │ │ ├── .cursorignore │ │ ├── CONTRIBUTING.md │ │ ├── Development.md │ │ ├── img │ │ │ └── cli.png │ │ ├── Package.resolved │ │ ├── Package.swift │ │ ├── README.md │ │ ├── resources │ │ │ └── lume.entitlements │ │ ├── scripts │ │ │ ├── build │ │ │ │ ├── build-debug.sh │ │ │ │ ├── build-release-notarized.sh │ │ │ │ └── build-release.sh │ │ │ └── install.sh │ │ ├── src │ │ │ ├── Commands │ │ │ │ ├── Clone.swift │ │ │ │ ├── Config.swift │ │ │ │ ├── Create.swift │ │ │ │ ├── Delete.swift │ │ │ │ ├── Get.swift │ │ │ │ ├── Images.swift │ │ │ │ ├── IPSW.swift │ │ │ │ ├── List.swift │ │ │ │ ├── Logs.swift │ │ │ │ ├── Options │ │ │ │ │ └── FormatOption.swift │ │ │ │ ├── Prune.swift │ │ │ │ ├── Pull.swift │ │ │ │ ├── Push.swift │ │ │ │ ├── Run.swift │ │ │ │ ├── Serve.swift │ │ │ │ ├── Set.swift │ │ │ │ └── Stop.swift │ │ │ ├── ContainerRegistry │ │ │ │ ├── ImageContainerRegistry.swift │ │ │ │ ├── ImageList.swift │ │ │ │ └── ImagesPrinter.swift │ │ │ ├── Errors │ │ │ │ └── Errors.swift │ │ │ ├── FileSystem │ │ │ │ ├── Home.swift │ │ │ │ ├── Settings.swift │ │ │ │ ├── VMConfig.swift │ │ │ │ ├── VMDirectory.swift │ │ │ │ └── VMLocation.swift │ │ │ ├── LumeController.swift │ │ │ ├── Main.swift │ │ │ ├── Server │ │ │ │ ├── Handlers.swift │ │ │ │ ├── HTTP.swift │ │ │ │ ├── Requests.swift │ │ │ │ ├── Responses.swift │ │ │ │ └── Server.swift │ │ │ ├── Utils │ │ │ │ ├── CommandRegistry.swift │ │ │ │ ├── CommandUtils.swift │ │ │ │ ├── Logger.swift │ │ │ │ ├── NetworkUtils.swift │ │ │ │ ├── Path.swift │ │ │ │ ├── ProcessRunner.swift │ │ │ │ ├── ProgressLogger.swift │ │ │ │ ├── String.swift 
│ │ │ │ └── Utils.swift │ │ │ ├── Virtualization │ │ │ │ ├── DarwinImageLoader.swift │ │ │ │ ├── DHCPLeaseParser.swift │ │ │ │ ├── ImageLoaderFactory.swift │ │ │ │ └── VMVirtualizationService.swift │ │ │ ├── VM │ │ │ │ ├── DarwinVM.swift │ │ │ │ ├── LinuxVM.swift │ │ │ │ ├── VM.swift │ │ │ │ ├── VMDetails.swift │ │ │ │ ├── VMDetailsPrinter.swift │ │ │ │ ├── VMDisplayResolution.swift │ │ │ │ └── VMFactory.swift │ │ │ └── VNC │ │ │ ├── PassphraseGenerator.swift │ │ │ └── VNCService.swift │ │ └── tests │ │ ├── Mocks │ │ │ ├── MockVM.swift │ │ │ ├── MockVMVirtualizationService.swift │ │ │ └── MockVNCService.swift │ │ ├── VM │ │ │ └── VMDetailsPrinterTests.swift │ │ ├── VMTests.swift │ │ ├── VMVirtualizationServiceTests.swift │ │ └── VNCServiceTests.swift │ ├── lumier │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── README.md │ │ └── src │ │ ├── bin │ │ │ └── entry.sh │ │ ├── config │ │ │ └── constants.sh │ │ ├── hooks │ │ │ └── on-logon.sh │ │ └── lib │ │ ├── utils.sh │ │ └── vm.sh │ ├── python │ │ ├── agent │ │ │ ├── .bumpversion.cfg │ │ │ ├── agent │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── adapters │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── huggingfacelocal_adapter.py │ │ │ │ │ ├── human_adapter.py │ │ │ │ │ ├── mlxvlm_adapter.py │ │ │ │ │ └── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── qwen2_5_vl.py │ │ │ │ ├── agent.py │ │ │ │ ├── callbacks │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── budget_manager.py │ │ │ │ │ ├── image_retention.py │ │ │ │ │ ├── logging.py │ │ │ │ │ ├── operator_validator.py │ │ │ │ │ ├── pii_anonymization.py │ │ │ │ │ ├── prompt_instructions.py │ │ │ │ │ ├── telemetry.py │ │ │ │ │ └── trajectory_saver.py │ │ │ │ ├── cli.py │ │ │ │ ├── computers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cua.py │ │ │ │ │ └── custom.py │ │ │ │ ├── decorators.py │ │ │ │ ├── human_tool │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py 
│ │ │ │ │ ├── server.py │ │ │ │ │ └── ui.py │ │ │ │ ├── integrations │ │ │ │ │ └── hud │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── agent.py │ │ │ │ │ └── proxy.py │ │ │ │ ├── loops │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── anthropic.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── composed_grounded.py │ │ │ │ │ ├── gemini.py │ │ │ │ │ ├── glm45v.py │ │ │ │ │ ├── gta1.py │ │ │ │ │ ├── holo.py │ │ │ │ │ ├── internvl.py │ │ │ │ │ ├── model_types.csv │ │ │ │ │ ├── moondream3.py │ │ │ │ │ ├── omniparser.py │ │ │ │ │ ├── openai.py │ │ │ │ │ ├── opencua.py │ │ │ │ │ └── uitars.py │ │ │ │ ├── proxy │ │ │ │ │ ├── examples.py │ │ │ │ │ └── handlers.py │ │ │ │ ├── responses.py │ │ │ │ ├── types.py │ │ │ │ └── ui │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ └── gradio │ │ │ │ ├── __init__.py │ │ │ │ ├── app.py │ │ │ │ └── ui_components.py │ │ │ ├── benchmarks │ │ │ │ ├── .gitignore │ │ │ │ ├── contrib.md │ │ │ │ ├── interactive.py │ │ │ │ ├── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ └── gta1.py │ │ │ │ ├── README.md │ │ │ │ ├── ss-pro.py │ │ │ │ ├── ss-v2.py │ │ │ │ └── utils.py │ │ │ ├── example.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer │ │ │ ├── .bumpversion.cfg │ │ │ ├── computer │ │ │ │ ├── __init__.py │ │ │ │ ├── computer.py │ │ │ │ ├── diorama_computer.py │ │ │ │ ├── helpers.py │ │ │ │ ├── interface │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ ├── models.py │ │ │ │ │ └── windows.py │ │ │ │ ├── logger.py │ │ │ │ ├── models.py │ │ │ │ ├── providers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── cloud │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── docker │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── lume │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── provider.py │ │ │ │ │ ├── lume_api.py │ │ │ │ │ ├── lumier │ │ │ │ │ │ ├── __init__.py │ │ │ │ 
│ │ └── provider.py │ │ │ │ │ ├── types.py │ │ │ │ │ └── winsandbox │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── provider.py │ │ │ │ │ └── setup_script.ps1 │ │ │ │ ├── ui │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ └── gradio │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── app.py │ │ │ │ └── utils.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── computer-server │ │ │ ├── .bumpversion.cfg │ │ │ ├── computer_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── cli.py │ │ │ │ ├── diorama │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── diorama_computer.py │ │ │ │ │ ├── diorama.py │ │ │ │ │ ├── draw.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── safezone.py │ │ │ │ ├── handlers │ │ │ │ │ ├── base.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── generic.py │ │ │ │ │ ├── linux.py │ │ │ │ │ ├── macos.py │ │ │ │ │ └── windows.py │ │ │ │ ├── main.py │ │ │ │ ├── server.py │ │ │ │ └── watchdog.py │ │ │ ├── examples │ │ │ │ ├── __init__.py │ │ │ │ └── usage_example.py │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ ├── run_server.py │ │ │ └── test_connection.py │ │ ├── core │ │ │ ├── .bumpversion.cfg │ │ │ ├── core │ │ │ │ ├── __init__.py │ │ │ │ └── telemetry │ │ │ │ ├── __init__.py │ │ │ │ └── posthog.py │ │ │ ├── poetry.toml │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ ├── mcp-server │ │ │ ├── .bumpversion.cfg │ │ │ ├── CONCURRENT_SESSIONS.md │ │ │ ├── mcp_server │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── server.py │ │ │ │ └── session_manager.py │ │ │ ├── pdm.lock │ │ │ ├── pyproject.toml │ │ │ ├── README.md │ │ │ └── scripts │ │ │ ├── install_mcp_server.sh │ │ │ └── start_mcp_server.sh │ │ ├── pylume │ │ │ ├── __init__.py │ │ │ ├── .bumpversion.cfg │ │ │ ├── pylume │ │ │ │ ├── __init__.py │ │ │ │ ├── client.py │ │ │ │ ├── exceptions.py │ │ │ │ ├── lume │ │ │ │ ├── models.py │ │ │ │ ├── pylume.py │ │ │ │ └── server.py │ │ │ ├── pyproject.toml │ │ │ └── README.md │ │ └── som │ │ ├── .bumpversion.cfg │ │ 
├── LICENSE │ │ ├── poetry.toml │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── som │ │ │ ├── __init__.py │ │ │ ├── detect.py │ │ │ ├── detection.py │ │ │ ├── models.py │ │ │ ├── ocr.py │ │ │ ├── util │ │ │ │ └── utils.py │ │ │ └── visualization.py │ │ └── tests │ │ └── test_omniparser.py │ ├── typescript │ │ ├── .gitignore │ │ ├── .nvmrc │ │ ├── agent │ │ │ ├── examples │ │ │ │ ├── playground-example.html │ │ │ │ └── README.md │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── client.ts │ │ │ │ ├── index.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ └── client.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── biome.json │ │ ├── computer │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── computer │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── providers │ │ │ │ │ │ ├── base.ts │ │ │ │ │ │ ├── cloud.ts │ │ │ │ │ │ └── index.ts │ │ │ │ │ └── types.ts │ │ │ │ ├── index.ts │ │ │ │ ├── interface │ │ │ │ │ ├── base.ts │ │ │ │ │ ├── factory.ts │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── linux.ts │ │ │ │ │ ├── macos.ts │ │ │ │ │ └── windows.ts │ │ │ │ └── types.ts │ │ │ ├── tests │ │ │ │ ├── computer │ │ │ │ │ └── cloud.test.ts │ │ │ │ ├── interface │ │ │ │ │ ├── factory.test.ts │ │ │ │ │ ├── index.test.ts │ │ │ │ │ ├── linux.test.ts │ │ │ │ │ ├── macos.test.ts │ │ │ │ │ └── windows.test.ts │ │ │ │ └── setup.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── core │ │ │ ├── .editorconfig │ │ │ ├── .gitattributes │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── package.json │ │ │ ├── README.md │ │ │ ├── src │ │ │ │ ├── index.ts │ │ │ │ └── telemetry │ │ │ │ ├── clients │ │ │ │ │ ├── index.ts │ │ │ │ │ └── posthog.ts │ │ │ │ └── index.ts │ │ │ ├── tests │ │ │ │ └── telemetry.test.ts │ │ │ ├── tsconfig.json │ │ │ ├── tsdown.config.ts │ │ │ └── vitest.config.ts │ │ ├── package.json │ │ ├── 
pnpm-lock.yaml │ │ ├── pnpm-workspace.yaml │ │ └── README.md │ └── xfce │ ├── .dockerignore │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ └── src │ ├── scripts │ │ ├── resize-display.sh │ │ ├── start-computer-server.sh │ │ ├── start-novnc.sh │ │ ├── start-vnc.sh │ │ └── xstartup.sh │ ├── supervisor │ │ └── supervisord.conf │ └── xfce-config │ ├── helpers.rc │ ├── xfce4-power-manager.xml │ └── xfce4-session.xml ├── LICENSE.md ├── Makefile ├── notebooks │ ├── agent_nb.ipynb │ ├── blog │ │ ├── build-your-own-operator-on-macos-1.ipynb │ │ └── build-your-own-operator-on-macos-2.ipynb │ ├── composite_agents_docker_nb.ipynb │ ├── computer_nb.ipynb │ ├── computer_server_nb.ipynb │ ├── customizing_computeragent.ipynb │ ├── eval_osworld.ipynb │ ├── ollama_nb.ipynb │ ├── pylume_nb.ipynb │ ├── README.md │ ├── sota_hackathon_cloud.ipynb │ └── sota_hackathon.ipynb ├── pdm.lock ├── pyproject.toml ├── pyrightconfig.json ├── README.md ├── samples │ └── community │ ├── global-online │ │ └── README.md │ └── hack-the-north │ └── README.md ├── scripts │ ├── build-uv.sh │ ├── build.ps1 │ ├── build.sh │ ├── cleanup.sh │ ├── playground-docker.sh │ ├── playground.sh │ └── run-docker-dev.sh └── tests ├── pytest.ini ├── shell_cmd.py ├── test_files.py ├── test_mcp_server_session_management.py ├── test_mcp_server_streaming.py ├── test_shell_bash.py ├── test_telemetry.py ├── test_venv.py └── test_watchdog.py ``` # Files -------------------------------------------------------------------------------- /libs/kasm/src/ubuntu/install/firefox/install_firefox.sh: -------------------------------------------------------------------------------- ```bash #!/usr/bin/env bash set -xe # Add icon if [ -f /dockerstartup/install/ubuntu/install/firefox/firefox.desktop ]; then mv /dockerstartup/install/ubuntu/install/firefox/firefox.desktop $HOME/Desktop/ fi ARCH=$(arch | sed 's/aarch64/arm64/g' | sed 's/x86_64/amd64/g') set_desktop_icon() { sed -i -e 
's!Icon=.\+!Icon=/usr/share/icons/hicolor/48x48/apps/firefox.png!' "$HOME/Desktop/firefox.desktop" } echo "Install Firefox" if [[ "${DISTRO}" == @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|fedora39|fedora40) ]]; then dnf install -y firefox p11-kit elif [ "${DISTRO}" == "opensuse" ]; then zypper install -yn p11-kit-tools MozillaFirefox elif grep -q Jammy /etc/os-release || grep -q Noble /etc/os-release; then if [ ! -f '/etc/apt/preferences.d/mozilla-firefox' ]; then add-apt-repository -y ppa:mozillateam/ppa echo ' Package: * Pin: release o=LP-PPA-mozillateam Pin-Priority: 1001 ' > /etc/apt/preferences.d/mozilla-firefox fi apt-get install -y firefox p11-kit-modules elif grep -q "ID=kali" /etc/os-release; then apt-get update apt-get install -y firefox-esr p11-kit-modules rm -f $HOME/Desktop/firefox.desktop cp \ /usr/share/applications/firefox-esr.desktop \ $HOME/Desktop/ chmod +x $HOME/Desktop/firefox-esr.desktop elif grep -q "ID=debian" /etc/os-release || grep -q "ID=parrot" /etc/os-release; then if [ "${ARCH}" == "amd64" ]; then install -d -m 0755 /etc/apt/keyrings wget -q https://packages.mozilla.org/apt/repo-signing-key.gpg -O- > /etc/apt/keyrings/packages.mozilla.org.asc echo "deb [signed-by=/etc/apt/keyrings/packages.mozilla.org.asc] https://packages.mozilla.org/apt mozilla main" > /etc/apt/sources.list.d/mozilla.list echo ' Package: * Pin: origin packages.mozilla.org Pin-Priority: 1000 ' > /etc/apt/preferences.d/mozilla apt-get update apt-get install -y firefox p11-kit-modules else apt-get update apt-get install -y firefox-esr p11-kit-modules rm -f $HOME/Desktop/firefox.desktop cp \ /usr/share/applications/firefox-esr.desktop \ $HOME/Desktop/ chmod +x $HOME/Desktop/firefox-esr.desktop fi else apt-mark unhold firefox || : apt-get remove firefox apt-get update apt-get install -y firefox p11-kit-modules fi # Add Langpacks FIREFOX_VERSION=$(curl -sI https://download.mozilla.org/?product=firefox-latest | awk -F '(releases/|/win32)' 
'/Location/ {print $2}') RELEASE_URL="https://releases.mozilla.org/pub/firefox/releases/${FIREFOX_VERSION}/win64/xpi/" LANGS=$(curl -Ls ${RELEASE_URL} | awk -F '(xpi">|</a>)' '/href.*xpi/ {print $2}' | tr '\n' ' ') EXTENSION_DIR=/usr/lib/firefox-addons/distribution/extensions/ mkdir -p ${EXTENSION_DIR} for LANG in ${LANGS}; do LANGCODE=$(echo ${LANG} | sed 's/\.xpi//g') echo "Downloading ${LANG} Language pack" curl -o \ ${EXTENSION_DIR}langpack-${LANGCODE}@firefox.mozilla.org.xpi -Ls \ ${RELEASE_URL}${LANG} done # Cleanup and install flash if supported if [[ "${DISTRO}" == @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|fedora39|fedora40) ]]; then if [ -z ${SKIP_CLEAN+x} ]; then dnf clean all fi elif [ "${DISTRO}" == "opensuse" ]; then if [ -z ${SKIP_CLEAN+x} ]; then zypper clean --all fi else if [ "$ARCH" == "arm64" ] && [ "$(lsb_release -cs)" == "focal" ] ; then echo "Firefox flash player not supported on arm64 Ubuntu Focal Skipping" elif grep -q "ID=debian" /etc/os-release || grep -q "ID=kali" /etc/os-release || grep -q "ID=parrot" /etc/os-release; then echo "Firefox flash player not supported on Debian" elif grep -q Focal /etc/os-release; then # Plugin to support running flash videos for sites like vimeo apt-get update apt-get install -y browser-plugin-freshplayer-pepperflash apt-mark hold firefox if [ -z ${SKIP_CLEAN+x} ]; then apt-get autoclean rm -rf \ /var/lib/apt/lists/* \ /var/tmp/* fi fi fi if [[ "${DISTRO}" != @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|opensuse|fedora39|fedora40) ]]; then # Update firefox to utilize the system certificate store instead of the one that ships with firefox if grep -q "ID=debian" /etc/os-release || grep -q "ID=kali" /etc/os-release || grep -q "ID=parrot" /etc/os-release && [ "${ARCH}" == "arm64" ]; then rm -f /usr/lib/firefox-esr/libnssckbi.so ln /usr/lib/$(arch)-linux-gnu/pkcs11/p11-kit-trust.so /usr/lib/firefox-esr/libnssckbi.so elif grep -q "ID=kali" /etc/os-release && [ 
"${ARCH}" == "amd64" ]; then rm -f /usr/lib/firefox-esr/libnssckbi.so ln /usr/lib/$(arch)-linux-gnu/pkcs11/p11-kit-trust.so /usr/lib/firefox-esr/libnssckbi.so else rm -f /usr/lib/firefox/libnssckbi.so ln /usr/lib/$(arch)-linux-gnu/pkcs11/p11-kit-trust.so /usr/lib/firefox/libnssckbi.so fi fi if [[ "${DISTRO}" == @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|fedora39|fedora40) ]]; then if [[ "${DISTRO}" == @(fedora39|fedora40) ]]; then preferences_file=/usr/lib64/firefox/browser/defaults/preferences/firefox-redhat-default-prefs.js else preferences_file=/usr/lib64/firefox/browser/defaults/preferences/all-redhat.js fi sed -i -e '/homepage/d' "$preferences_file" elif [ "${DISTRO}" == "opensuse" ]; then preferences_file=/usr/lib64/firefox/browser/defaults/preferences/firefox.js elif grep -q "ID=kali" /etc/os-release; then preferences_file=/usr/lib/firefox-esr/defaults/pref/firefox.js elif grep -q "ID=debian" /etc/os-release || grep -q "ID=parrot" /etc/os-release; then if [ "${ARCH}" == "amd64" ]; then preferences_file=/usr/lib/firefox/defaults/pref/firefox.js else preferences_file=/usr/lib/firefox-esr/defaults/pref/firefox.js fi else preferences_file=/usr/lib/firefox/browser/defaults/preferences/firefox.js fi # Disabling default first run URL for Debian based images if [[ "${DISTRO}" != @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|opensuse|fedora39|fedora40) ]]; then cat >"$preferences_file" <<EOF pref("datareporting.policy.firstRunURL", ""); pref("datareporting.policy.dataSubmissionEnabled", false); pref("datareporting.healthreport.service.enabled", false); pref("datareporting.healthreport.uploadEnabled", false); pref("trailhead.firstrun.branches", "nofirstrun-empty"); pref("browser.aboutwelcome.enabled", false); EOF fi if [[ "${DISTRO}" == @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|opensuse|fedora39|fedora40) ]]; then # Creating a default profile chown -R root:root $HOME firefox -headless 
-CreateProfile "kasm $HOME/.mozilla/firefox/kasm" # Generate a certdb to be detected on squid start HOME=/root firefox --headless & mkdir -p /root/.mozilla CERTDB=$(find /root/.mozilla* -name "cert9.db") while [ -z "${CERTDB}" ] ; do sleep 1 echo "waiting for certdb" CERTDB=$(find /root/.mozilla* -name "cert9.db") done sleep 2 kill $(pgrep firefox) CERTDIR=$(dirname ${CERTDB}) mv ${CERTDB} $HOME/.mozilla/firefox/kasm/ rm -Rf /root/.mozilla else # Creating Default Profile chown -R 0:0 $HOME firefox -headless -CreateProfile "kasm $HOME/.mozilla/firefox/kasm" fi # Silence Firefox security nag "Some of Firefox's features may offer less protection on your current operating system". echo 'user_pref("security.sandbox.warn_unprivileged_namespaces", false);' > $HOME/.mozilla/firefox/kasm/user.js chown 1000:1000 $HOME/.mozilla/firefox/kasm/user.js if [[ "${DISTRO}" == @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|opensuse|fedora39|fedora40) ]]; then set_desktop_icon fi # Starting with version 67, Firefox creates a unique profile mapping per installation which is hash generated # based off the installation path. 
Because that path will be static for our deployments we can assume the hash # and thus assign our profile to the default for the installation if grep -q "ID=kali" /etc/os-release; then cat >>$HOME/.mozilla/firefox/profiles.ini <<EOL [Install3B6073811A6ABF12] Default=kasm Locked=1 EOL elif grep -q "ID=debian" /etc/os-release || grep -q "ID=parrot" /etc/os-release; then if [ "${ARCH}" != "amd64" ]; then cat >>$HOME/.mozilla/firefox/profiles.ini <<EOL [Install3B6073811A6ABF12] Default=kasm Locked=1 EOL else cat >>$HOME/.mozilla/firefox/profiles.ini <<EOL [Install4F96D1932A9F858E] Default=kasm Locked=1 EOL fi elif [[ "${DISTRO}" != @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|opensuse|fedora39|fedora40) ]]; then cat >>$HOME/.mozilla/firefox/profiles.ini <<EOL [Install4F96D1932A9F858E] Default=kasm Locked=1 EOL elif [[ "${DISTRO}" == @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|opensuse|fedora39|fedora40) ]]; then cat >>$HOME/.mozilla/firefox/profiles.ini <<EOL [Install11457493C5A56847] Default=kasm Locked=1 EOL fi # Desktop Icon FIxes if [[ "${DISTRO}" == @(rockylinux9|oracle9|rhel9|almalinux9|fedora39|fedora40) ]]; then sed -i 's#Icon=/usr/lib/firefox#Icon=/usr/lib64/firefox#g' $HOME/Desktop/firefox.desktop fi # Cleanup for app layer chown -R 1000:0 $HOME find /usr/share/ -name "icon-theme.cache" -exec rm -f {} \; if [ -f $HOME/Desktop/firefox.desktop ]; then chmod +x $HOME/Desktop/firefox.desktop fi chown -R 1000:1000 $HOME/.mozilla ``` -------------------------------------------------------------------------------- /docs/content/docs/quickstart-devs.mdx: -------------------------------------------------------------------------------- ```markdown --- title: Quickstart description: Get started with Cua in three steps icon: Rocket --- import { Step, Steps } from 'fumadocs-ui/components/steps'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; This quickstart guides you through setting up your [computer 
environment](#set-up-your-computer-environment), programmatic control with a [Cua computer](#using-computer), and task automation with a [Cua agent](#using-agent): <Steps> <Step> ## Set Up Your Computer Environment Choose how you want to run your Cua computer. This will be the environment where your automated tasks will execute. You can run your Cua computer in the cloud (recommended for easiest setup), locally on macOS with Lume, locally on Windows with a Windows Sandbox, or in a Docker container on any platform. Choose the option that matches your system and needs. <Tabs items={['☁️ Cloud', '🐳 Docker', '🍎 Lume', '🪟 Windows Sandbox']}> <Tab value="☁️ Cloud"> Cua Cloud Sandbox provides virtual machines that run Ubuntu. 1. Go to [trycua.com/signin](https://www.trycua.com/signin) 2. Navigate to **Dashboard > Containers > Create Instance** 3. Create a **Medium, Ubuntu 22** sandbox 4. Note your sandbox name and API key Your Cloud Sandbox will be automatically configured and ready to use. </Tab> <Tab value="🍎 Lume"> Lume containers are macOS virtual machines that run on a macOS host machine. 1. Install the Lume CLI: ```bash /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" ``` 2. Start a local Cua sandbox: ```bash lume run macos-sequoia-cua:latest ``` </Tab> <Tab value="🪟 Windows Sandbox"> Windows Sandbox provides Windows virtual environments that run on a Windows host machine. 1. Enable [Windows Sandbox](https://learn.microsoft.com/en-us/windows/security/application-security/application-isolation/windows-sandbox/windows-sandbox-install) (requires Windows 10 Pro/Enterprise or Windows 11) 2. Install the `pywinsandbox` dependency: ```bash pip install -U git+https://github.com/karkason/pywinsandbox.git ``` 3. Windows Sandbox will be automatically configured when you run the CLI </Tab> <Tab value="🐳 Docker"> Docker provides a way to run Ubuntu containers on any host machine. 1. 
Install Docker Desktop or Docker Engine: 2. Pull the CUA Ubuntu sandbox: ```bash docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest ``` </Tab> </Tabs> </Step> <Step> ## Using Computer Connect to your Cua computer and perform basic interactions, such as taking screenshots or simulating user input. <Tabs items={['Python', 'TypeScript']}> <Tab value="Python"> Install the Cua computer Python SDK: ```bash pip install cua-computer ``` Then, connect to your desired computer environment: <Tabs items={['☁️ Cloud', '🐳 Docker', '🍎 Lume', '🪟 Windows Sandbox', '🖥️ Host Desktop']}> <Tab value="☁️ Cloud"> ```python from computer import Computer computer = Computer( os_type="linux", provider_type="cloud", name="your-sandbox-name", api_key="your-api-key" ) await computer.run() # Connect to the sandbox ``` </Tab> <Tab value="🍎 Lume"> ```python from computer import Computer computer = Computer( os_type="macos", provider_type="lume", name="macos-sequoia-cua:latest" ) await computer.run() # Launch & connect to the container ``` </Tab> <Tab value="🪟 Windows Sandbox"> ```python from computer import Computer computer = Computer( os_type="windows", provider_type="windows_sandbox" ) await computer.run() # Launch & connect to the container ``` </Tab> <Tab value="🐳 Docker"> ```python from computer import Computer computer = Computer( os_type="linux", provider_type="docker", name="trycua/cua-ubuntu:latest" ) await computer.run() # Launch & connect to the container ``` </Tab> <Tab value="🖥️ Host Desktop"> Install and run `cua-computer-server`: ```bash pip install cua-computer-server python -m computer_server ``` Then, use the `Computer` object to connect: ```python from computer import Computer computer = Computer(use_host_computer_server=True) await computer.run() # Connect to the host desktop ``` </Tab> </Tabs> Once connected, you can perform interactions: ```python try: # Take a screenshot of the computer's current display screenshot = await computer.interface.screenshot() # 
Simulate a left-click at coordinates (100, 100) await computer.interface.left_click(100, 100) # Type "Hello!" into the active application await computer.interface.type("Hello!") finally: await computer.close() ``` </Tab> <Tab value="TypeScript"> Install the Cua computer TypeScript SDK: ```bash npm install @trycua/computer ``` Then, connect to your desired computer environment: <Tabs items={['☁️ Cloud','🐳 Docker', '🍎 Lume', '🪟 Windows Sandbox', '🖥️ Host Desktop']}> <Tab value="☁️ Cloud"> ```typescript import { Computer, OSType } from '@trycua/computer'; const computer = new Computer({ osType: OSType.LINUX, name: "your-sandbox-name", apiKey: "your-api-key" }); await computer.run(); // Connect to the sandbox ``` </Tab> <Tab value="🍎 Lume"> ```typescript import { Computer, OSType, ProviderType } from '@trycua/computer'; const computer = new Computer({ osType: OSType.MACOS, providerType: ProviderType.LUME, name: "macos-sequoia-cua:latest" }); await computer.run(); // Launch & connect to the container ``` </Tab> <Tab value="🪟 Windows Sandbox"> ```typescript import { Computer, OSType, ProviderType } from '@trycua/computer'; const computer = new Computer({ osType: OSType.WINDOWS, providerType: ProviderType.WINDOWS_SANDBOX }); await computer.run(); // Launch & connect to the container ``` </Tab> <Tab value="🐳 Docker"> ```typescript import { Computer, OSType, ProviderType } from '@trycua/computer'; const computer = new Computer({ osType: OSType.LINUX, providerType: ProviderType.DOCKER, name: "trycua/cua-ubuntu:latest" }); await computer.run(); // Launch & connect to the container ``` </Tab> <Tab value="🖥️ Host Desktop"> First, install and run `cua-computer-server`: ```bash pip install cua-computer-server python -m computer_server ``` Then, use the `Computer` object to connect: ```typescript import { Computer } from '@trycua/computer'; const computer = new Computer({ useHostComputerServer: true }); await computer.run(); // Connect to the host desktop ``` </Tab> </Tabs> Once 
connected, you can perform interactions: ```typescript try { // Take a screenshot of the computer's current display const screenshot = await computer.interface.screenshot(); // Simulate a left-click at coordinates (100, 100) await computer.interface.leftClick(100, 100); // Type "Hello!" into the active application await computer.interface.typeText("Hello!"); } finally { await computer.close(); } ``` </Tab> </Tabs> Learn more about computers in the [Cua computers documentation](/computer-sdk/computers). You will see how to automate computers with agents in the next step. </Step> <Step> ## Using Agent Utilize an Agent to automate complex tasks by providing it with a goal and allowing it to interact with the computer environment. Install the Cua agent Python SDK: ```bash pip install "cua-agent[all]" ``` Then, use the `ComputerAgent` object: ```python from agent import ComputerAgent agent = ComputerAgent( model="anthropic/claude-3-5-sonnet-20241022", tools=[computer], max_trajectory_budget=5.0 ) messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}] async for result in agent.run(messages): for item in result["output"]: if item["type"] == "message": print(item["content"][0]["text"]) ``` Learn more about agents in [Agent Loops](/agent-sdk/agent-loops) and available models in [Supported Models](/agent-sdk/supported-model-providers/). 
</Step> </Steps> ## Next Steps - Learn more about [Cua computers](/computer-sdk/computers) and [computer commands](/computer-sdk/commands) - Read about [Agent loops](/agent-sdk/agent-loops), [tools](/agent-sdk/custom-tools), and [supported model providers](/agent-sdk/supported-model-providers/) - Join our [Discord community](https://discord.com/invite/mVnXXpdE85) for help ``` -------------------------------------------------------------------------------- /libs/python/computer-server/test_connection.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python """ Connection test script for Computer Server. This script tests both WebSocket (/ws) and REST (/cmd) connections to the Computer Server and keeps it alive, allowing you to verify the server is running correctly. """ import asyncio import json import websockets import argparse import sys import aiohttp import os import dotenv dotenv.load_dotenv() async def test_websocket_connection(host="localhost", port=8000, keep_alive=False, container_name=None, api_key=None): """Test WebSocket connection to the Computer Server.""" if container_name: # Container mode: use WSS with container domain and port 8443 uri = f"wss://{container_name}.containers.cloud.trycua.com:8443/ws" print(f"Connecting to container {container_name} at {uri}...") else: # Local mode: use WS with specified host and port uri = f"ws://{host}:{port}/ws" print(f"Connecting to local server at {uri}...") try: async with websockets.connect(uri) as websocket: print("WebSocket connection established!") # If container connection, send authentication first if container_name: if not api_key: print("Error: API key required for container connections") return False print("Sending authentication...") auth_message = { "command": "authenticate", "params": { "api_key": api_key, "container_name": container_name } } await websocket.send(json.dumps(auth_message)) auth_response = await websocket.recv() 
print(f"Authentication response: {auth_response}") # Check if authentication was successful auth_data = json.loads(auth_response) if not auth_data.get("success", False): print("Authentication failed!") return False print("Authentication successful!") # Send a test command to get version await websocket.send(json.dumps({"command": "version", "params": {}})) response = await websocket.recv() print(f"Version response: {response}") # Send a test command to get screen size await websocket.send(json.dumps({"command": "get_screen_size", "params": {}})) response = await websocket.recv() print(f"Screen size response: {response}") if keep_alive: print("\nKeeping WebSocket connection alive. Press Ctrl+C to exit...") while True: # Send a command every 5 seconds to keep the connection alive await asyncio.sleep(5) await websocket.send( json.dumps({"command": "get_cursor_position", "params": {}}) ) response = await websocket.recv() print(f"Cursor position: {response}") except websockets.exceptions.ConnectionClosed as e: print(f"WebSocket connection closed: {e}") return False except ConnectionRefusedError: print(f"Connection refused. 
Is the server running at {host}:{port}?") return False except Exception as e: print(f"WebSocket error: {e}") return False return True async def test_rest_connection(host="localhost", port=8000, keep_alive=False, container_name=None, api_key=None): """Test REST connection to the Computer Server.""" if container_name: # Container mode: use HTTPS with container domain and port 8443 base_url = f"https://{container_name}.containers.cloud.trycua.com:8443" print(f"Connecting to container {container_name} at {base_url}...") else: # Local mode: use HTTP with specified host and port base_url = f"http://{host}:{port}" print(f"Connecting to local server at {base_url}...") try: async with aiohttp.ClientSession() as session: print("REST connection established!") # Prepare headers for container authentication headers = {} if container_name: if not api_key: print("Error: API key required for container connections") return False headers["X-Container-Name"] = container_name headers["X-API-Key"] = api_key print(f"Using container authentication headers") # Test screenshot endpoint async with session.post( f"{base_url}/cmd", json={"command": "screenshot", "params": {}}, headers=headers ) as response: if response.status == 200: text = await response.text() print(f"Screenshot response: {text}") else: print(f"Screenshot request failed with status: {response.status}") print(await response.text()) return False # Test screen size endpoint async with session.post( f"{base_url}/cmd", json={"command": "get_screen_size", "params": {}}, headers=headers ) as response: if response.status == 200: text = await response.text() print(f"Screen size response: {text}") else: print(f"Screen size request failed with status: {response.status}") print(await response.text()) return False if keep_alive: print("\nKeeping REST connection alive. 
Press Ctrl+C to exit...") while True: # Send a command every 5 seconds to keep testing await asyncio.sleep(5) async with session.post( f"{base_url}/cmd", json={"command": "get_cursor_position", "params": {}}, headers=headers ) as response: if response.status == 200: text = await response.text() print(f"Cursor position: {text}") else: print(f"Cursor position request failed with status: {response.status}") print(await response.text()) return False except aiohttp.ClientError as e: print(f"REST connection error: {e}") return False except Exception as e: print(f"REST error: {e}") return False return True async def test_connection(host="localhost", port=8000, keep_alive=False, container_name=None, use_rest=False, api_key=None): """Test connection to the Computer Server using WebSocket or REST.""" if use_rest: return await test_rest_connection(host, port, keep_alive, container_name, api_key) else: return await test_websocket_connection(host, port, keep_alive, container_name, api_key) def parse_args(): parser = argparse.ArgumentParser(description="Test connection to Computer Server") parser.add_argument("--host", default="localhost", help="Host address (default: localhost)") parser.add_argument("-p", "--port", type=int, default=8000, help="Port number (default: 8000)") parser.add_argument("-c", "--container-name", help="Container name for cloud connection (uses WSS/HTTPS and port 8443)") parser.add_argument("--api-key", help="API key for container authentication (can also use CUA_API_KEY env var)") parser.add_argument("--keep-alive", action="store_true", help="Keep connection alive") parser.add_argument("--rest", action="store_true", help="Use REST endpoint (/cmd) instead of WebSocket (/ws)") return parser.parse_args() async def main(): args = parse_args() # Convert hyphenated argument to underscore for function parameter container_name = getattr(args, 'container_name', None) # Get API key from argument or environment variable api_key = getattr(args, 'api_key', None) or 
os.environ.get('CUA_API_KEY') # Check if container name is provided but API key is missing if container_name and not api_key: print("Warning: Container name provided but no API key found.") print("Please provide --api-key argument or set CUA_API_KEY environment variable.") return 1 print(f"Testing {'REST' if args.rest else 'WebSocket'} connection...") if container_name: print(f"Container: {container_name}") print(f"API Key: {'***' + api_key[-4:] if api_key and len(api_key) > 4 else 'Not provided'}") success = await test_connection( host=args.host, port=args.port, keep_alive=args.keep_alive, container_name=container_name, use_rest=args.rest, api_key=api_key ) return 0 if success else 1 if __name__ == "__main__": try: sys.exit(asyncio.run(main())) except KeyboardInterrupt: print("\nExiting...") sys.exit(0) ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/proxy/handlers.py: -------------------------------------------------------------------------------- ```python """ Request handlers for the proxy endpoints. """ import asyncio import json import logging import os from contextlib import contextmanager from typing import Dict, Any, List, Union, Optional from ..agent import ComputerAgent from computer import Computer logger = logging.getLogger(__name__) class ResponsesHandler: """Handler for /responses endpoint that processes agent requests.""" def __init__(self): self.computer = None self.agent = None # Simple in-memory caches self._computer_cache: Dict[str, Any] = {} self._agent_cache: Dict[str, Any] = {} async def setup_computer_agent( self, model: str, agent_kwargs: Optional[Dict[str, Any]] = None, computer_kwargs: Optional[Dict[str, Any]] = None, ): """Set up (and cache) computer and agent instances. 
        Caching keys:
        - Computer cache key: computer_kwargs
        - Agent cache key: {"model": model, **agent_kwargs}
        """
        agent_kwargs = agent_kwargs or {}
        computer_kwargs = computer_kwargs or {}

        def _stable_key(obj: Dict[str, Any]) -> str:
            # Deterministic JSON rendering of a kwargs dict, used as a cache key.
            try:
                return json.dumps(obj, sort_keys=True, separators=(",", ":"))
            except Exception:
                # Fallback: stringify non-serializable values
                safe_obj = {}
                for k, v in obj.items():
                    try:
                        json.dumps(v)
                        safe_obj[k] = v
                    except Exception:
                        safe_obj[k] = str(v)
                return json.dumps(safe_obj, sort_keys=True, separators=(",", ":"))

        # Determine if custom tools are supplied; if so, skip computer setup entirely
        has_custom_tools = bool(agent_kwargs.get("tools"))

        computer = None
        if not has_custom_tools:
            # ---------- Computer setup (with cache) ----------
            comp_key = _stable_key(computer_kwargs)
            computer = self._computer_cache.get(comp_key)
            if computer is None:
                # Default computer configuration
                default_c_config = {
                    "os_type": "linux",
                    "provider_type": "cloud",
                    "name": os.getenv("CUA_CONTAINER_NAME"),
                    "api_key": os.getenv("CUA_API_KEY"),
                }
                default_c_config.update(computer_kwargs)
                computer = Computer(**default_c_config)
                # Enter the Computer's async context here; the matching
                # __aexit__ is issued later in cleanup().
                await computer.__aenter__()
                self._computer_cache[comp_key] = computer
                logger.info(f"Computer created and cached with key={comp_key} config={default_c_config}")
            else:
                logger.info(f"Reusing cached computer for key={comp_key}")

        # Bind current computer reference (None if custom tools supplied)
        self.computer = computer

        # ---------- Agent setup (with cache) ----------
        # Build agent cache key from {model} + agent_kwargs (excluding tools unless explicitly passed)
        agent_kwargs_for_key = dict(agent_kwargs)
        agent_key_payload = {"model": model, **agent_kwargs_for_key}
        agent_key = _stable_key(agent_key_payload)
        agent = self._agent_cache.get(agent_key)
        if agent is None:
            # Default agent configuration
            default_a_config: Dict[str, Any] = {"model": model}
            if not has_custom_tools:
                default_a_config["tools"] = [computer]
            # Apply user overrides, but keep tools unless user explicitly sets
            if agent_kwargs:
                if not has_custom_tools:
                    agent_kwargs.setdefault("tools", [computer])
                default_a_config.update(agent_kwargs)
            # JSON-derived kwargs may have loose types; ignore static arg typing here
            agent = ComputerAgent(**default_a_config)  # type: ignore[arg-type]
            self._agent_cache[agent_key] = agent
            logger.info(f"Agent created and cached with key={agent_key} model={model}")
        else:
            # Ensure cached agent uses the current computer tool (in case object differs)
            # Only update if tools not explicitly provided in agent_kwargs
            if not has_custom_tools:
                try:
                    agent.tools = [computer]
                except Exception:
                    pass
            logger.info(f"Reusing cached agent for key={agent_key}")

        # Bind current agent reference
        self.agent = agent

    async def process_request(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a /responses request and return the result.

        Args:
            request_data: Dictionary containing model, input, and optional kwargs

        Returns:
            Dictionary with the agent's response. On success: {"success": True,
            "result": <first agent result>, "model": model}; on failure the
            error string is returned instead of raising.
        """
        try:
            # Extract request parameters
            model = request_data.get("model")
            input_data = request_data.get("input")
            agent_kwargs = request_data.get("agent_kwargs", {})
            computer_kwargs = request_data.get("computer_kwargs", {})
            env_overrides = request_data.get("env", {}) or {}

            if not model:
                raise ValueError("Model is required")
            if not input_data:
                raise ValueError("Input is required")

            # Apply env overrides for the duration of this request
            with self._env_overrides(env_overrides):
                # Set up (and possibly reuse) computer and agent via caches
                await self.setup_computer_agent(model, agent_kwargs, computer_kwargs)

                # Defensive: ensure agent is initialized for type checkers
                agent = self.agent
                if agent is None:
                    raise RuntimeError("Agent failed to initialize")

                # Convert input to messages format
                messages = self._convert_input_to_messages(input_data)

                # Run agent and get first result.
                # Only the first yielded result is returned to the caller.
                async for result in agent.run(messages):
                    # Return the first result and break
                    return {
                        "success": True,
                        "result": result,
                        "model": model
                    }

                # If no results were yielded
                return {
                    "success": False,
                    "error": "No results from agent",
                    "model": model
                }
        except Exception as e:
            logger.error(f"Error processing request: {e}")
            return {
                "success": False,
                "error": str(e),
                "model": request_data.get("model", "unknown")
            }

    def _convert_input_to_messages(self, input_data: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
        """Convert input data to messages format."""
        if isinstance(input_data, str):
            # Simple string input
            return [{"role": "user", "content": input_data}]
        elif isinstance(input_data, list):
            # Already in messages format
            messages = []
            for msg in input_data:
                # Convert content array format if needed
                if isinstance(msg.get("content"), list):
                    content_parts = []
                    for part in msg["content"]:
                        if part.get("type") == "input_text":
                            content_parts.append({"type": "text", "text": part["text"]})
                        elif part.get("type") == "input_image":
                            content_parts.append({
                                "type": "image_url",
                                "image_url": {"url": part["image_url"]}
                            })
                        else:
                            # Pass through any other part types unchanged
                            content_parts.append(part)
                    messages.append({
                        "role": msg["role"],
                        "content": content_parts
                    })
                else:
                    messages.append(msg)
            return messages
        else:
            raise ValueError("Input must be string or list of messages")

    async def cleanup(self):
        """Clean up resources."""
        if self.computer:
            try:
                # Exit the async context entered in setup_computer_agent.
                await self.computer.__aexit__(None, None, None)
            except Exception as e:
                logger.error(f"Error cleaning up computer: {e}")
            finally:
                self.computer = None
                self.agent = None

    @staticmethod
    @contextmanager
    def _env_overrides(env: Dict[str, str]):
        """Temporarily apply environment variable overrides for the current process.

        Restores previous values after the context exits.

        Args:
            env: Mapping of env var names to override for this request.
""" if not env: # No-op context yield return original: Dict[str, Optional[str]] = {} try: for k, v in env.items(): original[k] = os.environ.get(k) os.environ[k] = str(v) yield finally: for k, old in original.items(): if old is None: # Was not set before os.environ.pop(k, None) else: os.environ[k] = old ``` -------------------------------------------------------------------------------- /.github/workflows/publish-lume.yml: -------------------------------------------------------------------------------- ```yaml name: Publish Notarized Lume on: push: tags: - "lume-v*" workflow_dispatch: inputs: version: description: "Version to notarize (without v prefix)" required: true default: "0.1.0" workflow_call: inputs: version: description: "Version to notarize" required: true type: string secrets: APPLICATION_CERT_BASE64: required: true INSTALLER_CERT_BASE64: required: true CERT_PASSWORD: required: true APPLE_ID: required: true TEAM_ID: required: true APP_SPECIFIC_PASSWORD: required: true DEVELOPER_NAME: required: true permissions: contents: write env: APPLICATION_CERT_BASE64: ${{ secrets.APPLICATION_CERT_BASE64 }} INSTALLER_CERT_BASE64: ${{ secrets.INSTALLER_CERT_BASE64 }} CERT_PASSWORD: ${{ secrets.CERT_PASSWORD }} APPLE_ID: ${{ secrets.APPLE_ID }} TEAM_ID: ${{ secrets.TEAM_ID }} APP_SPECIFIC_PASSWORD: ${{ secrets.APP_SPECIFIC_PASSWORD }} DEVELOPER_NAME: ${{ secrets.DEVELOPER_NAME }} jobs: notarize: runs-on: macos-15 outputs: sha256_checksums: ${{ steps.generate_checksums.outputs.checksums }} version: ${{ steps.set_version.outputs.version }} steps: - uses: actions/checkout@v4 - name: Select Xcode 16 run: | sudo xcode-select -s /Applications/Xcode_16.app xcodebuild -version - name: Install dependencies run: | brew install cpio - name: Create .release directory run: mkdir -p .release - name: Set version id: set_version run: | # Determine version from tag or input if [[ "$GITHUB_REF" == refs/tags/lume-v* ]]; then VERSION="${GITHUB_REF#refs/tags/lume-v}" echo "Using version 
from tag: $VERSION" elif [[ -n "${{ inputs.version }}" ]]; then VERSION="${{ inputs.version }}" echo "Using version from input: $VERSION" elif [[ -n "${{ inputs.version }}" ]]; then VERSION="${{ inputs.version }}" echo "Using version from workflow_call input: $VERSION" else echo "Error: No version found in tag or input" exit 1 fi # Update version in Main.swift echo "Updating version in Main.swift to $VERSION" sed -i '' "s/static let current: String = \".*\"/static let current: String = \"$VERSION\"/" libs/lume/src/Main.swift # Set output for later steps echo "version=$VERSION" >> $GITHUB_OUTPUT - name: Import Certificates env: APPLICATION_CERT_BASE64: ${{ secrets.APPLICATION_CERT_BASE64 }} INSTALLER_CERT_BASE64: ${{ secrets.INSTALLER_CERT_BASE64 }} CERT_PASSWORD: ${{ secrets.CERT_PASSWORD }} KEYCHAIN_PASSWORD: "temp_password" run: | # Create a temporary keychain security create-keychain -p "$KEYCHAIN_PASSWORD" build.keychain security default-keychain -s build.keychain security unlock-keychain -p "$KEYCHAIN_PASSWORD" build.keychain security set-keychain-settings -t 3600 -l build.keychain # Import certificates echo $APPLICATION_CERT_BASE64 | base64 --decode > application.p12 echo $INSTALLER_CERT_BASE64 | base64 --decode > installer.p12 # Import certificates silently (minimize output) security import application.p12 -k build.keychain -P "$CERT_PASSWORD" -T /usr/bin/codesign -T /usr/bin/pkgbuild > /dev/null 2>&1 security import installer.p12 -k build.keychain -P "$CERT_PASSWORD" -T /usr/bin/codesign -T /usr/bin/pkgbuild > /dev/null 2>&1 # Allow codesign to access the certificates (minimal output) security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k "$KEYCHAIN_PASSWORD" build.keychain > /dev/null 2>&1 # Verify certificates were imported echo "Verifying signing identities..." 
CERT_COUNT=$(security find-identity -v -p codesigning build.keychain | grep -c "Developer ID Application" || echo "0") INSTALLER_COUNT=$(security find-identity -v build.keychain | grep -c "Developer ID Installer" || echo "0") if [ "$CERT_COUNT" -eq 0 ]; then echo "Error: No Developer ID Application certificate found" security find-identity -v -p codesigning build.keychain exit 1 fi if [ "$INSTALLER_COUNT" -eq 0 ]; then echo "Error: No Developer ID Installer certificate found" security find-identity -v build.keychain exit 1 fi echo "Found $CERT_COUNT Developer ID Application certificate(s) and $INSTALLER_COUNT Developer ID Installer certificate(s)" echo "All required certificates verified successfully" # Clean up certificate files rm application.p12 installer.p12 - name: Build and Notarize id: build_notarize env: APPLE_ID: ${{ secrets.APPLE_ID }} TEAM_ID: ${{ secrets.TEAM_ID }} APP_SPECIFIC_PASSWORD: ${{ secrets.APP_SPECIFIC_PASSWORD }} # These will now reference the imported certificates CERT_APPLICATION_NAME: "Developer ID Application: ${{ secrets.DEVELOPER_NAME }} (${{ secrets.TEAM_ID }})" CERT_INSTALLER_NAME: "Developer ID Installer: ${{ secrets.DEVELOPER_NAME }} (${{ secrets.TEAM_ID }})" VERSION: ${{ steps.set_version.outputs.version }} working-directory: ./libs/lume run: | # Minimal debug information echo "Starting build process..." echo "Swift version: $(swift --version | head -n 1)" echo "Building version: $VERSION" # Ensure .release directory exists mkdir -p .release chmod 755 .release # Build the project first (redirect verbose output) echo "Building project..." swift build --configuration release > build.log 2>&1 echo "Build completed." # Run the notarization script with LOG_LEVEL env var chmod +x scripts/build/build-release-notarized.sh cd scripts/build LOG_LEVEL=minimal ./build-release-notarized.sh # Return to the lume directory cd ../.. 
# Debug: List what files were actually created echo "Files in .release directory:" find .release -type f -name "*.tar.gz" -o -name "*.pkg.tar.gz" # Get architecture for output filename ARCH=$(uname -m) OS_IDENTIFIER="darwin-${ARCH}" # Output paths for later use echo "tarball_path=.release/lume-${VERSION}-${OS_IDENTIFIER}.tar.gz" >> $GITHUB_OUTPUT echo "pkg_path=.release/lume-${VERSION}-${OS_IDENTIFIER}.pkg.tar.gz" >> $GITHUB_OUTPUT - name: Generate SHA256 Checksums id: generate_checksums working-directory: ./libs/lume/.release run: | # Use existing checksums file if it exists, otherwise generate one if [ -f "checksums.txt" ]; then echo "Using existing checksums file" cat checksums.txt else echo "## SHA256 Checksums" > checksums.txt echo '```' >> checksums.txt shasum -a 256 lume-*.tar.gz >> checksums.txt echo '```' >> checksums.txt fi checksums=$(cat checksums.txt) echo "checksums<<EOF" >> $GITHUB_OUTPUT echo "$checksums" >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT # Debug: Show all files in the release directory echo "All files in release directory:" ls -la - name: Create Standard Version Releases working-directory: ./libs/lume/.release run: | VERSION=${{ steps.set_version.outputs.version }} ARCH=$(uname -m) OS_IDENTIFIER="darwin-${ARCH}" # Create OS-tagged symlinks ln -sf "lume-${VERSION}-${OS_IDENTIFIER}.tar.gz" "lume-darwin.tar.gz" ln -sf "lume-${VERSION}-${OS_IDENTIFIER}.pkg.tar.gz" "lume-darwin.pkg.tar.gz" # Create simple symlinks ln -sf "lume-${VERSION}-${OS_IDENTIFIER}.tar.gz" "lume.tar.gz" ln -sf "lume-${VERSION}-${OS_IDENTIFIER}.pkg.tar.gz" "lume.pkg.tar.gz" # List all files (including symlinks) echo "Files with symlinks in release directory:" ls -la - name: Upload Notarized Package (Tarball) uses: actions/upload-artifact@v4 with: name: lume-notarized-tarball path: ./libs/lume/${{ steps.build_notarize.outputs.tarball_path }} if-no-files-found: error - name: Upload Notarized Package (Installer) uses: actions/upload-artifact@v4 with: name: 
lume-notarized-installer path: ./libs/lume/${{ steps.build_notarize.outputs.pkg_path }} if-no-files-found: error - name: Create Release if: startsWith(github.ref, 'refs/tags/lume-v') uses: softprops/action-gh-release@v1 with: files: | ./libs/lume/${{ steps.build_notarize.outputs.tarball_path }} ./libs/lume/${{ steps.build_notarize.outputs.pkg_path }} ./libs/lume/.release/lume-darwin.tar.gz ./libs/lume/.release/lume-darwin.pkg.tar.gz ./libs/lume/.release/lume.tar.gz ./libs/lume/.release/lume.pkg.tar.gz body: | ${{ steps.generate_checksums.outputs.checksums }} ### Installation with script /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" ``` generate_release_notes: true make_latest: true ``` -------------------------------------------------------------------------------- /scripts/playground-docker.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash set -e # Colors for output GREEN='\033[0;32m' BLUE='\033[0;34m' RED='\033[0;31m' YELLOW='\033[1;33m' NC='\033[0m' # No Color # Print with color print_info() { echo -e "${BLUE}==> $1${NC}" } print_success() { echo -e "${GREEN}==> $1${NC}" } print_error() { echo -e "${RED}==> $1${NC}" } print_warning() { echo -e "${YELLOW}==> $1${NC}" } echo "🚀 Launching Cua Computer-Use Agent UI..." # Check if Docker is installed if ! command -v docker &> /dev/null; then print_error "Docker is not installed!" echo "" echo "To use Cua with Docker containers, you need to install Docker first:" echo "" echo "📦 Install Docker:" echo " • macOS: Download Docker Desktop from https://docker.com/products/docker-desktop" echo " • Windows: Download Docker Desktop from https://docker.com/products/docker-desktop" echo " • Linux: Follow instructions at https://docs.docker.com/engine/install/" echo "" echo "After installing Docker, run this script again." exit 1 fi # Check if Docker daemon is running if ! 
docker info &> /dev/null; then print_error "Docker is installed but not running!" echo "" echo "Please start Docker Desktop and try again." exit 1 fi print_success "Docker is installed and running!" # Save the original working directory ORIGINAL_DIR="$(pwd)" DEMO_DIR="$HOME/.cua" mkdir -p "$DEMO_DIR" # Check if we're already in the cua repository # Look for the specific trycua identifier in pyproject.toml if [[ -f "pyproject.toml" ]] && grep -q "[email protected]" "pyproject.toml"; then print_success "Already in Cua repository - using current directory" REPO_DIR="$ORIGINAL_DIR" USE_EXISTING_REPO=true else # Directories used by the script when not in repo REPO_DIR="$DEMO_DIR/cua" USE_EXISTING_REPO=false fi # Function to clean up on exit cleanup() { cd "$ORIGINAL_DIR" 2>/dev/null || true } trap cleanup EXIT echo "" echo "Choose your Cua setup:" echo "1) ☁️ Cua Cloud Sandbox (works on any system)" echo "2) 🖥️ Local macOS VMs (requires Apple Silicon Mac + macOS 15+)" echo "3) 🖥️ Local Windows VMs (requires Windows 10 / 11)" echo "" read -p "Enter your choice (1, 2, or 3): " CHOICE if [[ "$CHOICE" == "1" ]]; then # Cua Cloud Sandbox setup echo "" print_info "Setting up Cua Cloud Sandbox..." 
echo ""

# Check if existing .env.local already has CUA_API_KEY
REPO_ENV_FILE="$REPO_DIR/.env.local"
CURRENT_ENV_FILE="$ORIGINAL_DIR/.env.local"
CUA_API_KEY=""

# First check current directory
# (xargs trims surrounding whitespace from the extracted value)
if [[ -f "$CURRENT_ENV_FILE" ]] && grep -q "CUA_API_KEY=" "$CURRENT_ENV_FILE"; then
    EXISTING_CUA_KEY=$(grep "CUA_API_KEY=" "$CURRENT_ENV_FILE" | cut -d'=' -f2- | tr -d '"' | tr -d "'" | xargs)
    # Ignore empty values and the unfilled placeholder
    if [[ -n "$EXISTING_CUA_KEY" && "$EXISTING_CUA_KEY" != "your_cua_api_key_here" && "$EXISTING_CUA_KEY" != "" ]]; then
        CUA_API_KEY="$EXISTING_CUA_KEY"
    fi
fi

# Then check repo directory if not found in current dir
if [[ -z "$CUA_API_KEY" ]] && [[ -f "$REPO_ENV_FILE" ]] && grep -q "CUA_API_KEY=" "$REPO_ENV_FILE"; then
    EXISTING_CUA_KEY=$(grep "CUA_API_KEY=" "$REPO_ENV_FILE" | cut -d'=' -f2- | tr -d '"' | tr -d "'" | xargs)
    if [[ -n "$EXISTING_CUA_KEY" && "$EXISTING_CUA_KEY" != "your_cua_api_key_here" && "$EXISTING_CUA_KEY" != "" ]]; then
        CUA_API_KEY="$EXISTING_CUA_KEY"
    fi
fi

# If no valid API key found, prompt for one
if [[ -z "$CUA_API_KEY" ]]; then
    echo "To use Cua Cloud Sandbox, you need to:"
    echo "1. Sign up at https://trycua.com"
    echo "2. Create a Cloud Sandbox"
    echo "3. Generate an Api Key"
    echo ""
    read -p "Enter your Cua Api Key: " CUA_API_KEY
    if [[ -z "$CUA_API_KEY" ]]; then
        print_error "Cua Api Key is required for Cloud Sandbox."
        exit 1
    fi
else
    print_success "Found existing CUA API key"
fi

USE_CLOUD=true
COMPUTER_TYPE="cloud"
elif [[ "$CHOICE" == "2" ]]; then
    # Local macOS VM setup
    echo ""
    print_info "Setting up local macOS VMs..."

    # Check for Apple Silicon Mac
    if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then
        print_error "Local macOS VMs require an Apple Silicon Mac (M1/M2/M3/M4)."
        echo "💡 Consider using Cua Cloud Sandbox instead (option 1)."
exit 1 fi # Check for macOS 15 (Sequoia) or newer OSVERSION=$(sw_vers -productVersion) if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then print_error "Local macOS VMs require macOS 15 (Sequoia) or newer. You have $OSVERSION." echo "💡 Consider using Cua Cloud Sandbox instead (option 1)." exit 1 fi USE_CLOUD=false COMPUTER_TYPE="macos" elif [[ "$CHOICE" == "3" ]]; then # Local Windows VM setup echo "" print_info "Setting up local Windows VMs..." # Check if we're on Windows if [[ $(uname -s) != MINGW* && $(uname -s) != CYGWIN* && $(uname -s) != MSYS* ]]; then print_error "Local Windows VMs require Windows 10 or 11." echo "💡 Consider using Cua Cloud Sandbox instead (option 1)." echo "" echo "🔗 If you are using WSL, refer to the blog post to get started: https://www.trycua.com/blog/windows-sandbox" exit 1 fi USE_CLOUD=false COMPUTER_TYPE="windows" else print_error "Invalid choice. Please run the script again and choose 1, 2, or 3." exit 1 fi print_success "All checks passed! 🎉" # Create demo directory and handle repository if [[ "$USE_EXISTING_REPO" == "true" ]]; then print_info "Using existing repository in current directory" cd "$REPO_DIR" else # Clone or update the repository if [[ ! -d "$REPO_DIR" ]]; then print_info "Cloning Cua repository..." cd "$DEMO_DIR" git clone https://github.com/trycua/cua.git else print_info "Updating Cua repository..." cd "$REPO_DIR" git pull origin main fi cd "$REPO_DIR" fi # Create .env.local file with API keys ENV_FILE="$REPO_DIR/.env.local" if [[ ! 
-f "$ENV_FILE" ]]; then cat > "$ENV_FILE" << EOF # Uncomment and add your API keys here # OPENAI_API_KEY=your_openai_api_key_here # ANTHROPIC_API_KEY=your_anthropic_api_key_here CUA_API_KEY=your_cua_api_key_here EOF print_success "Created .env.local file with API key placeholders" else print_success "Found existing .env.local file - keeping your current settings" fi if [[ "$USE_CLOUD" == "true" ]]; then # Add CUA API key to .env.local if not already present if ! grep -q "CUA_API_KEY" "$ENV_FILE"; then echo "CUA_API_KEY=$CUA_API_KEY" >> "$ENV_FILE" print_success "Added CUA_API_KEY to .env.local" elif grep -q "CUA_API_KEY=your_cua_api_key_here" "$ENV_FILE"; then # Update placeholder with actual key sed -i.bak "s/CUA_API_KEY=your_cua_api_key_here/CUA_API_KEY=$CUA_API_KEY/" "$ENV_FILE" print_success "Updated CUA_API_KEY in .env.local" fi fi # Build the Docker image if it doesn't exist print_info "Checking Docker image..." if ! docker image inspect cua-dev-image &> /dev/null; then print_info "Building Docker image (this may take a while)..." ./scripts/run-docker-dev.sh build else print_success "Docker image already exists" fi # Install Lume if needed for local VMs if [[ "$USE_CLOUD" == "false" && "$COMPUTER_TYPE" == "macos" ]]; then if ! command -v lume &> /dev/null; then print_info "Installing Lume CLI..." curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash # Add lume to PATH for this session if it's not already there if ! command -v lume &> /dev/null; then export PATH="$PATH:$HOME/.local/bin" fi fi # Pull the macOS CUA image if not already present if ! lume ls | grep -q "macos-sequoia-cua"; then # Check available disk space IMAGE_SIZE_GB=30 AVAILABLE_SPACE_KB=$(df -k $HOME | tail -1 | awk '{print $4}') AVAILABLE_SPACE_GB=$(($AVAILABLE_SPACE_KB / 1024 / 1024)) echo "📊 The macOS CUA image will use approximately ${IMAGE_SIZE_GB}GB of disk space." echo " You currently have ${AVAILABLE_SPACE_GB}GB available on your system." 
# Prompt for confirmation read -p " Continue? [y]/n: " CONTINUE CONTINUE=${CONTINUE:-y} if [[ $CONTINUE =~ ^[Yy]$ ]]; then print_info "Pulling macOS CUA image (this may take a while)..." # Use caffeinate on macOS to prevent system sleep during the pull if command -v caffeinate &> /dev/null; then print_info "Using caffeinate to prevent system sleep during download..." caffeinate -i lume pull macos-sequoia-cua:latest else lume pull macos-sequoia-cua:latest fi else print_error "Installation cancelled." exit 1 fi fi # Check if the VM is running print_info "Checking if the macOS CUA VM is running..." VM_RUNNING=$(lume ls | grep "macos-sequoia-cua" | grep "running" || echo "") if [ -z "$VM_RUNNING" ]; then print_info "Starting the macOS CUA VM in the background..." lume run macos-sequoia-cua:latest & # Wait a moment for the VM to initialize sleep 5 print_success "VM started successfully." else print_success "macOS CUA VM is already running." fi fi # Create a convenience script to run the demo cat > "$DEMO_DIR/start_ui.sh" << EOF #!/bin/bash cd "$REPO_DIR" ./scripts/run-docker-dev.sh run agent_ui_examples.py EOF chmod +x "$DEMO_DIR/start_ui.sh" print_success "Setup complete!" if [[ "$USE_CLOUD" == "true" ]]; then echo "☁️ Cua Cloud Sandbox setup complete!" else echo "🖥️ Cua Local VM setup complete!" fi echo "📝 Edit $ENV_FILE to update your API keys" echo "🖥️ Start the playground by running: $DEMO_DIR/start_ui.sh" # Start the demo automatically echo print_info "Starting the Cua Computer-Use Agent UI..." 
echo "" print_success "Cua Computer-Use Agent UI is now running at http://localhost:7860/" echo echo "🌐 Open your browser and go to: http://localhost:7860/" echo "$DEMO_DIR/start_ui.sh" ``` -------------------------------------------------------------------------------- /tests/test_mcp_server_streaming.py: -------------------------------------------------------------------------------- ```python import asyncio import importlib.util import sys import types from pathlib import Path import pytest def _install_stub_module(name: str, module: types.ModuleType, registry: dict[str, types.ModuleType | None]) -> None: registry[name] = sys.modules.get(name) sys.modules[name] = module @pytest.fixture def server_module(): stubbed_modules: dict[str, types.ModuleType | None] = {} # Stub MCP Context primitives mcp_module = types.ModuleType("mcp") mcp_module.__path__ = [] # mark as package mcp_server_module = types.ModuleType("mcp.server") mcp_server_module.__path__ = [] fastmcp_module = types.ModuleType("mcp.server.fastmcp") class _StubContext: async def yield_message(self, *args, **kwargs): return None async def yield_tool_call(self, *args, **kwargs): return None async def yield_tool_output(self, *args, **kwargs): return None def report_progress(self, *_args, **_kwargs): return None def info(self, *_args, **_kwargs): return None def error(self, *_args, **_kwargs): return None class _StubImage: def __init__(self, format: str, data: bytes): self.format = format self.data = data class _StubFastMCP: def __init__(self, name: str): self.name = name self._tools: dict[str, types.FunctionType] = {} def tool(self, *args, **kwargs): def decorator(func): self._tools[func.__name__] = func return func return decorator def run(self): return None fastmcp_module.Context = _StubContext fastmcp_module.FastMCP = _StubFastMCP fastmcp_module.Image = _StubImage _install_stub_module("mcp", mcp_module, stubbed_modules) _install_stub_module("mcp.server", mcp_server_module, stubbed_modules) 
_install_stub_module("mcp.server.fastmcp", fastmcp_module, stubbed_modules) # Stub Computer module to avoid heavy dependencies computer_module = types.ModuleType("computer") class _StubInterface: async def screenshot(self) -> bytes: # pragma: no cover - default stub return b"" class _StubComputer: def __init__(self, *args, **kwargs): self.interface = _StubInterface() async def run(self): # pragma: no cover - default stub return None class _StubVMProviderType: CLOUD = "cloud" LOCAL = "local" computer_module.Computer = _StubComputer computer_module.VMProviderType = _StubVMProviderType _install_stub_module("computer", computer_module, stubbed_modules) # Stub agent module so server can import ComputerAgent agent_module = types.ModuleType("agent") class _StubComputerAgent: def __init__(self, *args, **kwargs): pass async def run(self, *_args, **_kwargs): # pragma: no cover - default stub if False: # pragma: no cover yield {} return agent_module.ComputerAgent = _StubComputerAgent _install_stub_module("agent", agent_module, stubbed_modules) module_name = "mcp_server_server_under_test" module_path = Path("libs/python/mcp-server/mcp_server/server.py").resolve() spec = importlib.util.spec_from_file_location(module_name, module_path) server_module = importlib.util.module_from_spec(spec) assert spec and spec.loader spec.loader.exec_module(server_module) server_instance = getattr(server_module, "server", None) if server_instance is not None and hasattr(server_instance, "_tools"): for name, func in server_instance._tools.items(): setattr(server_module, name, func) try: yield server_module finally: sys.modules.pop(module_name, None) for name, original in stubbed_modules.items(): if original is None: sys.modules.pop(name, None) else: sys.modules[name] = original class FakeContext: def __init__(self) -> None: self.events: list[tuple] = [] self.progress_updates: list[float] = [] def info(self, message: str) -> None: self.events.append(("info", message)) def error(self, message: str) 
-> None: self.events.append(("error", message)) def report_progress(self, value: float) -> None: self.progress_updates.append(value) async def yield_message(self, *, role: str, content): timestamp = asyncio.get_running_loop().time() self.events.append(("message", role, content, timestamp)) async def yield_tool_call(self, *, name: str | None, call_id: str, input): timestamp = asyncio.get_running_loop().time() self.events.append(("tool_call", name, call_id, input, timestamp)) async def yield_tool_output(self, *, call_id: str, output, is_error: bool = False): timestamp = asyncio.get_running_loop().time() self.events.append(("tool_output", call_id, output, is_error, timestamp)) def test_run_cua_task_streams_partial_results(server_module): async def _run_test(): class FakeAgent: script = [] def __init__(self, *args, **kwargs): pass async def run(self, messages): # type: ignore[override] for factory, delay in type(self).script: yield factory(messages) if delay: await asyncio.sleep(delay) FakeAgent.script = [ ( lambda _messages: { "output": [ { "type": "message", "role": "assistant", "content": [ {"type": "output_text", "text": "First chunk"} ], } ] }, 0.0, ), ( lambda _messages: { "output": [ { "type": "tool_use", "id": "call_1", "name": "computer", "input": {"action": "click"}, }, { "type": "computer_call_output", "call_id": "call_1", "output": [ {"type": "text", "text": "Tool completed"} ], }, ] }, 0.05, ), ] class FakeInterface: def __init__(self) -> None: self.calls = 0 async def screenshot(self) -> bytes: self.calls += 1 return b"final-image" fake_interface = FakeInterface() server_module.global_computer = types.SimpleNamespace(interface=fake_interface) server_module.ComputerAgent = FakeAgent # type: ignore[assignment] ctx = FakeContext() task = asyncio.create_task(server_module.run_cua_task(ctx, "open settings")) await asyncio.sleep(0.01) assert not task.done(), "Task should still be running to simulate long operation" message_events = [event for event in 
ctx.events if event[0] == "message"] assert message_events, "Expected message event before task completion" text_result, image = await task assert "First chunk" in text_result assert "Tool completed" in text_result assert image.data == b"final-image" assert fake_interface.calls == 1 tool_call_events = [event for event in ctx.events if event[0] == "tool_call"] tool_output_events = [event for event in ctx.events if event[0] == "tool_output"] assert tool_call_events and tool_output_events assert tool_call_events[0][2] == "call_1" assert tool_output_events[0][1] == "call_1" asyncio.run(_run_test()) def test_run_multi_cua_tasks_reports_progress(server_module, monkeypatch): async def _run_test(): class FakeAgent: script = [] def __init__(self, *args, **kwargs): pass async def run(self, messages): # type: ignore[override] for factory, delay in type(self).script: yield factory(messages) if delay: await asyncio.sleep(delay) FakeAgent.script = [ ( lambda messages: { "output": [ { "type": "message", "role": "assistant", "content": [ { "type": "output_text", "text": f"Result for {messages[0].get('content')}", } ], } ] }, 0.0, ) ] server_module.ComputerAgent = FakeAgent # type: ignore[assignment] class FakeInterface: async def screenshot(self) -> bytes: return b"progress-image" server_module.global_computer = types.SimpleNamespace(interface=FakeInterface()) ctx = FakeContext() results = await server_module.run_multi_cua_tasks(ctx, ["a", "b", "c"]) assert len(results) == 3 assert results[0][0] == "Result for a" assert ctx.progress_updates[0] == pytest.approx(0.0) assert ctx.progress_updates[-1] == pytest.approx(1.0) assert len(ctx.progress_updates) == 6 asyncio.run(_run_test()) ``` -------------------------------------------------------------------------------- /libs/python/computer/computer/providers/cloud/provider.py: -------------------------------------------------------------------------------- ```python """Cloud VM provider implementation using CUA Public API. 
Implements the following public API endpoints: - GET /v1/vms - POST /v1/vms/:name/start - POST /v1/vms/:name/stop - POST /v1/vms/:name/restart """ import logging from typing import Dict, List, Optional, Any from ..base import BaseVMProvider, VMProviderType from ..types import ListVMsResponse, MinimalVM # Setup logging logger = logging.getLogger(__name__) import asyncio import aiohttp from urllib.parse import urlparse import os DEFAULT_API_BASE = os.getenv("CUA_API_BASE", "https://api.cua.ai") class CloudProvider(BaseVMProvider): """Cloud VM Provider implementation.""" def __init__( self, api_key: str, verbose: bool = False, api_base: Optional[str] = None, **kwargs, ): """ Args: api_key: API key for authentication name: Name of the VM verbose: Enable verbose logging """ assert api_key, "api_key required for CloudProvider" self.api_key = api_key self.verbose = verbose self.api_base = (api_base or DEFAULT_API_BASE).rstrip("/") @property def provider_type(self) -> VMProviderType: return VMProviderType.CLOUD async def __aenter__(self): return self async def __aexit__(self, exc_type, exc_val, exc_tb): pass async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: """Get VM information by querying the VM status endpoint. 
- Build hostname via get_ip(name) → "{name}.containers.cloud.trycua.com" - Probe https://{hostname}:8443/status with a short timeout - If JSON contains a "status" field, return it; otherwise infer - Fallback to DNS resolve check to distinguish unknown vs not_found """ hostname = await self.get_ip(name=name) # Try HTTPS probe to the computer-server status endpoint (8443) try: timeout = aiohttp.ClientTimeout(total=3) async with aiohttp.ClientSession(timeout=timeout) as session: url = f"https://{hostname}:8443/status" async with session.get(url, allow_redirects=False) as resp: status_code = resp.status vm_status: str vm_os_type: Optional[str] = None if status_code == 200: try: data = await resp.json(content_type=None) vm_status = str(data.get("status", "ok")) vm_os_type = str(data.get("os_type")) except Exception: vm_status = "unknown" elif status_code < 500: vm_status = "unknown" else: vm_status = "unknown" return { "name": name, "status": "running" if vm_status == "ok" else vm_status, "api_url": f"https://{hostname}:8443", "os_type": vm_os_type, } except Exception: return {"name": name, "status": "not_found", "api_url": f"https://{hostname}:8443"} async def list_vms(self) -> ListVMsResponse: url = f"{self.api_base}/v1/vms" headers = { "Authorization": f"Bearer {self.api_key}", "Accept": "application/json", } async with aiohttp.ClientSession() as session: async with session.get(url, headers=headers) as resp: if resp.status == 200: try: data = await resp.json(content_type=None) except Exception: text = await resp.text() logger.error(f"Failed to parse list_vms JSON: {text}") return [] if isinstance(data, list): # Enrich with convenience URLs when possible. 
enriched: List[Dict[str, Any]] = [] for item in data: vm = dict(item) if isinstance(item, dict) else {} name = vm.get("name") password = vm.get("password") if isinstance(name, str) and name: host = f"{name}.containers.cloud.trycua.com" # api_url: always set if missing if not vm.get("api_url"): vm["api_url"] = f"https://{host}:8443" # vnc_url: only when password available if not vm.get("vnc_url") and isinstance(password, str) and password: vm[ "vnc_url" ] = f"https://{host}/vnc.html?autoconnect=true&password={password}" enriched.append(vm) return enriched # type: ignore[return-value] logger.warning("Unexpected response for list_vms; expected list") return [] elif resp.status == 401: logger.error("Unauthorized: invalid CUA API key for list_vms") return [] else: text = await resp.text() logger.error(f"list_vms failed: HTTP {resp.status} - {text}") return [] async def run_vm(self, name: str, image: Optional[str] = None, run_opts: Optional[Dict[str, Any]] = None, storage: Optional[str] = None) -> Dict[str, Any]: """Start a VM via public API. 
Returns a minimal status.""" url = f"{self.api_base}/v1/vms/{name}/start" headers = { "Authorization": f"Bearer {self.api_key}", "Accept": "application/json", } async with aiohttp.ClientSession() as session: async with session.post(url, headers=headers) as resp: if resp.status in (200, 201, 202, 204): return {"name": name, "status": "starting"} elif resp.status == 404: return {"name": name, "status": "not_found"} elif resp.status == 401: return {"name": name, "status": "unauthorized"} else: text = await resp.text() return {"name": name, "status": "error", "message": text} async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: """Stop a VM via public API.""" url = f"{self.api_base}/v1/vms/{name}/stop" headers = { "Authorization": f"Bearer {self.api_key}", "Accept": "application/json", } async with aiohttp.ClientSession() as session: async with session.post(url, headers=headers) as resp: if resp.status in (200, 202): # Spec says 202 with {"status":"stopping"} body_status: Optional[str] = None try: data = await resp.json(content_type=None) body_status = data.get("status") if isinstance(data, dict) else None except Exception: body_status = None return {"name": name, "status": body_status or "stopping"} elif resp.status == 404: return {"name": name, "status": "not_found"} elif resp.status == 401: return {"name": name, "status": "unauthorized"} else: text = await resp.text() return {"name": name, "status": "error", "message": text} async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]: """Restart a VM via public API.""" url = f"{self.api_base}/v1/vms/{name}/restart" headers = { "Authorization": f"Bearer {self.api_key}", "Accept": "application/json", } async with aiohttp.ClientSession() as session: async with session.post(url, headers=headers) as resp: if resp.status in (200, 202): # Spec says 202 with {"status":"restarting"} body_status: Optional[str] = None try: data = await resp.json(content_type=None) 
body_status = data.get("status") if isinstance(data, dict) else None except Exception: body_status = None return {"name": name, "status": body_status or "restarting"} elif resp.status == 404: return {"name": name, "status": "not_found"} elif resp.status == 401: return {"name": name, "status": "unauthorized"} else: text = await resp.text() return {"name": name, "status": "error", "message": text} async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]: logger.warning("CloudProvider.update_vm is not implemented via public API") return {"name": name, "status": "unchanged", "message": "update_vm not supported by public API"} async def get_ip(self, name: Optional[str] = None, storage: Optional[str] = None, retry_delay: int = 2) -> str: """ Return the VM's IP address as '{container_name}.containers.cloud.trycua.com'. Uses the provided 'name' argument (the VM name requested by the caller), falling back to self.name only if 'name' is None. Retries up to 3 times with retry_delay seconds if hostname is not available. 
""" if name is None: raise ValueError("VM name is required for CloudProvider.get_ip") return f"{name}.containers.cloud.trycua.com" ``` -------------------------------------------------------------------------------- /libs/lume/scripts/install.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash set -e # Lume Installer # This script installs Lume to your system # Define colors for output BOLD=$(tput bold) NORMAL=$(tput sgr0) RED=$(tput setaf 1) GREEN=$(tput setaf 2) BLUE=$(tput setaf 4) YELLOW=$(tput setaf 3) # Check if running as root or with sudo if [ "$(id -u)" -eq 0 ] || [ -n "$SUDO_USER" ]; then echo "${RED}Error: Do not run this script with sudo or as root.${NORMAL}" echo "If you need to install to a system directory, create it first with proper permissions:" echo " sudo mkdir -p /desired/directory && sudo chown $(whoami) /desired/directory" echo "Then run the installer normally:" echo " ./install.sh --install-dir=/desired/directory" exit 1 fi # Default installation directory (user-specific, doesn't require sudo) DEFAULT_INSTALL_DIR="$HOME/.local/bin" INSTALL_DIR="${INSTALL_DIR:-$DEFAULT_INSTALL_DIR}" # GitHub info GITHUB_REPO="trycua/cua" LATEST_RELEASE_URL="https://api.github.com/repos/$GITHUB_REPO/releases/latest" # Option to skip background service setup (default: install it) INSTALL_BACKGROUND_SERVICE=true # Default port for lume serve (default: 7777) LUME_PORT=7777 # Parse command line arguments while [ "$#" -gt 0 ]; do case "$1" in --install-dir) INSTALL_DIR="$2" shift ;; --port) LUME_PORT="$2" shift ;; --no-background-service) INSTALL_BACKGROUND_SERVICE=false ;; --help) echo "${BOLD}${BLUE}Lume Installer${NORMAL}" echo "Usage: $0 [OPTIONS]" echo "" echo "Options:" echo " --install-dir DIR Install to the specified directory (default: $DEFAULT_INSTALL_DIR)" echo " --port PORT Specify the port for lume serve (default: 7777)" echo " --no-background-service Do not setup the Lume background service 
(LaunchAgent)" echo " --help Display this help message" echo "" echo "Examples:" echo " $0 # Install to $DEFAULT_INSTALL_DIR and setup background service" echo " $0 --install-dir=/usr/local/bin # Install to system directory (may require root privileges)" echo " $0 --port 7778 # Use port 7778 instead of the default 7777" echo " $0 --no-background-service # Install without setting up the background service" echo " INSTALL_DIR=/opt/lume $0 # Install to /opt/lume (legacy env var support)" exit 0 ;; *) echo "${RED}Unknown option: $1${NORMAL}" echo "Use --help for usage information" exit 1 ;; esac shift done echo "${BOLD}${BLUE}Lume Installer${NORMAL}" echo "This script will install Lume to your system." # Check if we're running with appropriate permissions check_permissions() { # System directories that typically require root privileges SYSTEM_DIRS=("/usr/local/bin" "/usr/bin" "/bin" "/opt") NEEDS_ROOT=false for DIR in "${SYSTEM_DIRS[@]}"; do if [[ "$INSTALL_DIR" == "$DIR"* ]] && [ ! -w "$INSTALL_DIR" ]; then NEEDS_ROOT=true break fi done if [ "$NEEDS_ROOT" = true ]; then echo "${YELLOW}Warning: Installing to $INSTALL_DIR may require root privileges.${NORMAL}" echo "Consider these alternatives:" echo " • Install to a user-writable location: $0 --install-dir=$HOME/.local/bin" echo " • Create the directory with correct permissions first:" echo " sudo mkdir -p $INSTALL_DIR && sudo chown $(whoami) $INSTALL_DIR" echo "" # Check if we already have write permission (might have been set up previously) if [ ! -w "$INSTALL_DIR" ] && [ ! -w "$(dirname "$INSTALL_DIR")" ]; then echo "${RED}Error: You don't have write permission to $INSTALL_DIR${NORMAL}" echo "Please choose a different installation directory or ensure you have the proper permissions." 
exit 1 fi fi } # Detect OS and architecture detect_platform() { OS=$(uname -s | tr '[:upper:]' '[:lower:]') ARCH=$(uname -m) if [ "$OS" != "darwin" ]; then echo "${RED}Error: Currently only macOS is supported.${NORMAL}" exit 1 fi if [ "$ARCH" != "arm64" ]; then echo "${RED}Error: Lume only supports macOS on Apple Silicon (ARM64).${NORMAL}" exit 1 fi PLATFORM="darwin-arm64" echo "Detected platform: ${BOLD}$PLATFORM${NORMAL}" } # Create temporary directory create_temp_dir() { TEMP_DIR=$(mktemp -d) echo "Using temporary directory: $TEMP_DIR" # Make sure we clean up on exit trap 'rm -rf "$TEMP_DIR"' EXIT } # Download the latest release download_release() { echo "Downloading latest Lume release..." # Use the direct download link with the non-versioned symlink DOWNLOAD_URL="https://github.com/$GITHUB_REPO/releases/latest/download/lume.tar.gz" echo "Downloading from: $DOWNLOAD_URL" # Download the tarball if command -v curl &> /dev/null; then curl -L --progress-bar "$DOWNLOAD_URL" -o "$TEMP_DIR/lume.tar.gz" # Verify the download was successful if [ ! -s "$TEMP_DIR/lume.tar.gz" ]; then echo "${RED}Error: Failed to download Lume.${NORMAL}" echo "The download URL may be incorrect or the file may not exist." exit 1 fi # Verify the file is a valid archive if ! tar -tzf "$TEMP_DIR/lume.tar.gz" > /dev/null 2>&1; then echo "${RED}Error: The downloaded file is not a valid tar.gz archive.${NORMAL}" echo "Let's try the alternative URL..." # Try alternative URL ALT_DOWNLOAD_URL="https://github.com/$GITHUB_REPO/releases/latest/download/lume-$PLATFORM.tar.gz" echo "Downloading from alternative URL: $ALT_DOWNLOAD_URL" curl -L --progress-bar "$ALT_DOWNLOAD_URL" -o "$TEMP_DIR/lume.tar.gz" # Check again if ! 
tar -tzf "$TEMP_DIR/lume.tar.gz" > /dev/null 2>&1; then echo "${RED}Error: Could not download a valid Lume archive.${NORMAL}" echo "Please try installing Lume manually from: https://github.com/$GITHUB_REPO/releases/latest" exit 1 fi fi else echo "${RED}Error: curl is required but not installed.${NORMAL}" exit 1 fi } # Extract and install install_binary() { echo "Extracting archive..." tar -xzf "$TEMP_DIR/lume.tar.gz" -C "$TEMP_DIR" echo "Installing to $INSTALL_DIR..." # Create install directory if it doesn't exist mkdir -p "$INSTALL_DIR" # Move the binary to the installation directory mv "$TEMP_DIR/lume" "$INSTALL_DIR/" # Make the binary executable chmod +x "$INSTALL_DIR/lume" echo "${GREEN}Installation complete!${NORMAL}" echo "Lume has been installed to ${BOLD}$INSTALL_DIR/lume${NORMAL}" # Check if the installation directory is in PATH if [ -n "${PATH##*$INSTALL_DIR*}" ]; then SHELL_NAME=$(basename "$SHELL") echo "${YELLOW}Warning: $INSTALL_DIR is not in your PATH.${NORMAL}" case "$SHELL_NAME" in zsh) echo "To add it, run:" echo " echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.zprofile" ;; bash) echo "To add it, run:" echo " echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.bash_profile" ;; fish) echo "To add it, run:" echo " echo 'fish_add_path $INSTALL_DIR' >> ~/.config/fish/config.fish" ;; *) echo "Add $INSTALL_DIR to your PATH in your shell profile file." ;; esac fi } # Main installation flow main() { check_permissions detect_platform create_temp_dir download_release install_binary echo "" echo "${GREEN}${BOLD}Lume has been successfully installed!${NORMAL}" echo "Run ${BOLD}lume${NORMAL} to get started." if [ "$INSTALL_BACKGROUND_SERVICE" = true ]; then # --- Setup background service (LaunchAgent) for Lume --- SERVICE_NAME="com.trycua.lume_daemon" PLIST_PATH="$HOME/Library/LaunchAgents/$SERVICE_NAME.plist" LUME_BIN="$INSTALL_DIR/lume" echo "" echo "Setting up LaunchAgent to run lume daemon on login..." 
# Create LaunchAgents directory if it doesn't exist mkdir -p "$HOME/Library/LaunchAgents" # Unload existing service if present if [ -f "$PLIST_PATH" ]; then echo "Existing LaunchAgent found. Unloading..." launchctl unload "$PLIST_PATH" 2>/dev/null || true fi # Create the plist file cat <<EOF > "$PLIST_PATH" <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> <plist version="1.0"> <dict> <key>Label</key> <string>$SERVICE_NAME</string> <key>ProgramArguments</key> <array> <string>$LUME_BIN</string> <string>serve</string> <string>--port</string> <string>$LUME_PORT</string> </array> <key>RunAtLoad</key> <true/> <key>KeepAlive</key> <true/> <key>WorkingDirectory</key> <string>$HOME</string> <key>EnvironmentVariables</key> <dict> <key>PATH</key> <string>/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$HOME/.local/bin</string> <key>HOME</key> <string>$HOME</string> </dict> <key>StandardOutPath</key> <string>/tmp/lume_daemon.log</string> <key>StandardErrorPath</key> <string>/tmp/lume_daemon.error.log</string> <key>ProcessType</key> <string>Interactive</string> <key>SessionType</key> <string>Aqua</string> </dict> </plist> EOF # Set permissions chmod 644 "$PLIST_PATH" touch /tmp/lume_daemon.log /tmp/lume_daemon.error.log chmod 644 /tmp/lume_daemon.log /tmp/lume_daemon.error.log # Load the LaunchAgent echo "Loading LaunchAgent..." launchctl unload "$PLIST_PATH" 2>/dev/null || true launchctl load "$PLIST_PATH" echo "${GREEN}Lume daemon LaunchAgent installed and loaded. 
It will start automatically on login!${NORMAL}" echo "To check status: launchctl list | grep $SERVICE_NAME" echo "To view logs: tail -f /tmp/lume_daemon.log" echo "" echo "To remove the lume daemon service, run:" echo " launchctl unload \"$PLIST_PATH\"" echo " rm \"$PLIST_PATH\"" else SERVICE_NAME="com.trycua.lume_daemon" PLIST_PATH="$HOME/Library/LaunchAgents/$SERVICE_NAME.plist" if [ -f "$PLIST_PATH" ]; then echo "Removing existing Lume background service (LaunchAgent)..." launchctl unload "$PLIST_PATH" 2>/dev/null || true rm "$PLIST_PATH" echo "Lume background service (LaunchAgent) removed." else echo "Skipping Lume background service (LaunchAgent) setup as requested (use --no-background-service)." fi fi } # Run the installation main ``` -------------------------------------------------------------------------------- /blog/hack-the-north.md: -------------------------------------------------------------------------------- ```markdown # What happens when hackathon judging is a public benchmark (Hack the North edition) *Written by Francesco Bonacci — Reviewed by Parth Patel (HUD W25) — Sept 25, 2025* ## Prologue Hack the North ran Sept 12–14 at the University of Waterloo. Official count this year: **1,778 hackers**, and a [Guinness World Record for the most people building interlocking plastic brick sculptures simultaneously](https://uwaterloo.ca/news/eweal-making-hackathons-fun-again-breaking-guinness-world-record). Our team arrived from Europe and the US one day before the hackathon, after a summer scattered post–YC X25, waiting for our O-1 visas. **HUD**’s founders Parth and Jay flew in from SF to help us run evaluations, and Michael and Parth from **Ollama** joined as co-sponsors. Our plan was ambitious: run the **first state-of-the-art Computer-Use Agents track**, score it on a public benchmark, and give the top performer a guaranteed YC interview. (Interview ≠ offer. YC didn’t judge.) 
The rest, as they say, was a 36-hour story worth telling—and a playbook worth sharing for anyone thinking about running or sponsoring this type of hackathon track.
In our case, Hack the North didn’t provide Devpost access until the very end, so our form was the only way to build a working roster. Only a small trickle of sign-ups came through by the time the event kicked off—too few to plan around, but clearly the right kind of crowd. Several were already familiar with computer-use agents; one was even interning at Shopify, working on this space. ## At the Sponsor Booth Day 0 on campus made the difference. We arrived a couple of hours early to collect swag shipments (around 1,200 stickers of our new **Cua-la** mascot, plus t-shirts and hats—always plan ~1.5× the estimated number of hackers!). After walking the sponsor floor and explaining the track at our booth, ~40 hackers signed up. **Moral:** sponsor booths are still the most effective way to recruit for a track. **Suggestions to maximize booth time (for HTN this is only ~24 of the total 36 hours):** - **Be unmistakable.** Run a mini-challenge and a visible giveaway. We offered 5 × $200 Anthropic credits as a lightning raffle and constantly advertised in HTN Slack. Shout-out to our neighbors at **Mintlify**, who dressed their teammate as a mint plant - memorable and effective. - **Create multiple touchpoints.** Hand out flyers and QR codes, and ask nearby booths to cross-refer. Big thanks to the YC team for flyer space and student connections - and to Michael (Ollama) for pointing visitors our way. - **Never leave the booth empty.** Keep someone at the booth at all times and rotate shifts. With four founding engineers on-site, coverage was easy. Even after hacking kicked off, the booth stayed a point of reference - and even then multiple participants DM’d us asking where to meet up. - **Students are organic DevRel.** Our runner-up, Adam, hung out with us at the booth, pulling more people in. Peer-to-peer energy creates the network effect you need!  
*(Our Founding Engineer, Morgan, hangs out with students at the stand, while Adam (runner-up) hacks on the side.)* ## 02:30 a.m. is still prime time at a hackathon Hack the North gives sponsors a 30-minute API Workshop during the early hours of the event—a perfect moment to shift from talking to building. Our slot landed at **2:30 a.m.** (*perks of the cheapest sponsor tier*). Thirty students showed up, energy surprisingly high. James, our new Founding DevRel Engineer, led the session and nailed it. **Our track rules were simple:** 1. Build a Computer-Use Agent with the [Cua framework](https://github.com/trycua/cua) 2. Benchmark the agent on [HUD](https://www.hud.so) 3. Use [OSWorld-Tiny](https://huggingface.co/datasets/ddupont/OSWorld-Tiny-Public): a 14-task distillation of the full benchmark (~360 tasks, >1h) **Suggestions:** - **Leave something tangible.** We provided a Jupyter Notebook teams could run immediately. - **Narrow scope, strong starts.** The more focused the challenge, the more **robust starting points** you should provide. - **Want the details?** [Here’s the notebook we left participants](https://github.com/trycua/cua/blob/main/notebooks/sota_hackathon.ipynb).  *(Our CUA Workshop at 2:30 AM.)* ## Making it possible to focus on the work If you’re an OSS framework, it’s tempting to have hackers self-host on laptops. **Don’t.** You’ll spend the workshop debugging setups instead of reviewing ideas. **Lesson learned:** within hours, we shifted to **cloud-only Sandboxes**. Payoff: consistent environments, faster starts, far less tech support. We provided: - **Credits:** $200 Cua Cloud + $200 HUD per team (manual top-ups for visible progress) - **LLMs/VLMs:** Anthropic assigned $50 per participant—tight for VLM iteration—so we added capped access under our org - **Pre-kickoff provisioning:** Platform sign-up auto-created projects, keys, and sandboxes **Takeaway:** every minute not spent on setup is a minute gained for iterating. 
## 12 Hours in the Hackathon **After the workshop buzz.** Morning interest was high, but Docker setup + requiring focus on a single track thinned the crowd. Most sponsor prizes are broad (“use our product and you qualify”), letting students stack tracks. Ours required commitment. Upside: those who stayed shipped sharper, higher-quality submissions. **The bell curve of submissions.** Most entries used *claude-sonnet-4-20250514*—proof that docs and public leaderboards ([OSWorld](https://os-world.github.io/#benchmark)) guide choices. Results clustered around the safe pick, with fewer pushing boundaries. **Who went beyond the baseline.** A few tried multi-agent/tool graphs. One standout—[**cuala**](https://github.com/YeIIcw/cuala)—was a clean reference: deterministic actions, verifiable state changes, callbacks for saving images and trajectories. **Bottom line:** Early excitement is easy; keeping teams engaged requires reducing friction and offering multiple entry points. ### What broke (and why) We skipped a full end-to-end **Cua × HUD** dry-run. It showed. - Hackers ran out of inference credits. Desktop tasks are token-heavy. A full OSWorld run (200 max steps) for *computer-use-preview* (OpenAI Operator API) can cost >$600. Serious attempts: ~400k tokens × 14 tasks. - Python version/build mismatches surfaced, requiring debug time across both OSS repos. - Our Cua framework lacked a **Response Agent** to complete evaluation loops. Some runs stalled until patched. 
## Scoring and Results ### Participation & Outcomes - ~**30** hackers gave the track a serious try; **5** crossed the finish line - All submissions were **solo**, mostly undergrads - Judging: OSWorld-Tiny on HUD, with Cua + HUD reruns to verify scores - Final leaderboard: [HUD Leaderboard](https://www.hud.so/leaderboards/ddupont/OSWorld-Tiny-Public)  *(Leaderboard on HUD)* ### Winners **🥇 Winner — Ram** - Devpost: https://devpost.com/software/sota-computer-use-agent-challenge - Code: https://github.com/Ram-Raghav-S/cua/tree/ram - Score: 68.3% **🥈 Runner-up — Aryan** - Devpost: https://devpost.com/software/loopdeloop-computer-use-agent-sota-attempt - Code: https://github.com/Tumph/cua - Score: 55.9% **🥉 Special Mention — Adam** - Devpost: https://devpost.com/software/cuala - Code: https://github.com/YeIIcw/cuala - Score: 42.1%  *(Our finalists before the award ceremony)* ## What We’d Keep - **Sponsor Hack the North again** - **Keep a visible, staffed booth** - **Publish a compact FAQ** - **Simple, transparent scoring** ## What We’d Change - **Run a full Cua × HUD dry-run under load** - **Offer multiple on-ramps (evals, creative, RL)** - **Keep a private eval set for judging** - **Default to cloud sandboxes** - **Handle ops earlier (swag, signage, QR codes)** - **Reward generalization, not lucky runs** ## Closing Thoughts Our first outing as sponsors wasn’t perfect, but it gave us a working playbook: **provision cloud early, keep scoring simple, always dry-run infra, and make the booth unforgettable**. If more hackathon tracks leaned on **public benchmarks**, weekends like this would produce fewer demos-for-show and more measurable progress. **P.S.** Huge thanks to the Ollama and HUD teams for co-sponsoring the track, and to our YC Partner Diana for offering a **guaranteed YC interview** as first prize. 
Whether you’re a hacker who wants to participate, or a company looking to sponsor, let’s talk — we’re especially excited to support benchmark-first hackathon tracks in the Bay Area this year.  *(HTN Closing Ceremony — Cua Track Winner Announcement)* ``` -------------------------------------------------------------------------------- /libs/typescript/computer/src/interface/base.ts: -------------------------------------------------------------------------------- ```typescript /** * Base interface for computer control. */ import pino from 'pino'; import WebSocket from 'ws'; import type { ScreenSize } from '../types'; export type MouseButton = 'left' | 'middle' | 'right'; export interface CursorPosition { x: number; y: number; } export interface AccessibilityNode { role: string; title?: string; value?: string; description?: string; bounds?: { x: number; y: number; width: number; height: number; }; children?: AccessibilityNode[]; } /** * Base class for computer control interfaces. */ export abstract class BaseComputerInterface { protected ipAddress: string; protected username: string; protected password: string; protected closed = false; protected commandLock: Promise<unknown> = Promise.resolve(); protected ws: WebSocket; protected apiKey?: string; protected vmName?: string; protected logger = pino({ name: 'computer.interface-base' }); constructor( ipAddress: string, username = 'lume', password = 'lume', apiKey?: string, vmName?: string ) { this.ipAddress = ipAddress; this.username = username; this.password = password; this.apiKey = apiKey; this.vmName = vmName; // Initialize WebSocket with headers if needed const headers: { [key: string]: string } = {}; if (this.apiKey && this.vmName) { headers['X-API-Key'] = this.apiKey; headers['X-VM-Name'] = this.vmName; } // Create the WebSocket instance this.ws = new WebSocket(this.wsUri, { headers }); } /** * Get the WebSocket URI for connection. * Subclasses can override this to customize the URI. 
*/ protected get wsUri(): string { const protocol = this.apiKey ? 'wss' : 'ws'; // Check if ipAddress already includes a port if (this.ipAddress.includes(':')) { return `${protocol}://${this.ipAddress}/ws`; } // Otherwise, append the default port const port = this.apiKey ? '8443' : '8000'; return `${protocol}://${this.ipAddress}:${port}/ws`; } /** * Wait for interface to be ready. * @param timeout Maximum time to wait in seconds * @throws Error if interface is not ready within timeout */ async waitForReady(timeout = 60): Promise<void> { const startTime = Date.now(); while (Date.now() - startTime < timeout * 1000) { try { await this.connect(); return; } catch (error) { console.log(error); // Wait a bit before retrying this.logger.error( `Error connecting to websocket: ${JSON.stringify(error)}` ); await new Promise((resolve) => setTimeout(resolve, 1000)); } } throw new Error(`Interface not ready after ${timeout} seconds`); } /** * Authenticate with the WebSocket server. * This should be called immediately after the WebSocket connection is established. 
*/ private async authenticate(): Promise<void> { if (!this.apiKey || !this.vmName) { // No authentication needed return; } this.logger.info('Performing authentication handshake...'); const authMessage = { command: 'authenticate', params: { api_key: this.apiKey, container_name: this.vmName, }, }; return new Promise<void>((resolve, reject) => { const authHandler = (data: WebSocket.RawData) => { try { const authResult = JSON.parse(data.toString()); if (!authResult.success) { const errorMsg = authResult.error || 'Authentication failed'; this.logger.error(`Authentication failed: ${errorMsg}`); this.ws.close(); reject(new Error(`Authentication failed: ${errorMsg}`)); } else { this.logger.info('Authentication successful'); this.ws.off('message', authHandler); resolve(); } } catch (error) { this.ws.off('message', authHandler); reject(error); } }; this.ws.on('message', authHandler); this.ws.send(JSON.stringify(authMessage)); }); } /** * Connect to the WebSocket server. */ public async connect(): Promise<void> { // If the WebSocket is already open, check if we need to authenticate if (this.ws.readyState === WebSocket.OPEN) { this.logger.info( 'Websocket is open, ensuring authentication is complete.' ); return this.authenticate(); } // If the WebSocket is closed or closing, reinitialize it if ( this.ws.readyState === WebSocket.CLOSED || this.ws.readyState === WebSocket.CLOSING ) { this.logger.info('Websocket is closed. 
Reinitializing connection.'); const headers: { [key: string]: string } = {}; if (this.apiKey && this.vmName) { headers['X-API-Key'] = this.apiKey; headers['X-VM-Name'] = this.vmName; } this.ws = new WebSocket(this.wsUri, { headers }); return this.authenticate(); } // Connect and authenticate return new Promise((resolve, reject) => { const onOpen = async () => { try { // Always authenticate immediately after connection await this.authenticate(); resolve(); } catch (error) { reject(error); } }; // If already connecting, wait for it to complete then authenticate if (this.ws.readyState === WebSocket.CONNECTING) { this.ws.addEventListener('open', onOpen, { once: true }); this.ws.addEventListener('error', (error) => reject(error), { once: true, }); return; } // Set up event handlers this.ws.on('open', onOpen); this.ws.on('error', (error: Error) => { reject(error); }); this.ws.on('close', () => { if (!this.closed) { // Attempt to reconnect setTimeout(() => this.connect(), 1000); } }); }); } /** * Send a command to the WebSocket server. 
*/ public async sendCommand( command: string, params: { [key: string]: unknown } = {} ): Promise<{ [key: string]: unknown }> { // Create a new promise for this specific command const commandPromise = new Promise<{ [key: string]: unknown }>( (resolve, reject) => { // Chain it to the previous commands const executeCommand = async (): Promise<{ [key: string]: unknown; }> => { if (!this.ws || this.ws.readyState !== WebSocket.OPEN) { await this.connect(); } return new Promise<{ [key: string]: unknown }>( (innerResolve, innerReject) => { const messageHandler = (data: WebSocket.RawData) => { try { const response = JSON.parse(data.toString()); if (response.error) { innerReject(new Error(response.error)); } else { innerResolve(response); } } catch (error) { innerReject(error); } this.ws.off('message', messageHandler); }; this.ws.on('message', messageHandler); const wsCommand = { command, params }; this.ws.send(JSON.stringify(wsCommand)); } ); }; // Add this command to the lock chain this.commandLock = this.commandLock.then(() => executeCommand().then(resolve, reject) ); } ); return commandPromise; } /** * Check if the WebSocket is connected. */ public isConnected(): boolean { return this.ws && this.ws.readyState === WebSocket.OPEN; } /** * Close the interface connection. */ disconnect(): void { this.closed = true; if (this.ws && this.ws.readyState === WebSocket.OPEN) { this.ws.close(); } else if (this.ws && this.ws.readyState === WebSocket.CONNECTING) { // If still connecting, terminate the connection attempt this.ws.terminate(); } } /** * Force close the interface connection. * By default, this just calls close(), but subclasses can override * to provide more forceful cleanup. 
*/ forceClose(): void { this.disconnect(); } // Mouse Actions abstract mouseDown( x?: number, y?: number, button?: MouseButton ): Promise<void>; abstract mouseUp(x?: number, y?: number, button?: MouseButton): Promise<void>; abstract leftClick(x?: number, y?: number): Promise<void>; abstract rightClick(x?: number, y?: number): Promise<void>; abstract doubleClick(x?: number, y?: number): Promise<void>; abstract moveCursor(x: number, y: number): Promise<void>; abstract dragTo( x: number, y: number, button?: MouseButton, duration?: number ): Promise<void>; abstract drag( path: Array<[number, number]>, button?: MouseButton, duration?: number ): Promise<void>; // Keyboard Actions abstract keyDown(key: string): Promise<void>; abstract keyUp(key: string): Promise<void>; abstract typeText(text: string): Promise<void>; abstract pressKey(key: string): Promise<void>; abstract hotkey(...keys: string[]): Promise<void>; // Scrolling Actions abstract scroll(x: number, y: number): Promise<void>; abstract scrollDown(clicks?: number): Promise<void>; abstract scrollUp(clicks?: number): Promise<void>; // Screen Actions abstract screenshot(): Promise<Buffer>; abstract getScreenSize(): Promise<ScreenSize>; abstract getCursorPosition(): Promise<CursorPosition>; // Clipboard Actions abstract copyToClipboard(): Promise<string>; abstract setClipboard(text: string): Promise<void>; // File System Actions abstract fileExists(path: string): Promise<boolean>; abstract directoryExists(path: string): Promise<boolean>; abstract listDir(path: string): Promise<string[]>; abstract readText(path: string): Promise<string>; abstract writeText(path: string, content: string): Promise<void>; abstract readBytes(path: string): Promise<Buffer>; abstract writeBytes(path: string, content: Buffer): Promise<void>; abstract deleteFile(path: string): Promise<void>; abstract createDir(path: string): Promise<void>; abstract deleteDir(path: string): Promise<void>; abstract runCommand(command: string): Promise<[string, 
string]>; // Accessibility Actions abstract getAccessibilityTree(): Promise<AccessibilityNode>; abstract toScreenCoordinates(x: number, y: number): Promise<[number, number]>; abstract toScreenshotCoordinates( x: number, y: number ): Promise<[number, number]>; } ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/integrations/hud/proxy.py: -------------------------------------------------------------------------------- ```python """HUD ComputerAgent wrapper and Fake AsyncOpenAI client. Provides FakeAsyncOpenAI that adapts our ComputerAgent to the OpenAI Responses interface needed by HUD's OperatorAgent. It implements only `responses.create` and returns an OpenAI Response object with `id` and `output` fields, where `output` is a list of OpenAI-like response blocks. We intentionally only support a single-step call by consuming the first yielded result from `ComputerAgent.run()`. """ import traceback import time import uuid from typing import Any, Dict, List, Optional from agent.agent import ComputerAgent as BaseComputerAgent from agent.callbacks import PromptInstructionsCallback from hud.tools.computer.settings import computer_settings from PIL import Image from hud.agents import OperatorAgent # OpenAI Responses typed models (required) from openai.types.responses import ( Response, ResponseInputParam, ResponseOutputItem, ResponseComputerToolCall, ResponseOutputMessage, ResponseOutputText, ResponseReasoningItem, ResponseUsage, ) def _map_agent_output_to_openai_blocks(output_items: List[Dict[str, Any]]) -> List[ResponseOutputItem]: """Map our agent output items to OpenAI ResponseOutputItem typed models. Only a subset is supported: computer_call, assistant message (text), and reasoning. Unknown types are ignored. 
""" blocks: List[ResponseOutputItem] = [] for item in output_items or []: t = item.get("type") if t == "computer_call": comp = ResponseComputerToolCall.model_validate({ "id": item.get("id") or f"cu_{uuid.uuid4().hex}", "type": "computer_call", "call_id": item["call_id"], "action": item["action"], "pending_safety_checks": item.get("pending_safety_checks", []), "status": "completed", }) blocks.append(comp) # we will exit early here as the responses api only supports a single step break elif t == "message" and item.get("role") == "assistant": content_blocks: List[ResponseOutputText] = [] for c in item.get("content", []) or []: content_blocks.append( ResponseOutputText.model_validate({ "type": "output_text", "text": c["text"], "annotations": [], }) ) if content_blocks: msg = ResponseOutputMessage.model_validate({ "id": item.get("id") or f"msg_{uuid.uuid4()}", "type": "message", "role": "assistant", "status": "completed", "content": [ct.model_dump() for ct in content_blocks], }) blocks.append(msg) elif t == "reasoning": reasoning = ResponseReasoningItem.model_validate({ "id": item.get("id") or f"rsn_{uuid.uuid4()}", "type": "reasoning", "summary": item["summary"], }) blocks.append(reasoning) # Unhandled types are ignored return blocks def _to_plain_dict_list(items: Any) -> List[Dict[str, Any]]: out: List[Dict[str, Any]] = [] for it in list(items): if hasattr(it, "model_dump"): out.append(it.model_dump()) # type: ignore[attr-defined] elif isinstance(it, dict): out.append(it) else: # Strict: rely on default __dict__ if present out.append(dict(it)) # may raise if not mapping return out class FakeAsyncOpenAI: """Minimal fake OpenAI client with only `responses.create` implemented. It uses a provided `ComputerAgent` instance to produce a single-step response compatible with HUD's OperatorAgent loop. 
""" def __init__(self, computer_agent: BaseComputerAgent) -> None: self._agent = computer_agent self.responses = self._Responses(self) class _Responses: def __init__(self, parent: "FakeAsyncOpenAI") -> None: # Caches for cross-call context when using previous_response_id self.blocks_cache: Dict[str, ResponseInputParam | ResponseOutputItem] = {} self.context_cache: Dict[str, List[str]] = {} self.agent = parent._agent async def create( self, *, model: str, input: ResponseInputParam, tools: Optional[List[Dict[str, Any]]] = None, instructions: Optional[str] = None, previous_response_id: Optional[str] = None, max_retries: int = 5, **_: Any, ) -> Any: for attempt in range(max_retries): # Prepend cached blocks from previous_response_id to input full_input = input if previous_response_id is not None: prev_block_ids = self.context_cache[previous_response_id] prev_blocks = [self.blocks_cache[b_id] for b_id in prev_block_ids] full_input = _to_plain_dict_list(prev_blocks + input) # Pre-pend instructions message effective_input = full_input if instructions: effective_input = [{ "role": "user", "content": instructions, }] + full_input # Run a single iteration of the ComputerAgent agent_result: Optional[Dict[str, Any]] = None async for result in self.agent.run(effective_input): # type: ignore[arg-type] agent_result = result break assert agent_result is not None, "Agent failed to produce result" output = _map_agent_output_to_openai_blocks(agent_result["output"]) usage = agent_result["usage"] # Cache conversation context using the last response id block_ids: List[str] = [] blocks_to_cache = full_input + output for b in blocks_to_cache: bid = getattr(b, "id", None) or f"tmp-{hash(repr(b))}" self.blocks_cache[bid] = b # type: ignore[assignment] block_ids.append(bid) response_id = agent_result.get("id") or f"fake-{int(time.time()*1000)}" self.context_cache[response_id] = block_ids try: return Response.model_validate({ "id": response_id, "created_at": time.time(), "object": "response", 
"model": model, "output": output, "parallel_tool_calls": False, "tool_choice": "auto", "tools": [], "previous_response_id": previous_response_id, "usage": ResponseUsage.model_validate({ "input_tokens": usage.get("input_tokens", 0), "output_tokens": usage.get("output_tokens", 0), "total_tokens": usage.get("total_tokens", 0), "input_tokens_details": usage.get("input_tokens_details", { "cached_tokens": 0 }), "output_tokens_details": usage.get("output_tokens_details", { "reasoning_tokens": 0 }), }), }) except Exception as e: print(f"Error while validating agent response (attempt {attempt + 1}/{max_retries}): ", e) if attempt == max_retries - 1: print(traceback.format_exc()) raise e # --------------------------------------------------------------------------- # Proxy OperatorAgent (moved from __init__.py) # --------------------------------------------------------------------------- class ProxyOperatorAgent(OperatorAgent): """OperatorAgent that proxies model calls through our ComputerAgent. Accepts the same config keys we pass via hud.run_dataset `agent_config`: - model: str | None - allowed_tools: list[str] | None Additional kwargs are forwarded to OperatorAgent (if any are supported). 
""" def __init__( self, *, model: str | None = None, allowed_tools: list[str] | None = None, trajectory_dir: str | dict | None = None, # === ComputerAgent kwargs === tools: list[Any] | None = None, custom_loop: Any | None = None, only_n_most_recent_images: int | None = None, callbacks: list[Any] | None = None, instructions: str | None = None, verbosity: int | None = None, max_retries: int | None = 3, screenshot_delay: float | int = 0.5, use_prompt_caching: bool | None = False, max_trajectory_budget: float | dict | None = None, telemetry_enabled: bool | None = True, **kwargs: Any, ) -> None: model = model or "computer-use-preview" allowed_tools = allowed_tools or ["openai_computer"] computer_shim = { 'screenshot': lambda: Image.new('RGB', (computer_settings.OPENAI_COMPUTER_WIDTH, computer_settings.OPENAI_COMPUTER_HEIGHT)), 'environment': 'linux', 'dimensions': (computer_settings.OPENAI_COMPUTER_WIDTH, computer_settings.OPENAI_COMPUTER_HEIGHT) } # Build tools ensuring the computer_shim is included agent_tools: list[Any] = [computer_shim] if tools: agent_tools.extend(tools) # Build callbacks, injecting prompt instructions if provided agent_callbacks = list(callbacks or []) if instructions: agent_callbacks.append(PromptInstructionsCallback(instructions)) computer_agent = BaseComputerAgent( model=model, tools=agent_tools, custom_loop=custom_loop, only_n_most_recent_images=only_n_most_recent_images, callbacks=agent_callbacks, verbosity=verbosity, trajectory_dir=trajectory_dir, max_retries=max_retries, screenshot_delay=screenshot_delay, use_prompt_caching=use_prompt_caching, max_trajectory_budget=max_trajectory_budget, telemetry_enabled=telemetry_enabled, ) model_client = FakeAsyncOpenAI(computer_agent) super().__init__( model_client=model_client, # type: ignore[arg-type] model=model, allowed_tools=allowed_tools, **kwargs, ) __all__ = [ "FakeAsyncOpenAI", "ProxyOperatorAgent", ] ``` -------------------------------------------------------------------------------- 
/libs/python/agent/agent/callbacks/logging.py: -------------------------------------------------------------------------------- ```python """ Logging callback for ComputerAgent that provides configurable logging of agent lifecycle events. """ import json import logging from typing import Dict, List, Any, Optional, Union from .base import AsyncCallbackHandler def sanitize_image_urls(data: Any) -> Any: """ Recursively search for 'image_url' keys and set their values to '[omitted]'. Args: data: Any data structure (dict, list, or primitive type) Returns: A deep copy of the data with all 'image_url' values replaced with '[omitted]' """ if isinstance(data, dict): # Create a copy of the dictionary sanitized = {} for key, value in data.items(): if key == "image_url": sanitized[key] = "[omitted]" else: # Recursively sanitize the value sanitized[key] = sanitize_image_urls(value) return sanitized elif isinstance(data, list): # Recursively sanitize each item in the list return [sanitize_image_urls(item) for item in data] else: # For primitive types (str, int, bool, None, etc.), return as-is return data class LoggingCallback(AsyncCallbackHandler): """ Callback handler that logs agent lifecycle events with configurable verbosity. Logging levels: - DEBUG: All events including API calls, message preprocessing, and detailed outputs - INFO: Major lifecycle events (start/end, messages, outputs) - WARNING: Only warnings and errors - ERROR: Only errors """ def __init__(self, logger: Optional[logging.Logger] = None, level: int = logging.INFO): """ Initialize the logging callback. Args: logger: Logger instance to use. If None, creates a logger named 'agent.ComputerAgent' level: Logging level (logging.DEBUG, logging.INFO, etc.) 
""" self.logger = logger or logging.getLogger('agent.ComputerAgent') self.level = level # Set up logger if it doesn't have handlers if not self.logger.handlers: handler = logging.StreamHandler() formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) handler.setFormatter(formatter) self.logger.addHandler(handler) self.logger.setLevel(level) def _update_usage(self, usage: Dict[str, Any]) -> None: """Update total usage statistics.""" def add_dicts(target: Dict[str, Any], source: Dict[str, Any]) -> None: for key, value in source.items(): if isinstance(value, dict): if key not in target: target[key] = {} add_dicts(target[key], value) else: if key not in target: target[key] = 0 target[key] += value add_dicts(self.total_usage, usage) async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None: """Called before the run starts.""" self.total_usage = {} async def on_usage(self, usage: Dict[str, Any]) -> None: """Called when usage information is received.""" self._update_usage(usage) async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None: """Called after the run ends.""" def format_dict(d, indent=0): lines = [] prefix = f" - {' ' * indent}" for key, value in d.items(): if isinstance(value, dict): lines.append(f"{prefix}{key}:") lines.extend(format_dict(value, indent + 1)) elif isinstance(value, float): lines.append(f"{prefix}{key}: ${value:.4f}") else: lines.append(f"{prefix}{key}: {value}") return lines formatted_output = "\n".join(format_dict(self.total_usage)) self.logger.info(f"Total usage:\n{formatted_output}") async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Called before LLM processing starts.""" if self.logger.isEnabledFor(logging.INFO): self.logger.info(f"LLM processing started with {len(messages)} messages") if self.logger.isEnabledFor(logging.DEBUG): sanitized_messages = 
[sanitize_image_urls(msg) for msg in messages] self.logger.debug(f"LLM input messages: {json.dumps(sanitized_messages, indent=2)}") return messages async def on_llm_end(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Called after LLM processing ends.""" if self.logger.isEnabledFor(logging.DEBUG): sanitized_messages = [sanitize_image_urls(msg) for msg in messages] self.logger.debug(f"LLM output: {json.dumps(sanitized_messages, indent=2)}") return messages async def on_computer_call_start(self, item: Dict[str, Any]) -> None: """Called when a computer call starts.""" action = item.get("action", {}) action_type = action.get("type", "unknown") action_args = {k: v for k, v in action.items() if k != "type"} # INFO level logging for the action self.logger.info(f"Computer: {action_type}({action_args})") # DEBUG level logging for full details if self.logger.isEnabledFor(logging.DEBUG): self.logger.debug(f"Computer call started: {json.dumps(action, indent=2)}") async def on_computer_call_end(self, item: Dict[str, Any], result: Any) -> None: """Called when a computer call ends.""" if self.logger.isEnabledFor(logging.DEBUG): action = item.get("action", "unknown") self.logger.debug(f"Computer call completed: {json.dumps(action, indent=2)}") if result: sanitized_result = sanitize_image_urls(result) self.logger.debug(f"Computer call result: {json.dumps(sanitized_result, indent=2)}") async def on_function_call_start(self, item: Dict[str, Any]) -> None: """Called when a function call starts.""" name = item.get("name", "unknown") arguments = item.get("arguments", "{}") # INFO level logging for the function call self.logger.info(f"Function: {name}({arguments})") # DEBUG level logging for full details if self.logger.isEnabledFor(logging.DEBUG): self.logger.debug(f"Function call started: {name}") async def on_function_call_end(self, item: Dict[str, Any], result: Any) -> None: """Called when a function call ends.""" # INFO level logging for function output (similar to 
function_call_output) if result: # Handle both list and direct result formats if isinstance(result, list) and len(result) > 0: output = result[0].get("output", str(result)) if isinstance(result[0], dict) else str(result[0]) else: output = str(result) # Truncate long outputs if len(output) > 100: output = output[:100] + "..." self.logger.info(f"Output: {output}") # DEBUG level logging for full details if self.logger.isEnabledFor(logging.DEBUG): name = item.get("name", "unknown") self.logger.debug(f"Function call completed: {name}") if result: self.logger.debug(f"Function call result: {json.dumps(result, indent=2)}") async def on_text(self, item: Dict[str, Any]) -> None: """Called when a text message is encountered.""" # Get the role to determine if it's Agent or User role = item.get("role", "unknown") content_items = item.get("content", []) # Process content items to build display text text_parts = [] for content_item in content_items: content_type = content_item.get("type", "output_text") if content_type == "output_text": text_content = content_item.get("text", "") if not text_content.strip(): text_parts.append("[empty]") else: # Truncate long text and add ellipsis if len(text_content) > 2048: text_parts.append(text_content[:2048] + "...") else: text_parts.append(text_content) else: # Non-text content, show as [type] text_parts.append(f"[{content_type}]") # Join all text parts display_text = ''.join(text_parts) if text_parts else "[empty]" # Log with appropriate level and format if role == "assistant": self.logger.info(f"Agent: {display_text}") elif role == "user": self.logger.info(f"User: {display_text}") else: # Fallback for unknown roles, use debug level if self.logger.isEnabledFor(logging.DEBUG): self.logger.debug(f"Text message ({role}): {display_text}") async def on_api_start(self, kwargs: Dict[str, Any]) -> None: """Called when an API call is about to start.""" if self.logger.isEnabledFor(logging.DEBUG): model = kwargs.get("model", "unknown") 
self.logger.debug(f"API call starting for model: {model}") # Log sanitized messages if present if "messages" in kwargs: sanitized_messages = sanitize_image_urls(kwargs["messages"]) self.logger.debug(f"API call messages: {json.dumps(sanitized_messages, indent=2)}") elif "input" in kwargs: sanitized_input = sanitize_image_urls(kwargs["input"]) self.logger.debug(f"API call input: {json.dumps(sanitized_input, indent=2)}") async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None: """Called when an API call has completed.""" if self.logger.isEnabledFor(logging.DEBUG): model = kwargs.get("model", "unknown") self.logger.debug(f"API call completed for model: {model}") self.logger.debug(f"API call result: {json.dumps(sanitize_image_urls(result), indent=2)}") async def on_screenshot(self, item: Union[str, bytes], name: str = "screenshot") -> None: """Called when a screenshot is taken.""" if self.logger.isEnabledFor(logging.DEBUG): image_size = len(item) / 1024 self.logger.debug(f"Screenshot captured: {name} {image_size:.2f} KB") ``` -------------------------------------------------------------------------------- /libs/python/agent/agent/adapters/models/internvl.py: -------------------------------------------------------------------------------- ```python from __future__ import annotations from typing import List, Dict, Any, Optional # Hugging Face imports are local to avoid hard dependency at module import try: import torch # type: ignore from transformers import AutoModel, AutoTokenizer # type: ignore # Attempt to import InternVL's model dependencies import einops as _ # type: ignore import timm as _ # type: ignore from PIL import Image # type: ignore import torchvision.transforms as T # type: ignore from torchvision.transforms.functional import InterpolationMode # type: ignore import base64 # type: ignore from io import BytesIO # type: ignore import requests # type: ignore HF_AVAILABLE = True except Exception: HF_AVAILABLE = False class InternVLModel: 
    """Generic Hugging Face vision-language model handler.

    Uses InternVL's native `model.chat()` interface with `AutoTokenizer`.
    Provides preprocessing to support multi-turn conversations with multiple images.
    """

    def __init__(self, model_name: str, device: str = "auto", trust_remote_code: bool = False) -> None:
        # Fail fast with an actionable install hint when the optional extra is missing.
        if not HF_AVAILABLE:
            raise ImportError(
                "InternVL dependencies not found. Install with: pip install \"cua-agent[internvl-hf]\""
            )
        self.model_name = model_name
        self.device = device
        self.model = None  # populated by _load()
        self.tokenizer = None  # populated by _load()
        self.trust_remote_code = trust_remote_code
        self._load()

    def _load(self) -> None:
        """Load the model and tokenizer eagerly; called once from __init__."""
        # Load model
        self.model = AutoModel.from_pretrained(
            self.model_name,
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=True,
            use_flash_attn=True,
            device_map=self.device,
            trust_remote_code=self.trust_remote_code,
        ).eval()
        # Load tokenizer (InternVL requires trust_remote_code=True and often use_fast=False)
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name,
            trust_remote_code=self.trust_remote_code,
            use_fast=False,
        )

    # ---- Image preprocessing utilities adapted from InternVL docs ----
    IMAGENET_MEAN = (0.485, 0.456, 0.406)
    IMAGENET_STD = (0.229, 0.224, 0.225)

    def _build_transform(self, input_size: int) -> T.Compose:
        """Return the per-tile transform: RGB convert, bicubic resize to a
        square of `input_size`, tensor conversion, ImageNet normalization."""
        MEAN, STD = self.IMAGENET_MEAN, self.IMAGENET_STD
        transform = T.Compose([
            T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
            T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
            T.ToTensor(),
            T.Normalize(mean=MEAN, std=STD)
        ])
        return transform

    def _find_closest_aspect_ratio(self, aspect_ratio: float, target_ratios: List[tuple], width: int, height: int, image_size: int):
        """Pick the (cols, rows) tiling ratio whose aspect ratio is closest to
        the image's; ties are broken toward more tiles when the image area is
        large enough (> half the tiled area) to justify them."""
        best_ratio_diff = float('inf')
        best_ratio = (1, 1)
        area = width * height
        for ratio in target_ratios:
            target_aspect_ratio = ratio[0] / ratio[1]
            ratio_diff = abs(aspect_ratio - target_aspect_ratio)
            if ratio_diff < best_ratio_diff:
                best_ratio_diff = ratio_diff
                best_ratio = ratio
            elif ratio_diff == best_ratio_diff:
                if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]:
                    best_ratio = ratio
        return best_ratio

    def _dynamic_preprocess(self, image: Image.Image, min_num: int = 1, max_num: int = 12, image_size: int = 448, use_thumbnail: bool = True) -> List[Image.Image]:
        """Split `image` into between `min_num` and `max_num` square tiles of
        side `image_size`, choosing the tiling grid closest to the original
        aspect ratio; optionally append a whole-image thumbnail tile."""
        orig_width, orig_height = image.size
        aspect_ratio = orig_width / orig_height

        # All (cols, rows) grids whose tile count falls in [min_num, max_num].
        target_ratios = set(
            (i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if
            i * j <= max_num and i * j >= min_num)
        target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])

        target_aspect_ratio = self._find_closest_aspect_ratio(
            aspect_ratio, target_ratios, orig_width, orig_height, image_size)

        target_width = image_size * target_aspect_ratio[0]
        target_height = image_size * target_aspect_ratio[1]
        blocks = target_aspect_ratio[0] * target_aspect_ratio[1]

        resized_img = image.resize((target_width, target_height))
        processed_images: List[Image.Image] = []
        for i in range(blocks):
            # Crop tile i in row-major order from the resized image.
            box = (
                (i % (target_width // image_size)) * image_size,
                (i // (target_width // image_size)) * image_size,
                ((i % (target_width // image_size)) + 1) * image_size,
                ((i // (target_width // image_size)) + 1) * image_size
            )
            split_img = resized_img.crop(box)
            processed_images.append(split_img)
        assert len(processed_images) == blocks
        if use_thumbnail and len(processed_images) != 1:
            thumbnail_img = image.resize((image_size, image_size))
            processed_images.append(thumbnail_img)
        return processed_images

    def _load_image_from_source(self, src: str) -> Image.Image:
        """Load PIL image from various sources: data URL, http(s), or local path."""
        if src.startswith("data:image/"):
            # data URL base64
            header, b64data = src.split(",", 1)
            img_bytes = base64.b64decode(b64data)
            return Image.open(BytesIO(img_bytes)).convert('RGB')
        if src.startswith("http://") or src.startswith("https://"):
            resp = requests.get(src, timeout=10)
            resp.raise_for_status()
            return Image.open(BytesIO(resp.content)).convert('RGB')
        # Assume local file path
        return Image.open(src).convert('RGB')

    def _images_to_pixel_values(self, images: List[Image.Image], input_size: int = 448, max_num: int = 12):
        """Tile and normalize each image, returning (pixel_values, num_patches_list).

        `pixel_values` is the concatenation of all tiles across all images
        (None when `images` is empty); `num_patches_list[i]` is the tile count
        contributed by image i, as expected by InternVL's multi-image chat.
        """
        transform = self._build_transform(input_size=input_size)
        pixel_values_list = []
        num_patches_list: List[int] = []
        for img in images:
            tiles = self._dynamic_preprocess(img, image_size=input_size, use_thumbnail=True, max_num=max_num)
            pv = [transform(tile) for tile in tiles]
            pv = torch.stack(pv)
            num_patches_list.append(pv.shape[0])
            pixel_values_list.append(pv)
        if not pixel_values_list:
            return None, []
        pixel_values = torch.cat(pixel_values_list)
        return pixel_values, num_patches_list

    def generate(self, messages: List[Dict[str, Any]], max_new_tokens: int = 128) -> str:
        """Generate text for the given HF-format messages.

        messages: [{ role, content: [{type:'text'|'image', text|image}] }]

        This implementation constructs InternVL-compatible inputs and uses
        `model.chat(tokenizer, pixel_values, question, history=...)` to avoid
        relying on AutoProcessor (which fails for some tokenizers).
        """
        assert self.model is not None and self.tokenizer is not None

        # Build textual context and collect images and the final question
        context_lines: List[str] = []
        all_images: List[Image.Image] = []
        last_user_text_parts: List[str] = []

        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content", [])
            # Normalize a plain-string content field into the item-list shape.
            if isinstance(content, str):
                content_items = [{"type": "text", "text": content}]
            else:
                content_items = content

            if role == "user":
                # Collect text and images
                parts_text: List[str] = []
                for item in content_items:
                    if item.get("type") == "text":
                        t = item.get("text", "")
                        if t:
                            parts_text.append(t)
                    elif item.get("type") == "image":
                        url = item.get("image", "")
                        if url:
                            try:
                                all_images.append(self._load_image_from_source(url))
                            except Exception:
                                # Ignore failed image loads but keep going
                                # NOTE(review): failures are fully silent; consider logging.
                                pass
                text = "\n".join(parts_text).strip()
                if text:
                    context_lines.append(f"User: {text}")
                # Track last user text separately for question
                last_user_text_parts = parts_text or last_user_text_parts
            elif role == "assistant":
                # Only keep text content for history
                parts_text = [item.get("text", "") for item in content_items if item.get("type") == "text"]
                text = "\n".join(parts_text).strip()
                if text:
                    context_lines.append(f"Assistant: {text}")

        # Prepare pixel values for all collected images (across turns)
        pixel_values = None
        num_patches_list: List[int] = []
        if all_images:
            pixel_values, num_patches_list = self._images_to_pixel_values(all_images, input_size=448, max_num=12)
            if pixel_values is not None:
                # Convert dtype/device as in docs
                pixel_values = pixel_values.to(torch.bfloat16)
                # Chat API expects tensors on CUDA when model is on CUDA
                try:
                    pixel_values = pixel_values.to(self.model.device)
                except Exception:
                    pass

        # Build question with any prior context and numbered image placeholders
        if all_images:
            # Separate images layout: Image-1: <image> ... then question text
            prefix_lines = [f"Image-{i+1}: <image>" for i in range(len(all_images))]
            prefix = "\n".join(prefix_lines) + "\n"
        else:
            prefix = ""

        last_user_text = "\n".join(last_user_text_parts).strip()
        # Combine prior text-only turns as context to emulate multi-turn
        context_text = "\n".join(context_lines[:-1]) if len(context_lines) > 1 else ""

        base_question = last_user_text if last_user_text else "Describe the image(s) in detail."
        if context_text:
            question = (context_text + "\n" + prefix + base_question).strip()
        else:
            question = (prefix + base_question).strip()

        # Generation config
        generation_config = dict(max_new_tokens=max_new_tokens, do_sample=False)

        # Call InternVL chat
        try:
            if pixel_values is None:
                # Pure-text conversation (embed prior turns in question)
                response = self.model.chat(self.tokenizer, None, question, generation_config)
            else:
                # Multi-image: pass num_patches_list if >1 image
                if len(num_patches_list) > 1:
                    response = self.model.chat(
                        self.tokenizer,
                        pixel_values,
                        question,
                        generation_config,
                        num_patches_list=num_patches_list,
                    )
                else:
                    response = self.model.chat(self.tokenizer, pixel_values, question, generation_config)
        except Exception as e:
            # Fallback: return empty string to avoid crashing the adapter
            # NOTE(review): the caught exception `e` is dropped unlogged, so
            # generation failures are indistinguishable from empty output.
            return ""
        return response or ""
```

--------------------------------------------------------------------------------
/scripts/playground.sh:
--------------------------------------------------------------------------------

```bash
#!/bin/bash
set -e

echo "🚀 Launching Cua Computer-Use Agent UI..."

# Save the original working directory
ORIGINAL_DIR="$(pwd)"

# Directories used by the script
DEMO_DIR="$HOME/.cua-demo"
VENV_DIR="$DEMO_DIR/venv"

# Function to clean up on exit
cleanup() {
    cd ~
    rm -rf "$TMP_DIR" 2>/dev/null || true
}

# Create a temporary directory for our work
TMP_DIR=$(mktemp -d)
cd "$TMP_DIR"
# Remove the temp dir on any exit path (set -e may abort at any point).
trap cleanup EXIT

# Ask user to choose between local macOS VMs or Cua Cloud Sandbox
echo ""
echo "Choose your Cua setup:"
echo "1) ☁️ Cua Cloud Sandbox (works on any system)"
echo "2) 🖥️ Local macOS VMs (requires Apple Silicon Mac + macOS 15+)"
echo ""
read -p "Enter your choice (1 or 2): " CHOICE

if [[ "$CHOICE" == "1" ]]; then
    # Cua Cloud Sandbox setup
    echo ""
    echo "☁️ Setting up Cua Cloud Sandbox..."
    echo ""

    # Check if existing .env.local already has CUA_API_KEY (check current dir and demo dir)
    # Look for .env.local in the original working directory (before cd to temp dir)
    CURRENT_ENV_FILE="$ORIGINAL_DIR/.env.local"
    DEMO_ENV_FILE="$DEMO_DIR/.env.local"
    CUA_API_KEY=""

    # First check current directory
    if [[ -f "$CURRENT_ENV_FILE" ]] && grep -q "CUA_API_KEY=" "$CURRENT_ENV_FILE"; then
        # Strip quotes/whitespace around the value; reject the placeholder.
        EXISTING_CUA_KEY=$(grep "CUA_API_KEY=" "$CURRENT_ENV_FILE" | cut -d'=' -f2- | tr -d '"' | tr -d "'" | xargs)
        if [[ -n "$EXISTING_CUA_KEY" && "$EXISTING_CUA_KEY" != "your_cua_api_key_here" && "$EXISTING_CUA_KEY" != "" ]]; then
            CUA_API_KEY="$EXISTING_CUA_KEY"
        fi
    fi

    # Then check demo directory if not found in current dir
    if [[ -z "$CUA_API_KEY" ]] && [[ -f "$DEMO_ENV_FILE" ]] && grep -q "CUA_API_KEY=" "$DEMO_ENV_FILE"; then
        EXISTING_CUA_KEY=$(grep "CUA_API_KEY=" "$DEMO_ENV_FILE" | cut -d'=' -f2- | tr -d '"' | tr -d "'" | xargs)
        if [[ -n "$EXISTING_CUA_KEY" && "$EXISTING_CUA_KEY" != "your_cua_api_key_here" && "$EXISTING_CUA_KEY" != "" ]]; then
            CUA_API_KEY="$EXISTING_CUA_KEY"
        fi
    fi

    # If no valid API key found, prompt for one
    if [[ -z "$CUA_API_KEY" ]]; then
        echo "To use Cua Cloud Sandbox, you need to:"
        echo "1. Sign up at https://trycua.com"
        echo "2. Create a Cloud Sandbox"
        echo "3. Generate an Api Key"
        echo ""
        read -p "Enter your Cua Api Key: " CUA_API_KEY

        if [[ -z "$CUA_API_KEY" ]]; then
            echo "❌ Cua Api Key is required for Cloud Sandbox."
            exit 1
        fi
    fi

    USE_CLOUD=true

elif [[ "$CHOICE" == "2" ]]; then
    # Local macOS VM setup
    echo ""
    echo "🖥️ Setting up local macOS VMs..."

    # Check for Apple Silicon Mac
    if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then
        echo "❌ Local macOS VMs require an Apple Silicon Mac (M1/M2/M3/M4)."
        echo "💡 Consider using Cua Cloud Sandbox instead (option 1)."
        exit 1
    fi

    # Check for macOS 15 (Sequoia) or newer
    # (version-sort the two strings; if the smallest is not 15.0, OSVERSION < 15.0)
    OSVERSION=$(sw_vers -productVersion)
    if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then
        echo "❌ Local macOS VMs require macOS 15 (Sequoia) or newer. You have $OSVERSION."
        echo "💡 Consider using Cua Cloud Sandbox instead (option 1)."
        exit 1
    fi

    USE_CLOUD=false
else
    echo "❌ Invalid choice. Please run the script again and choose 1 or 2."
    exit 1
fi

# Install Lume if not already installed (only for local VMs)
if [[ "$USE_CLOUD" == "false" ]]; then
    if ! command -v lume &> /dev/null; then
        echo "📦 Installing Lume CLI..."
        curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash

        # Add lume to PATH for this session if it's not already there
        if ! command -v lume &> /dev/null; then
            export PATH="$PATH:$HOME/.local/bin"
        fi
    fi

    # Pull the macOS CUA image if not already present
    if ! lume ls | grep -q "macos-sequoia-cua"; then
        # Check available disk space
        IMAGE_SIZE_GB=30
        AVAILABLE_SPACE_KB=$(df -k $HOME | tail -1 | awk '{print $4}')
        AVAILABLE_SPACE_GB=$(($AVAILABLE_SPACE_KB / 1024 / 1024))

        echo "📊 The macOS CUA image will use approximately ${IMAGE_SIZE_GB}GB of disk space."
        echo "   You currently have ${AVAILABLE_SPACE_GB}GB available on your system."

        # Prompt for confirmation
        read -p "   Continue? [y]/n: " CONTINUE
        CONTINUE=${CONTINUE:-y}

        if [[ $CONTINUE =~ ^[Yy]$ ]]; then
            echo "📥 Pulling macOS CUA image (this may take a while)..."
            lume pull macos-sequoia-cua:latest
        else
            echo "❌ Installation cancelled."
            exit 1
        fi
    fi
fi

# Create a Python virtual environment
echo "🐍 Setting up Python environment..."

# Try different Python commands in order of preference
PYTHON_CMD=""
for cmd in python3.11 python3 python; do
    if command -v $cmd &> /dev/null; then
        # Check this Python version
        PYTHON_VERSION=$($cmd --version 2>&1 | cut -d" " -f2)
        PYTHON_MAJOR=$(echo $PYTHON_VERSION | cut -d. -f1)
        PYTHON_MINOR=$(echo $PYTHON_VERSION | cut -d. -f2)

        if [ "$PYTHON_MAJOR" -eq 3 ] && [ "$PYTHON_MINOR" -eq 11 ]; then
            PYTHON_CMD=$cmd
            echo "✅ Found suitable Python: $cmd (version $PYTHON_VERSION)"
            break
        elif [ "$PYTHON_MAJOR" -eq 3 ] && [ "$PYTHON_MINOR" -gt 11 ]; then
            PYTHON_CMD=$cmd
            PYTHON_TOO_NEW=true
            echo "⚠️ Found $cmd (version $PYTHON_VERSION) but only Python 3.11.x is supported."
            break
        else
            echo "⚠️ Found $cmd (version $PYTHON_VERSION) but it's too old, trying next..."
        fi
    fi
done

# If no suitable Python was found, or if Python is too new, offer to exit or continue
if [ -z "$PYTHON_CMD" ] || [ "$PYTHON_TOO_NEW" = true ]; then
    OS_TYPE=$(uname -s)
    if [ "$PYTHON_TOO_NEW" = true ]; then
        echo -e "\n❌ Python version $PYTHON_VERSION detected. Only Python 3.11.x is supported. Newer versions (e.g., 3.12+) are not yet supported."
    else
        if [[ "$OS_TYPE" == "Darwin" ]]; then
            # NOTE(review): "[email protected]" below looks like an email-obfuscation
            # artifact of the dump tool; the original likely reads "python@3.11".
            # Confirm against the source repository before relying on this text.
            echo -e "\n❌ python3.11 not found. To continue, we recommend running this:\n\n    $ brew install [email protected]\n"
        elif [[ "$OS_TYPE" == "MINGW"* || "$OS_TYPE" == "CYGWIN"* || "$OS_TYPE" == "MSYS"* ]]; then
            echo -e "\n❌ python3.11 not found. Please install Python 3.11 from https://www.python.org/downloads/\n"
        else
            echo -e "\n❌ python3.11 not found. Please install Python 3.11 from your package manager or https://www.python.org/downloads/\n"
        fi
    fi
    while true; do
        echo "Would you like to exit so you can install Python 3.11, or continue anyway? (e = exit, c = continue): "
        read -n 1 -r PYTHON_CONT_CHOICE
        echo
        if [[ "$PYTHON_CONT_CHOICE" =~ ^[Ee]$ ]]; then
            echo "Exiting so you can install Python 3.11."
            exit 1
        elif [[ "$PYTHON_CONT_CHOICE" =~ ^[Cc]$ ]]; then
            echo "⚠️ Continuing without Python 3.11. Some features may not work as expected."
            break
        else
            echo "Please enter 'e' to exit or 'c' to continue."
        fi
    done
fi

# Create a virtual environment
if [ ! -d "$VENV_DIR" ]; then
    $PYTHON_CMD -m venv "$VENV_DIR"
fi

# Activate the virtual environment
source "$VENV_DIR/bin/activate"

# Install required packages
echo "📦 Updating Cua packages..."
pip install -U pip setuptools wheel Cmake
pip install -U cua-computer "cua-agent[all]"

# Create a simple demo script
mkdir -p "$DEMO_DIR"

# Create .env.local file with API keys (only if it doesn't exist)
if [[ ! -f "$DEMO_DIR/.env.local" ]]; then
    cat > "$DEMO_DIR/.env.local" << EOF
# Uncomment and add your API keys here
# OPENAI_API_KEY=your_openai_api_key_here
# ANTHROPIC_API_KEY=your_anthropic_api_key_here
CUA_API_KEY=your_cua_api_key_here
EOF
    echo "📝 Created .env.local file with API key placeholders"
else
    echo "📝 Found existing .env.local file - keeping your current settings"
fi

if [[ "$USE_CLOUD" == "true" ]]; then
    # Add CUA API key to .env.local if not already present
    if ! grep -q "CUA_API_KEY" "$DEMO_DIR/.env.local"; then
        echo "CUA_API_KEY=$CUA_API_KEY" >> "$DEMO_DIR/.env.local"
        echo "🔑 Added CUA_API_KEY to .env.local"
    elif grep -q "CUA_API_KEY=your_cua_api_key_here" "$DEMO_DIR/.env.local"; then
        # Update placeholder with actual key
        # NOTE(review): sed pattern would break if the key contains "/" — confirm keys never do.
        sed -i.bak "s/CUA_API_KEY=your_cua_api_key_here/CUA_API_KEY=$CUA_API_KEY/" "$DEMO_DIR/.env.local"
        echo "🔑 Updated CUA_API_KEY in .env.local"
    fi
fi

# Create a convenience script to run the demo
# (unquoted EOF: $VENV_DIR/$DEMO_DIR are expanded now, baked into the script)
cat > "$DEMO_DIR/start_ui.sh" << EOF
#!/bin/bash
source "$VENV_DIR/bin/activate"
cd "$DEMO_DIR"
python run_demo.py
EOF

chmod +x "$DEMO_DIR/start_ui.sh"

echo "✅ Setup complete!"
if [[ "$USE_CLOUD" == "true" ]]; then
    # Create run_demo.py for cloud sandbox
    # (quoted 'EOF': heredoc body is written literally, no shell expansion)
    cat > "$DEMO_DIR/run_demo.py" << 'EOF'
import asyncio
import os
from pathlib import Path
from dotenv import load_dotenv

from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
from agent.ui.gradio.ui_components import create_gradio_ui

# Load environment variables from .env.local
load_dotenv(Path(__file__).parent / ".env.local")

# Check for required API keys
cua_api_key = os.environ.get("CUA_API_KEY", "")
if not cua_api_key:
    print("\n❌ CUA_API_KEY not found in .env.local file.")
    print("Please add your CUA API key to the .env.local file.")
    exit(1)

openai_key = os.environ.get("OPENAI_API_KEY", "")
anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")

if not openai_key and not anthropic_key:
    print("\n⚠️ No OpenAI or Anthropic API keys found in .env.local.")
    print("Please add at least one API key to use AI agents.")

print("🚀 Starting CUA playground with Cloud Sandbox...")
print("📝 Edit .env.local to update your API keys")

# Launch the Gradio UI and open it in the browser
app = create_gradio_ui()
app.launch(share=False, inbrowser=True)
EOF
else
    # Create run_demo.py for local macOS VMs
    cat > "$DEMO_DIR/run_demo.py" << 'EOF'
import asyncio
import os
from pathlib import Path
from dotenv import load_dotenv

from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
from agent.ui.gradio.ui_components import create_gradio_ui

# Load environment variables from .env.local
load_dotenv(Path(__file__).parent / ".env.local")

# Try to load API keys from environment
openai_key = os.environ.get("OPENAI_API_KEY", "")
anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")

if not openai_key and not anthropic_key:
    print("\n⚠️ No OpenAI or Anthropic API keys found in .env.local.")
    print("Please add at least one API key to use AI agents.")

print("🚀 Starting CUA playground with local macOS VMs...")
print("📝 Edit .env.local to update your API keys")

# Launch the Gradio UI and open it in the browser
app = create_gradio_ui()
app.launch(share=False, inbrowser=True)
EOF
fi

# NOTE(review): this "Cloud Sandbox" completion message is printed for BOTH
# setup paths (cloud and local) — possibly misleading for the local path.
echo "☁️ CUA Cloud Sandbox setup complete!"
echo "📝 Edit $DEMO_DIR/.env.local to update your API keys"
echo "🖥️ Start the playground by running: $DEMO_DIR/start_ui.sh"

# Check if the VM is running (only for local setup)
if [[ "$USE_CLOUD" == "false" ]]; then
    echo "🔍 Checking if the macOS CUA VM is running..."
    VM_RUNNING=$(lume ls | grep "macos-sequoia-cua" | grep "running" || echo "")

    if [ -z "$VM_RUNNING" ]; then
        echo "🚀 Starting the macOS CUA VM in the background..."
        lume run macos-sequoia-cua:latest &

        # Wait a moment for the VM to initialize
        sleep 5
        echo "✅ VM started successfully."
    else
        echo "✅ macOS CUA VM is already running."
    fi
fi

# Ask if the user wants to start the demo now
echo
read -p "Would you like to start the Cua Computer-Use Agent UI now? (y/n) " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
    echo "🚀 Starting the Cua Computer-Use Agent UI..."
    echo ""
    "$DEMO_DIR/start_ui.sh"
fi
```

--------------------------------------------------------------------------------
/libs/python/som/som/visualization.py:
--------------------------------------------------------------------------------

```python
from typing import List, Dict, Any, Tuple
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import supervision as sv
import platform
import os
import logging

logger = logging.getLogger(__name__)


class BoxAnnotator:
    """Class for drawing bounding boxes and labels on images."""

    def __init__(self):
        """Initialize the box annotator with a color palette."""
        # WCAG 2.1 compliant color palette optimized for accessibility
        self.colors = [
            "#2E7D32", # Green
            "#C62828", # Red
            "#1565C0", # Blue
            "#6A1B9A", # Purple
            "#EF6C00", # Orange
            "#283593", # Indigo
            "#4527A0", # Deep Purple
            "#00695C", # Teal
            "#D84315", # Deep Orange
            "#1B5E20", # Dark Green
            "#B71C1C", # Dark Red
            "#0D47A1", # Dark Blue
            "#4A148C", # Dark Purple
            "#E65100", # Dark Orange
            "#1A237E", # Dark Indigo
            "#311B92", # Darker Purple
            "#004D40", # Dark Teal
            "#BF360C", # Darker Orange
            "#33691E", # Darker Green
            "#880E4F", # Pink
        ]
        # Rotating index into self.colors, advanced by _get_next_color().
        self.color_index = 0
        # Path to a usable TrueType font, or None if only the PIL default is available.
        self.default_font = None
        self._initialize_font()

    def _initialize_font(self) -> None:
        """Initialize the default font."""
        # Try to load a system font first
        system = platform.system()
        font_paths = []

        if system == "Darwin":  # macOS
            font_paths = [
                "/System/Library/Fonts/Helvetica.ttc",
                "/System/Library/Fonts/Arial.ttf",
                "/Library/Fonts/Arial.ttf",
            ]
        elif system == "Linux":
            font_paths = [
                "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
                "/usr/share/fonts/TTF/DejaVuSans.ttf",
                "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
            ]
        else:  # Windows
            font_paths = ["C:\\Windows\\Fonts\\arial.ttf"]

        # Try each font path; keep the first one that both loads and renders.
        for font_path in font_paths:
            if os.path.exists(font_path):
                try:
                    # Test the font with a small size
                    test_font = ImageFont.truetype(font_path, 12)
                    # Test if the font can render text
                    test_font.getbbox("1")
                    self.default_font = font_path
                    return
                except Exception:
                    continue

    def _get_next_color(self) -> str:
        """Get the next color from the palette."""
        color = self.colors[self.color_index]
        self.color_index = (self.color_index + 1) % len(self.colors)
        return color

    def _hex_to_rgb(self, hex_color: str) -> Tuple[int, int, int]:
        """Convert hex color to RGB tuple."""
        hex_color = hex_color.lstrip("#")
        # Create explicit tuple of 3 integers to match the return type
        r = int(hex_color[0:2], 16)
        g = int(hex_color[2:4], 16)
        b = int(hex_color[4:6], 16)
        return (r, g, b)

    def draw_boxes(
        self, image: Image.Image, detections: List[Dict[str, Any]], draw_config: Dict[str, Any]
    ) -> Image.Image:
        """Draw bounding boxes and labels on the image.

        Each detection's "bbox" is normalized [x1, y1, x2, y2] in 0..1,
        scaled here to pixel coordinates. Labels are sequential 1-based
        indices placed to avoid overlapping boxes and other labels.

        NOTE(review): `draw_config` is never read in this implementation.
        Mutates and returns the same `image` object.
        """
        draw = ImageDraw.Draw(image)

        # Create smaller font while keeping contrast
        try:
            if self.default_font:
                font = ImageFont.truetype(self.default_font, size=12)  # Reduced from 16 to 12
            else:
                # If no TrueType font available, use default
                font = ImageFont.load_default()
        except Exception:
            font = ImageFont.load_default()

        padding = 2  # Reduced padding for smaller overall box
        spacing = 1  # Reduced spacing between elements

        # Keep track of used label areas to check for collisions
        used_areas = []

        # Store label information for third pass
        labels_to_draw = []

        # First pass: Initialize used_areas with all bounding boxes
        for detection in detections:
            box = detection["bbox"]
            x1, y1, x2, y2 = [
                int(coord * dim) for coord, dim in zip(box, [image.width, image.height] * 2)
            ]
            used_areas.append((x1, y1, x2, y2))

        # Second pass: Draw all bounding boxes
        for idx, detection in enumerate(detections, 1):
            # Get box coordinates
            box = detection["bbox"]
            x1, y1, x2, y2 = [
                int(coord * dim) for coord, dim in zip(box, [image.width, image.height] * 2)
            ]

            # Get color for this detection
            color = self._get_next_color()
            rgb_color = self._hex_to_rgb(color)

            # Draw bounding box with original width
            draw.rectangle(((x1, y1), (x2, y2)), outline=rgb_color, width=2)

            # Use detection number as label
            label = str(idx)

            # Get text dimensions using getbbox
            bbox = font.getbbox(label)
            text_width = bbox[2] - bbox[0]
            text_height = bbox[3] - bbox[1]

            # Create box dimensions with padding
            box_width = text_width + (padding * 2)  # Removed multiplier for tighter box
            box_height = text_height + (padding * 2)  # Removed multiplier for tighter box

            def is_inside_bbox(x, y):
                """Check if a label box would be inside the bounding box."""
                return x >= x1 and x + box_width <= x2 and y >= y1 and y + box_height <= y2

            # Try different positions until we find one without collision
            positions = [
                # Top center (above bbox)
                lambda: (x1 + ((x2 - x1) - box_width) // 2, y1 - box_height - spacing),
                # Bottom center (below bbox)
                lambda: (x1 + ((x2 - x1) - box_width) // 2, y2 + spacing),
                # Right center (right of bbox)
                lambda: (x2 + spacing, y1 + ((y2 - y1) - box_height) // 2),
                # Left center (left of bbox)
                lambda: (x1 - box_width - spacing, y1 + ((y2 - y1) - box_height) // 2),
                # Top right (outside corner)
                lambda: (x2 + spacing, y1 - box_height - spacing),
                # Top left (outside corner)
                lambda: (x1 - box_width - spacing, y1 - box_height - spacing),
                # Bottom right (outside corner)
                lambda: (x2 + spacing, y2 + spacing),
                # Bottom left (outside corner)
                lambda: (x1 - box_width - spacing, y2 + spacing),
            ]

            def check_occlusion(x, y):
                """Check if a label box occludes any existing ones or is inside bbox."""
                # First check if it's inside the bounding box
                if is_inside_bbox(x, y):
                    return True

                # Then check collision with other labels
                new_box = (x, y, x + box_width, y + box_height)
                label_width = new_box[2] - new_box[0]
                label_height = new_box[3] - new_box[1]

                for used_box in used_areas:
                    if not (
                        new_box[2] < used_box[0]  # new box is left of used box
                        or new_box[0] > used_box[2]  # new box is right of used box
                        or new_box[3] < used_box[1]  # new box is above used box
                        or new_box[1] > used_box[3]  # new box is below used box
                    ):
                        # Calculate dimensions of the used box
                        used_box_width = used_box[2] - used_box[0]
                        used_box_height = used_box[3] - used_box[1]

                        # Only consider as collision if used box is NOT more than 5x bigger in both dimensions
                        if not (used_box_width > 5 * label_width and used_box_height > 5 * label_height):
                            return True
                return False

            # Try each position until we find one without collision
            label_x = None
            label_y = None

            for get_pos in positions:
                x, y = get_pos()
                # Ensure position is within image bounds
                if x < 0 or y < 0 or x + box_width > image.width or y + box_height > image.height:
                    continue
                if not check_occlusion(x, y):
                    label_x = x
                    label_y = y
                    break

            # If all positions collide or are out of bounds, find the best possible position
            if label_x is None:
                # Try to place it in the nearest valid position outside the bbox
                best_pos = positions[0]()  # Default to top center
                label_x = max(0, min(image.width - box_width, best_pos[0]))
                label_y = max(0, min(image.height - box_height, best_pos[1]))

                # Ensure it's not inside the bounding box
                if is_inside_bbox(label_x, label_y):
                    # Force it above the bounding box
                    label_y = max(0, y1 - box_height - spacing)

            # Add this label area to used areas
            if (
                label_x is not None
                and label_y is not None
                and box_width is not None
                and box_height is not None
            ):
                used_areas.append((label_x, label_y, label_x + box_width, label_y + box_height))

            # Store label information for second pass
            labels_to_draw.append(
                {
                    "label": label,
                    "x": label_x,
                    "y": label_y,
                    "width": box_width,
                    "height": box_height,
                    "text_width": text_width,
                    "text_height": text_height,
                    "color": rgb_color,
                }
            )

        # Third pass: Draw all labels on top
        for label_info in labels_to_draw:
            # Draw background box with white outline
            draw.rectangle(
                (
                    (label_info["x"] - 1, label_info["y"] - 1),
                    (
                        label_info["x"] + label_info["width"] + 1,
                        label_info["y"] + label_info["height"] + 1,
                    ),
                ),
                outline="white",
                width=2,
            )
            draw.rectangle(
                (
                    (label_info["x"], label_info["y"]),
                    (label_info["x"] + label_info["width"], label_info["y"] + label_info["height"]),
                ),
                fill=label_info["color"],
            )

            # Center text in box
            text_x = label_info["x"] + (label_info["width"] - label_info["text_width"]) // 2
            text_y = label_info["y"] + (label_info["height"] - label_info["text_height"]) // 2

            # Draw text with black outline for better visibility
            # NOTE(review): only the 4 diagonal offsets are drawn (dx, dy both
            # nonzero), so the outline has gaps on the axes — confirm intent.
            outline_width = 1
            for dx in [-outline_width, outline_width]:
                for dy in [-outline_width, outline_width]:
                    draw.text(
                        (text_x + dx, text_y + dy), label_info["label"], fill="black", font=font
                    )

            # Draw the main white text
            draw.text((text_x, text_y), label_info["label"], fill=(255, 255, 255), font=font)

        logger.info("Finished drawing all boxes")
        return image
```