trycua/cua # codebase.md

This is page 4 of 16. Use http://codebase.md/trycua/cua?page={x} to view the full context.

# Directory Structure

```
├── .all-contributorsrc
├── .cursorignore
├── .devcontainer
│   ├── devcontainer.json
│   ├── post-install.sh
│   └── README.md
├── .dockerignore
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── scripts
│   │   ├── get_pyproject_version.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── README.md
│   │       └── test_get_pyproject_version.py
│   └── workflows
│       ├── ci-lume.yml
│       ├── docker-publish-kasm.yml
│       ├── docker-publish-xfce.yml
│       ├── docker-reusable-publish.yml
│       ├── npm-publish-computer.yml
│       ├── npm-publish-core.yml
│       ├── publish-lume.yml
│       ├── pypi-publish-agent.yml
│       ├── pypi-publish-computer-server.yml
│       ├── pypi-publish-computer.yml
│       ├── pypi-publish-core.yml
│       ├── pypi-publish-mcp-server.yml
│       ├── pypi-publish-pylume.yml
│       ├── pypi-publish-som.yml
│       ├── pypi-reusable-publish.yml
│       └── test-validation-script.yml
├── .gitignore
├── .vscode
│   ├── docs.code-workspace
│   ├── launch.json
│   ├── libs-ts.code-workspace
│   ├── lume.code-workspace
│   ├── lumier.code-workspace
│   └── py.code-workspace
├── blog
│   ├── app-use.md
│   ├── assets
│   │   ├── composite-agents.png
│   │   ├── docker-ubuntu-support.png
│   │   ├── hack-booth.png
│   │   ├── hack-closing-ceremony.jpg
│   │   ├── hack-cua-ollama-hud.jpeg
│   │   ├── hack-leaderboard.png
│   │   ├── hack-the-north.png
│   │   ├── hack-winners.jpeg
│   │   ├── hack-workshop.jpeg
│   │   ├── hud-agent-evals.png
│   │   └── trajectory-viewer.jpeg
│   ├── bringing-computer-use-to-the-web.md
│   ├── build-your-own-operator-on-macos-1.md
│   ├── build-your-own-operator-on-macos-2.md
│   ├── composite-agents.md
│   ├── cua-hackathon.md
│   ├── hack-the-north.md
│   ├── hud-agent-evals.md
│   ├── human-in-the-loop.md
│   ├── introducing-cua-cloud-containers.md
│   ├── lume-to-containerization.md
│   ├── sandboxed-python-execution.md
│   ├── training-computer-use-models-trajectories-1.md
│   ├── trajectory-viewer.md
│   ├── ubuntu-docker-support.md
│   └── windows-sandbox.md
├── CONTRIBUTING.md
├── Development.md
├── Dockerfile
├── docs
│   ├── .gitignore
│   ├── .prettierrc
│   ├── content
│   │   └── docs
│   │       ├── agent-sdk
│   │       │   ├── agent-loops.mdx
│   │       │   ├── benchmarks
│   │       │   │   ├── index.mdx
│   │       │   │   ├── interactive.mdx
│   │       │   │   ├── introduction.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── osworld-verified.mdx
│   │       │   │   ├── screenspot-pro.mdx
│   │       │   │   └── screenspot-v2.mdx
│   │       │   ├── callbacks
│   │       │   │   ├── agent-lifecycle.mdx
│   │       │   │   ├── cost-saving.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── logging.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── pii-anonymization.mdx
│   │       │   │   └── trajectories.mdx
│   │       │   ├── chat-history.mdx
│   │       │   ├── custom-computer-handlers.mdx
│   │       │   ├── custom-tools.mdx
│   │       │   ├── customizing-computeragent.mdx
│   │       │   ├── integrations
│   │       │   │   ├── hud.mdx
│   │       │   │   └── meta.json
│   │       │   ├── message-format.mdx
│   │       │   ├── meta.json
│   │       │   ├── migration-guide.mdx
│   │       │   ├── prompt-caching.mdx
│   │       │   ├── supported-agents
│   │       │   │   ├── composed-agents.mdx
│   │       │   │   ├── computer-use-agents.mdx
│   │       │   │   ├── grounding-models.mdx
│   │       │   │   ├── human-in-the-loop.mdx
│   │       │   │   └── meta.json
│   │       │   ├── supported-model-providers
│   │       │   │   ├── index.mdx
│   │       │   │   └── local-models.mdx
│   │       │   └── usage-tracking.mdx
│   │       ├── computer-sdk
│   │       │   ├── cloud-vm-management.mdx
│   │       │   ├── commands.mdx
│   │       │   ├── computer-ui.mdx
│   │       │   ├── computers.mdx
│   │       │   ├── meta.json
│   │       │   └── sandboxed-python.mdx
│   │       ├── index.mdx
│   │       ├── libraries
│   │       │   ├── agent
│   │       │   │   └── index.mdx
│   │       │   ├── computer
│   │       │   │   └── index.mdx
│   │       │   ├── computer-server
│   │       │   │   ├── Commands.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── REST-API.mdx
│   │       │   │   └── WebSocket-API.mdx
│   │       │   ├── core
│   │       │   │   └── index.mdx
│   │       │   ├── lume
│   │       │   │   ├── cli-reference.mdx
│   │       │   │   ├── faq.md
│   │       │   │   ├── http-api.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── prebuilt-images.mdx
│   │       │   ├── lumier
│   │       │   │   ├── building-lumier.mdx
│   │       │   │   ├── docker-compose.mdx
│   │       │   │   ├── docker.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   └── meta.json
│   │       │   ├── mcp-server
│   │       │   │   ├── client-integrations.mdx
│   │       │   │   ├── configuration.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── llm-integrations.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── tools.mdx
│   │       │   │   └── usage.mdx
│   │       │   └── som
│   │       │       ├── configuration.mdx
│   │       │       └── index.mdx
│   │       ├── meta.json
│   │       ├── quickstart-cli.mdx
│   │       ├── quickstart-devs.mdx
│   │       └── telemetry.mdx
│   ├── next.config.mjs
│   ├── package-lock.json
│   ├── package.json
│   ├── pnpm-lock.yaml
│   ├── postcss.config.mjs
│   ├── public
│   │   └── img
│   │       ├── agent_gradio_ui.png
│   │       ├── agent.png
│   │       ├── cli.png
│   │       ├── computer.png
│   │       ├── som_box_threshold.png
│   │       └── som_iou_threshold.png
│   ├── README.md
│   ├── source.config.ts
│   ├── src
│   │   ├── app
│   │   │   ├── (home)
│   │   │   │   ├── [[...slug]]
│   │   │   │   │   └── page.tsx
│   │   │   │   └── layout.tsx
│   │   │   ├── api
│   │   │   │   └── search
│   │   │   │       └── route.ts
│   │   │   ├── favicon.ico
│   │   │   ├── global.css
│   │   │   ├── layout.config.tsx
│   │   │   ├── layout.tsx
│   │   │   ├── llms.mdx
│   │   │   │   └── [[...slug]]
│   │   │   │       └── route.ts
│   │   │   └── llms.txt
│   │   │       └── route.ts
│   │   ├── assets
│   │   │   ├── discord-black.svg
│   │   │   ├── discord-white.svg
│   │   │   ├── logo-black.svg
│   │   │   └── logo-white.svg
│   │   ├── components
│   │   │   ├── iou.tsx
│   │   │   └── mermaid.tsx
│   │   ├── lib
│   │   │   ├── llms.ts
│   │   │   └── source.ts
│   │   └── mdx-components.tsx
│   └── tsconfig.json
├── examples
│   ├── agent_examples.py
│   ├── agent_ui_examples.py
│   ├── cloud_api_examples.py
│   ├── computer_examples_windows.py
│   ├── computer_examples.py
│   ├── computer_ui_examples.py
│   ├── computer-example-ts
│   │   ├── .env.example
│   │   ├── .gitignore
│   │   ├── .prettierrc
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── README.md
│   │   ├── src
│   │   │   ├── helpers.ts
│   │   │   └── index.ts
│   │   └── tsconfig.json
│   ├── docker_examples.py
│   ├── evals
│   │   ├── hud_eval_examples.py
│   │   └── wikipedia_most_linked.txt
│   ├── pylume_examples.py
│   ├── sandboxed_functions_examples.py
│   ├── som_examples.py
│   ├── utils.py
│   └── winsandbox_example.py
├── img
│   ├── agent_gradio_ui.png
│   ├── agent.png
│   ├── cli.png
│   ├── computer.png
│   ├── logo_black.png
│   └── logo_white.png
├── libs
│   ├── kasm
│   │   ├── Dockerfile
│   │   ├── LICENSE
│   │   ├── README.md
│   │   └── src
│   │       └── ubuntu
│   │           └── install
│   │               └── firefox
│   │                   ├── custom_startup.sh
│   │                   ├── firefox.desktop
│   │                   └── install_firefox.sh
│   ├── lume
│   │   ├── .cursorignore
│   │   ├── CONTRIBUTING.md
│   │   ├── Development.md
│   │   ├── img
│   │   │   └── cli.png
│   │   ├── Package.resolved
│   │   ├── Package.swift
│   │   ├── README.md
│   │   ├── resources
│   │   │   └── lume.entitlements
│   │   ├── scripts
│   │   │   ├── build
│   │   │   │   ├── build-debug.sh
│   │   │   │   ├── build-release-notarized.sh
│   │   │   │   └── build-release.sh
│   │   │   └── install.sh
│   │   ├── src
│   │   │   ├── Commands
│   │   │   │   ├── Clone.swift
│   │   │   │   ├── Config.swift
│   │   │   │   ├── Create.swift
│   │   │   │   ├── Delete.swift
│   │   │   │   ├── Get.swift
│   │   │   │   ├── Images.swift
│   │   │   │   ├── IPSW.swift
│   │   │   │   ├── List.swift
│   │   │   │   ├── Logs.swift
│   │   │   │   ├── Options
│   │   │   │   │   └── FormatOption.swift
│   │   │   │   ├── Prune.swift
│   │   │   │   ├── Pull.swift
│   │   │   │   ├── Push.swift
│   │   │   │   ├── Run.swift
│   │   │   │   ├── Serve.swift
│   │   │   │   ├── Set.swift
│   │   │   │   └── Stop.swift
│   │   │   ├── ContainerRegistry
│   │   │   │   ├── ImageContainerRegistry.swift
│   │   │   │   ├── ImageList.swift
│   │   │   │   └── ImagesPrinter.swift
│   │   │   ├── Errors
│   │   │   │   └── Errors.swift
│   │   │   ├── FileSystem
│   │   │   │   ├── Home.swift
│   │   │   │   ├── Settings.swift
│   │   │   │   ├── VMConfig.swift
│   │   │   │   ├── VMDirectory.swift
│   │   │   │   └── VMLocation.swift
│   │   │   ├── LumeController.swift
│   │   │   ├── Main.swift
│   │   │   ├── Server
│   │   │   │   ├── Handlers.swift
│   │   │   │   ├── HTTP.swift
│   │   │   │   ├── Requests.swift
│   │   │   │   ├── Responses.swift
│   │   │   │   └── Server.swift
│   │   │   ├── Utils
│   │   │   │   ├── CommandRegistry.swift
│   │   │   │   ├── CommandUtils.swift
│   │   │   │   ├── Logger.swift
│   │   │   │   ├── NetworkUtils.swift
│   │   │   │   ├── Path.swift
│   │   │   │   ├── ProcessRunner.swift
│   │   │   │   ├── ProgressLogger.swift
│   │   │   │   ├── String.swift
│   │   │   │   └── Utils.swift
│   │   │   ├── Virtualization
│   │   │   │   ├── DarwinImageLoader.swift
│   │   │   │   ├── DHCPLeaseParser.swift
│   │   │   │   ├── ImageLoaderFactory.swift
│   │   │   │   └── VMVirtualizationService.swift
│   │   │   ├── VM
│   │   │   │   ├── DarwinVM.swift
│   │   │   │   ├── LinuxVM.swift
│   │   │   │   ├── VM.swift
│   │   │   │   ├── VMDetails.swift
│   │   │   │   ├── VMDetailsPrinter.swift
│   │   │   │   ├── VMDisplayResolution.swift
│   │   │   │   └── VMFactory.swift
│   │   │   └── VNC
│   │   │       ├── PassphraseGenerator.swift
│   │   │       └── VNCService.swift
│   │   └── tests
│   │       ├── Mocks
│   │       │   ├── MockVM.swift
│   │       │   ├── MockVMVirtualizationService.swift
│   │       │   └── MockVNCService.swift
│   │       ├── VM
│   │       │   └── VMDetailsPrinterTests.swift
│   │       ├── VMTests.swift
│   │       ├── VMVirtualizationServiceTests.swift
│   │       └── VNCServiceTests.swift
│   ├── lumier
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   └── src
│   │       ├── bin
│   │       │   └── entry.sh
│   │       ├── config
│   │       │   └── constants.sh
│   │       ├── hooks
│   │       │   └── on-logon.sh
│   │       └── lib
│   │           ├── utils.sh
│   │           └── vm.sh
│   ├── python
│   │   ├── agent
│   │   │   ├── agent
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── adapters
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── huggingfacelocal_adapter.py
│   │   │   │   │   ├── human_adapter.py
│   │   │   │   │   ├── mlxvlm_adapter.py
│   │   │   │   │   └── models
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── generic.py
│   │   │   │   │       ├── internvl.py
│   │   │   │   │       ├── opencua.py
│   │   │   │   │       └── qwen2_5_vl.py
│   │   │   │   ├── agent.py
│   │   │   │   ├── callbacks
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── budget_manager.py
│   │   │   │   │   ├── image_retention.py
│   │   │   │   │   ├── logging.py
│   │   │   │   │   ├── operator_validator.py
│   │   │   │   │   ├── pii_anonymization.py
│   │   │   │   │   ├── prompt_instructions.py
│   │   │   │   │   ├── telemetry.py
│   │   │   │   │   └── trajectory_saver.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── computers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cua.py
│   │   │   │   │   └── custom.py
│   │   │   │   ├── decorators.py
│   │   │   │   ├── human_tool
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   ├── server.py
│   │   │   │   │   └── ui.py
│   │   │   │   ├── integrations
│   │   │   │   │   └── hud
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── agent.py
│   │   │   │   │       └── proxy.py
│   │   │   │   ├── loops
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── anthropic.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── composed_grounded.py
│   │   │   │   │   ├── glm45v.py
│   │   │   │   │   ├── gta1.py
│   │   │   │   │   ├── holo.py
│   │   │   │   │   ├── internvl.py
│   │   │   │   │   ├── model_types.csv
│   │   │   │   │   ├── moondream3.py
│   │   │   │   │   ├── omniparser.py
│   │   │   │   │   ├── openai.py
│   │   │   │   │   ├── opencua.py
│   │   │   │   │   └── uitars.py
│   │   │   │   ├── proxy
│   │   │   │   │   ├── examples.py
│   │   │   │   │   └── handlers.py
│   │   │   │   ├── responses.py
│   │   │   │   ├── types.py
│   │   │   │   └── ui
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __main__.py
│   │   │   │       └── gradio
│   │   │   │           ├── __init__.py
│   │   │   │           ├── app.py
│   │   │   │           └── ui_components.py
│   │   │   ├── benchmarks
│   │   │   │   ├── .gitignore
│   │   │   │   ├── contrib.md
│   │   │   │   ├── interactive.py
│   │   │   │   ├── models
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   └── gta1.py
│   │   │   │   ├── README.md
│   │   │   │   ├── ss-pro.py
│   │   │   │   ├── ss-v2.py
│   │   │   │   └── utils.py
│   │   │   ├── example.py
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── computer
│   │   │   ├── computer
│   │   │   │   ├── __init__.py
│   │   │   │   ├── computer.py
│   │   │   │   ├── diorama_computer.py
│   │   │   │   ├── helpers.py
│   │   │   │   ├── interface
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── logger.py
│   │   │   │   ├── models.py
│   │   │   │   ├── providers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cloud
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── docker
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── lume
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── lume_api.py
│   │   │   │   │   ├── lumier
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── types.py
│   │   │   │   │   └── winsandbox
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── provider.py
│   │   │   │   │       └── setup_script.ps1
│   │   │   │   ├── ui
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   └── gradio
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── app.py
│   │   │   │   └── utils.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── computer-server
│   │   │   ├── computer_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── diorama
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── diorama_computer.py
│   │   │   │   │   ├── diorama.py
│   │   │   │   │   ├── draw.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── safezone.py
│   │   │   │   ├── handlers
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── main.py
│   │   │   │   ├── server.py
│   │   │   │   └── watchdog.py
│   │   │   ├── examples
│   │   │   │   ├── __init__.py
│   │   │   │   └── usage_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   ├── run_server.py
│   │   │   └── test_connection.py
│   │   ├── core
│   │   │   ├── core
│   │   │   │   ├── __init__.py
│   │   │   │   └── telemetry
│   │   │   │       ├── __init__.py
│   │   │   │       └── posthog.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── mcp-server
│   │   │   ├── mcp_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   └── server.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── scripts
│   │   │       ├── install_mcp_server.sh
│   │   │       └── start_mcp_server.sh
│   │   ├── pylume
│   │   │   ├── __init__.py
│   │   │   ├── pylume
│   │   │   │   ├── __init__.py
│   │   │   │   ├── client.py
│   │   │   │   ├── exceptions.py
│   │   │   │   ├── lume
│   │   │   │   ├── models.py
│   │   │   │   ├── pylume.py
│   │   │   │   └── server.py
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   └── som
│   │       ├── LICENSE
│   │       ├── poetry.toml
│   │       ├── pyproject.toml
│   │       ├── README.md
│   │       ├── som
│   │       │   ├── __init__.py
│   │       │   ├── detect.py
│   │       │   ├── detection.py
│   │       │   ├── models.py
│   │       │   ├── ocr.py
│   │       │   ├── util
│   │       │   │   └── utils.py
│   │       │   └── visualization.py
│   │       └── tests
│   │           └── test_omniparser.py
│   ├── typescript
│   │   ├── .gitignore
│   │   ├── .nvmrc
│   │   ├── agent
│   │   │   ├── examples
│   │   │   │   ├── playground-example.html
│   │   │   │   └── README.md
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── client.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   └── client.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── biome.json
│   │   ├── computer
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── computer
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── providers
│   │   │   │   │   │   ├── base.ts
│   │   │   │   │   │   ├── cloud.ts
│   │   │   │   │   │   └── index.ts
│   │   │   │   │   └── types.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── base.ts
│   │   │   │   │   ├── factory.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── linux.ts
│   │   │   │   │   ├── macos.ts
│   │   │   │   │   └── windows.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   ├── computer
│   │   │   │   │   └── cloud.test.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── factory.test.ts
│   │   │   │   │   ├── index.test.ts
│   │   │   │   │   ├── linux.test.ts
│   │   │   │   │   ├── macos.test.ts
│   │   │   │   │   └── windows.test.ts
│   │   │   │   └── setup.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── core
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── index.ts
│   │   │   │   └── telemetry
│   │   │   │       ├── clients
│   │   │   │       │   ├── index.ts
│   │   │   │       │   └── posthog.ts
│   │   │   │       └── index.ts
│   │   │   ├── tests
│   │   │   │   └── telemetry.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── pnpm-workspace.yaml
│   │   └── README.md
│   └── xfce
│       ├── .dockerignore
│       ├── .gitignore
│       ├── Dockerfile
│       ├── README.md
│       └── src
│           ├── scripts
│           │   ├── resize-display.sh
│           │   ├── start-computer-server.sh
│           │   ├── start-novnc.sh
│           │   ├── start-vnc.sh
│           │   └── xstartup.sh
│           ├── supervisor
│           │   └── supervisord.conf
│           └── xfce-config
│               ├── helpers.rc
│               ├── xfce4-power-manager.xml
│               └── xfce4-session.xml
├── LICENSE.md
├── notebooks
│   ├── agent_nb.ipynb
│   ├── blog
│   │   ├── build-your-own-operator-on-macos-1.ipynb
│   │   └── build-your-own-operator-on-macos-2.ipynb
│   ├── composite_agents_docker_nb.ipynb
│   ├── computer_nb.ipynb
│   ├── computer_server_nb.ipynb
│   ├── customizing_computeragent.ipynb
│   ├── eval_osworld.ipynb
│   ├── ollama_nb.ipynb
│   ├── pylume_nb.ipynb
│   ├── README.md
│   ├── sota_hackathon_cloud.ipynb
│   └── sota_hackathon.ipynb
├── pdm.lock
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── samples
│   └── community
│       ├── global-online
│       │   └── README.md
│       └── hack-the-north
│           └── README.md
├── scripts
│   ├── build-uv.sh
│   ├── build.ps1
│   ├── build.sh
│   ├── cleanup.sh
│   ├── playground-docker.sh
│   ├── playground.sh
│   └── run-docker-dev.sh
└── tests
    ├── pytest.ini
    ├── shell_cmd.py
    ├── test_files.py
    ├── test_shell_bash.py
    ├── test_telemetry.py
    ├── test_venv.py
    └── test_watchdog.py
```

# Files

--------------------------------------------------------------------------------
/docs/content/docs/libraries/lumier/docker.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Docker
---

You can use Lumier through Docker:

### Run a macOS VM (ephemeral)
```bash
# Run the container with temporary storage (using pre-built image from Docker Hub)
docker run -it --rm \
    --name macos-vm \
    -p 8006:8006 \
    -e VM_NAME=macos-vm \
    -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
    -e CPU_CORES=4 \
    -e RAM_SIZE=8192 \
    trycua/lumier:latest
```
Access the VM in your browser at [http://localhost:8006](http://localhost:8006).

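After running the command above, the VM's VNC interface is served to your web browser on the host port you mapped with `-p`. With the default `-p 8006:8006` that is http://localhost:8006; if you mapped a different host port (e.g., `-p 8007:8006`), use that port instead.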

<Callout title="Note">
With the basic setup above, your VM will be reset when you stop the container (ephemeral mode). This means any changes you make inside the macOS VM will be lost. See the section below for how to save your VM state.
</Callout>

## Saving Your VM State

To save your VM state between sessions (so your changes persist when you stop and restart the container), you'll need to set up a storage location:

```bash
# First, create a storage directory if it doesn't exist
mkdir -p storage

# Then run the container with persistent storage
docker run -it --rm \
    --name lumier-vm \
    -p 8006:8006 \
    -v $(pwd)/storage:/storage \
    -e VM_NAME=lumier-vm \
    -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
    -e CPU_CORES=4 \
    -e RAM_SIZE=8192 \
    -e HOST_STORAGE_PATH=$(pwd)/storage \
    trycua/lumier:latest
```

This command creates a connection between a folder on your Mac (`$(pwd)/storage`) and a folder inside the Docker container (`/storage`). The `-v` flag (volume mount) and the `HOST_STORAGE_PATH` variable work together to ensure your VM data is saved on your host Mac.

## Sharing Files with Your VM

To share files between your Mac and the virtual machine, you can set up a shared folder:

```bash
# Create both storage and shared folders
mkdir -p storage shared

# Run with both persistent storage and a shared folder
docker run -it --rm \
    --name lumier-vm \
    -p 8006:8006 \
    -v $(pwd)/storage:/storage \
    -v $(pwd)/shared:/shared \
    -e VM_NAME=lumier-vm \
    -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
    -e CPU_CORES=4 \
    -e RAM_SIZE=8192 \
    -e HOST_STORAGE_PATH=$(pwd)/storage \
    -e HOST_SHARED_PATH=$(pwd)/shared \
    trycua/lumier:latest
```

With this setup, any files you place in the `shared` folder on your Mac will be accessible from within the macOS VM, and vice versa.

## Automating VM Startup with on-logon.sh

You can automatically run scripts when the VM starts up by placing an `on-logon.sh` script in the shared folder's lifecycle directory. This is useful for setting up your VM environment each time it starts.

```bash
# Create the lifecycle directory in your shared folder
mkdir -p shared/lifecycle

# Create a sample on-logon.sh script
cat > shared/lifecycle/on-logon.sh << 'EOF'
#!/usr/bin/env bash

# Create a file on the desktop
echo "Hello from Lumier!" > /Users/lume/Desktop/hello_lume.txt

# You can add more commands to execute at VM startup
# For example:
# - Configure environment variables
# - Start applications
# - Mount network drives
# - Set up development environments
EOF

# Make the script executable
chmod +x shared/lifecycle/on-logon.sh
```

The script will be automatically executed when the VM starts up. It runs in the VM context and has access to:

- The `/Users/lume` user directory (home directory in the VM)
- The shared folder at `/Volumes/My Shared Files` inside the VM
- Any resources available to the VM

This feature enables automation of VM setup without modifying the base VM image.

## Configuration Options

When running Lumier, you'll need to configure a few things:

- **Port forwarding** (`-p 8006:8006`): Makes the VM's VNC interface accessible in your browser. If port 8006 is already in use, you can use a different port like `-p 8007:8006`.

- **Environment variables** (`-e`): Configure your VM settings:
  - `VM_NAME`: A name for your virtual machine
  - `VERSION`: The macOS image to use
  - `CPU_CORES`: Number of CPU cores to allocate
  - `RAM_SIZE`: Memory in MB to allocate
  - `HOST_STORAGE_PATH`: Path to save VM state (when using persistent storage)
  - `HOST_SHARED_PATH`: Path to the shared folder (optional)

- **Background service**: The `lume serve` service must be running on your host. It starts automatically when you install Lume using its `install.sh` script (see the Lumier installation page).
```

--------------------------------------------------------------------------------
/libs/typescript/agent/src/types.ts:
--------------------------------------------------------------------------------

```typescript
// #region Request
/**
 * Connection kinds recognised by the client.
 * NOTE(review): the literals suggest plain HTTP, HTTPS, and a peer-to-peer
 * transport; confirm the exact meaning of 'peer' against the client code.
 */
export type ConnectionType = 'http' | 'https' | 'peer';
/** Optional settings for an agent client; every field may be omitted. */
export interface AgentClientOptions {
  /** Request timeout. Units are not stated in this file (presumably milliseconds; TODO confirm). */
  timeout?: number;
  /** Number of retries. How and when retries happen is decided by the client, not by this type. */
  retries?: number;
  /** Optional CUA API key to send as X-API-Key header for HTTP requests */
  apiKey?: string;
}
// Request types matching the Python proxy API
/**
 * Body of a single agent request.
 * Field names use snake_case because they mirror the Python proxy API.
 */
export interface AgentRequest {
  /** Model identifier string. */
  model: string;
  /** Either a plain prompt string or a full list of messages. */
  input: string | AgentMessage[];
  /** Extra agent options; the index signature allows arbitrary additional keys. */
  agent_kwargs?: {
    save_trajectory?: boolean;
    verbosity?: number;
    [key: string]: any;
  };
  /** Extra computer options; the index signature allows arbitrary additional keys. */
  computer_kwargs?: {
    os_type?: string;
    provider_type?: string;
    [key: string]: any;
  };
  /**
   * Optional per-request environment variable overrides.
   * Keys and values are strings and will be forwarded to the backend proxy.
   */
  env?: Record<string, string>;
}
// #endregion


// #region Response
// Response types
/** Result of an agent request: produced messages, usage figures, and a terminal status. */
export interface AgentResponse {
  /** Messages produced by the agent. */
  output: AgentMessage[];
  /** Token and cost accounting for this response. */
  usage: Usage;
  /** Terminal state of the request. */
  status: 'completed' | 'failed';
  /** Optional error text. Presumably set when status is 'failed'; verify against the proxy. */
  error?: string;
}
// Usage information
/** Token counts and cost attached to a response. */
export interface Usage {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
  /** Cost of the response. Currency and units are not stated in this file. */
  response_cost: number;
}
// #endregion



// #region Messages
// Agent message types - can be one of several different message types
/**
 * Union of every message shape that can appear in a request input or a response output.
 * Most members carry a distinct required `type` literal. `UserMessage` is the exception:
 * its `type` is optional, so it is told apart from `AssistantMessage` by `role`.
 */
export type AgentMessage = 
  | UserMessage
  | AssistantMessage
  | ReasoningMessage
  | ComputerCallMessage
  | ComputerCallOutputMessage
  | FunctionCallMessage
  | FunctionCallOutputMessage;
// Input message
/** Message supplied as input; `role` may be 'user', 'system', or 'developer'. */
export interface UserMessage {
  /** Optional here, unlike the other message kinds where `type` is required. */
  type?: 'message';
  role: 'user' | 'system' | 'developer';
  /** Plain text, or a list of text/image content parts. */
  content: string | InputContent[];
}
// Output message
/** Message with role 'assistant'; content is always a list of output parts. */
export interface AssistantMessage {
  type: 'message';
  role: 'assistant';
  content: OutputContent[];
}
// Output reasoning/thinking message
/** Reasoning output expressed as a list of summary text parts. */
export interface ReasoningMessage {
  type: 'reasoning';
  summary: SummaryContent[];
}
// Output computer action call
/** A computer action requested by the agent, identified by `call_id`. */
export interface ComputerCallMessage {
  type: 'computer_call';
  /** Identifier shared with the matching ComputerCallOutputMessage (same field name there). */
  call_id: string;
  status: 'completed' | 'failed' | 'pending';
  action: ComputerAction;
}
// Output computer action result (always a screenshot)
/** Result of a computer action, carrying an image payload. */
export interface ComputerCallOutputMessage {
  type: 'computer_call_output';
  call_id: string;
  output: ComputerResultContent;
}
// Output function call
/** A function (tool) call requested by the agent. */
export interface FunctionCallMessage {
  type: 'function_call';
  call_id: string;
  status: 'completed' | 'failed' | 'pending';
  /** Name of the function to call. */
  name: string;
  arguments: string; // JSON dict of kwargs
}
// Output function call result (always text)
/** Text result of a function call. */
export interface FunctionCallOutputMessage {
  type: 'function_call_output';
  call_id: string;
  output: string;
}
// #endregion



// #region Message Content
/**
 * One part of an input message.
 * Both payload fields are optional at the type level, so the compiler does not
 * enforce that `text` accompanies 'input_text' or `image_url` accompanies 'input_image'.
 */
export interface InputContent {
  type: 'input_image' | 'input_text';
  text?: string;
  image_url?: string;
}
/** One text part of an assistant message. */
export interface OutputContent {
  type: 'output_text';
  text: string;
}
/** One text part of a reasoning summary. */
export interface SummaryContent {
  type: 'summary_text';
  text: string;
}
/** Image payload returned for a computer call; two `type` literals are accepted. */
export interface ComputerResultContent {
  type: 'computer_screenshot' | 'input_image';
  /** Image location. Whether this is a data URL or a remote URL is not stated here. */
  image_url: string;
}
// #endregion



// #region Actions
/** Any computer action: the OpenAI-style set plus the Anthropic-specific additions below. */
export type ComputerAction = 
  | ComputerActionOpenAI
  | ComputerActionAnthropic;
// OpenAI Computer Actions
/** Union of the OpenAI-style actions; each member has a distinct `type` literal. */
export type ComputerActionOpenAI = 
  | ClickAction
  | DoubleClickAction
  | DragAction
  | KeyPressAction
  | MoveAction
  | ScreenshotAction
  | ScrollAction
  | TypeAction
  | WaitAction;
/** Single click at (x, y). `button` is required here, unlike in double-click and drag. */
export interface ClickAction {
  type: 'click';
  button: 'left' | 'right' | 'wheel' | 'back' | 'forward';
  x: number;
  y: number;
}
/** Double click at (x, y); `button` may be omitted. */
export interface DoubleClickAction {
  type: 'double_click';
  button?: 'left' | 'right' | 'wheel' | 'back' | 'forward';
  x: number;
  y: number;
}
/** Drag along a sequence of points; `button` may be omitted. */
export interface DragAction {
  type: 'drag';
  button?: 'left' | 'right' | 'wheel' | 'back' | 'forward';
  /** Ordered list of number pairs. Presumably [x, y] screen coordinates; TODO confirm. */
  path: Array<[number, number]>;
}
/** Key press described by a list of key names. */
export interface KeyPressAction {
  type: 'keypress';
  keys: string[];
}
/** Pointer move to (x, y). */
export interface MoveAction {
  type: 'move';
  x: number;
  y: number;
}
/** Screenshot request; carries no parameters. */
export interface ScreenshotAction {
  type: 'screenshot';
}
/** Scroll by (scroll_x, scroll_y) at position (x, y). Units of the scroll amounts are not stated here. */
export interface ScrollAction {
  type: 'scroll';
  scroll_x: number;
  scroll_y: number;
  x: number;
  y: number;
}
/** Type the given text. */
export interface TypeAction {
  type: 'type';
  text: string;
}
/** Wait action; carries no duration field in this type. */
export interface WaitAction {
  type: 'wait';
}
// Anthropic Computer Actions
/** Anthropic-specific actions declared in this file: left button down and up. */
export type ComputerActionAnthropic = 
  | LeftMouseDownAction
  | LeftMouseUpAction;
/** Press the left mouse button at (x, y). */
export interface LeftMouseDownAction {
  type: 'left_mouse_down';
  x: number;
  y: number;
}
/** Release the left mouse button at (x, y). */
export interface LeftMouseUpAction {
  type: 'left_mouse_up';
  x: number;
  y: number;
}
// #endregion
```

--------------------------------------------------------------------------------
/libs/python/agent/example.py:
--------------------------------------------------------------------------------

```python
"""
Example usage of the agent library with docstring-based tool definitions.
"""

import asyncio
import logging

from agent import ComputerAgent
from computer import Computer
from computer.helpers import sandboxed

@sandboxed()
def read_file(location: str) -> str:
    """Return the text stored in a file.

    Parameters
    ----------
    location : str
        Path of the file to open

    Returns
    -------
    str
        The file's text, or a message beginning with "Error reading file:"
        when opening or reading fails
    """
    try:
        with open(location, 'r') as handle:
            contents = handle.read()
    except Exception as exc:
        # Failures are reported as text instead of propagating to the caller.
        return f"Error reading file: {exc!s}"
    return contents

def save_note(content: str, filename: str = "note.txt") -> str:
    """Write text to a note file, replacing any existing contents.

    Parameters
    ----------
    content : str
        Text to write
    filename : str, optional
        Destination path (default is "note.txt")

    Returns
    -------
    str
        "Saved note to <filename>" on success, otherwise a message
        beginning with "Error saving note:"
    """
    try:
        with open(filename, 'w') as out:
            out.write(content)
    except Exception as err:
        # Failures are reported as text instead of propagating to the caller.
        return f"Error saving note: {err!s}"
    else:
        return f"Saved note to {filename}"

def calculate(a: int, b: int) -> int:
    """Add two integers.

    Parameters
    ----------
    a : int
        Left operand
    b : int
        Right operand

    Returns
    -------
    int
        The value of ``a + b``
    """
    total = a + b
    return total

async def main():
    """Run an interactive prompt loop that drives a cloud computer with ComputerAgent.

    Loads environment variables with ``dotenv``, requires ``CUA_CONTAINER_NAME``
    and ``CUA_API_KEY``, opens a ``Computer`` and then repeatedly reads a prompt
    from stdin, appends it to the conversation history and feeds the whole
    history to ``agent.run``. The loop ends when stdin reaches EOF (e.g. Ctrl-D).

    Raises
    ------
    RuntimeError
        If ``CUA_CONTAINER_NAME`` or ``CUA_API_KEY`` is missing or empty.
    """

    # Example 1: interactive agent loop on a cloud computer.
    # NOTE: the banner says "Claude", but the model enabled below is OpenAI's
    # computer-use-preview; uncomment one of the Anthropic models to use Claude.
    # Only the computer is registered as a tool here.
    print("=== Example 1: Claude with Computer ===")

    import os
    import dotenv
    dotenv.load_dotenv()

    # Validate with explicit checks rather than `assert`, which is stripped when
    # Python runs with -O and would let empty credentials through.
    container_name = os.getenv("CUA_CONTAINER_NAME")
    api_key = os.getenv("CUA_API_KEY")
    if not container_name:
        raise RuntimeError("CUA_CONTAINER_NAME is not set")
    if not api_key:
        raise RuntimeError("CUA_API_KEY is not set")

    async with Computer(
        os_type="linux",
        provider_type="cloud",
        name=container_name,
        api_key=api_key
    ) as computer:
        agent = ComputerAgent(
            # Supported models:

            # == OpenAI CUA (computer-use-preview) ==
            model="openai/computer-use-preview",

            # == Anthropic CUA (Claude > 3.5) ==
            # model="anthropic/claude-opus-4-20250514",
            # model="anthropic/claude-sonnet-4-20250514",
            # model="anthropic/claude-3-7-sonnet-20250219",
            # model="anthropic/claude-3-5-sonnet-20241022",

            # == UI-TARS ==
            # model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
            # TODO: add local mlx provider
            # model="mlx-community/UI-TARS-1.5-7B-6bit",
            # model="ollama_chat/0000/ui-tars-1.5-7b",

            # == Omniparser + Any LLM ==
            # model="omniparser+..."
            # model="omniparser+anthropic/claude-opus-4-20250514",

            tools=[computer],
            only_n_most_recent_images=3,
            verbosity=logging.INFO,
            trajectory_dir="trajectories",
            use_prompt_caching=True,
            max_trajectory_budget={ "max_budget": 1.0, "raise_error": True, "reset_after_each_run": False },
        )

        history = []
        while True:
            try:
                user_input = input("> ")
            except EOFError:
                # stdin was closed (Ctrl-D or end of piped input): leave the loop
                # so the `async with` block exits normally instead of crashing.
                break
            history.append({"role": "user", "content": user_input})

            # Non-streaming usage
            async for result in agent.run(history, stream=False):
                # Keep the agent's output in the history so later turns see it.
                history += result["output"]

                # # Print output
                # for item in result["output"]:
                #     if item["type"] == "message":
                #         print(item["content"][0]["text"])
                #     elif item["type"] == "computer_call":
                #         action = item["action"]
                #         action_type = action["type"]
                #         action_args = {k: v for k, v in action.items() if k != "type"}
                #         print(f"{action_type}({action_args})")
                #     elif item["type"] == "function_call":
                #         action = item["name"]
                #         action_args = item["arguments"]
                #         print(f"{action}({action_args})")
                #     elif item["type"] == "function_call_output":
                #         print("===>", item["output"])

# Start the example only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())
```

--------------------------------------------------------------------------------
/blog/trajectory-viewer.md:
--------------------------------------------------------------------------------

```markdown
# Trajectory Viewer for Cua

*Published on May 13, 2025 by Dillon DuPont*

Don’t forget to check out [Part 1: Building your own Computer-Use Operator](build-your-own-operator-on-macos-1) and [Part 2: Using the Agent framework](build-your-own-operator-on-macos-2) for setting up your Cua environment and basic tips and tricks!

## Introduction

Okay, so you’ve gotten your environment up and also tested a few agent runs. You’ll likely have encountered cases where your agent was successful at doing some tasks but also places where it got stuck or outright failed.
Now what?
If you’ve ever wondered exactly what your computer agent is doing and why it sometimes doesn’t do what you expected, then the Trajectory Viewer for Cua is here to help! Whether you’re a seasoned developer or someone who just wants to dive in and see results, this tool makes it easy to explore every step your agent takes on your screen.
Plus, if you want to start thinking about generating data to train your own agentic model (we’ll cover training in an upcoming blog, so look forward to it), then our Trajectory Viewer might be for you.

## So, what’s a “trajectory”?

Think of a trajectory as a detailed video recording of your agent’s journey:

- **Observations**: What did the agent see (the exact screen content) at each point in time?
- **Actions**: What clicks, keystrokes, or commands did it perform in response?
- **Decisions**: Which options did it choose, and why?

Especially for longer and more complex tasks, your agent will take multiple steps, perform multiple actions, and make multiple observations. By examining this record, you can pinpoint where things go right, and more importantly, where they go wrong.

## So, what’s Cua’s Trajectory Viewer and why use it?

The Trajectory Viewer for Cua is a GUI tool that helps you explore saved trajectories generated from your Cua computer agent runs. This tool provides a powerful way to:

- **Debug your agents**: See exactly what your agent saw to reproduce bugs
- **Analyze failure cases**: Identify the moment when your agent went off-script
- **Collect training data**: Export your trajectories for your own processing, training, and more!

The viewer allows you to see exactly what your agent observed and how it interacted with the computer all through your browser.

## Opening Trajectory Viewer in 3 Simple Steps

1. **Visit**: Open your browser and go to [https://www.trycua.com/trajectory-viewer](https://www.trycua.com/trajectory-viewer).
2. **Upload**: Drag and drop a trajectories folder or click Select Folder.
3. **Explore**: View your agent’s trajectories! All data stays in your browser unless you give permission otherwise.

![Trajectory Viewer Screenshot](./assets/trajectory-viewer.jpeg)

## Recording a Trajectory

### Using the Gradio UI

The simplest way to create agent trajectories is through the [Cua Agent Gradio UI](https://www.trycua.com/docs/quickstart-ui) by checking the "Save Trajectory" option.

### Using the ComputerAgent API

Trajectories are saved by default when using the ComputerAgent API:

```python
agent.run("book a flight for me")
```

You can explicitly control trajectory saving with the `save_trajectory` parameter:

```python
from cua import ComputerAgent

agent = ComputerAgent(save_trajectory=True)
agent.run("search for hotels in Boston")
```

Each trajectory is saved to its own timestamped folder inside a `trajectories` directory, for example: `trajectories/20250501_222749`

## Exploring and Analyzing Trajectories

Our Trajectory Viewer is designed to allow for thorough analysis and debugging in a friendly way. Once loaded, the viewer presents:

- **Timeline Slider**: Jump to any step in the session
- **Screen Preview**: See exactly what the agent saw
- **Action Details**: Review clicks, keypresses, and API calls
- **Logs & Metadata**: Inspect debug logs or performance stats

Use these features to:

- Step through each action and observation; understand your agent’s decision-making
- Understand why and where your agent failed
- Collect insights for improving your instructions, prompts, tasks, agent, etc.

The trajectory viewer provides a visual interface for stepping through each action your agent took, making it easy to see what your agent “sees”.

## Getting Started

Ready to see your agent in action? Head over to the Trajectory Viewer and load up your first session. Debug smarter, train faster, and stay in control (all within your browser).

Happy tinkering and Cua on!

Have questions or want to share feedback? Join our community on Discord or open an issue on GitHub.

```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Composed Agents
description: Combine grounding models with any LLM for computer-use capabilities
---

Composed agents combine the best of both worlds: specialized grounding models for precise click prediction and powerful LLMs for task planning and reasoning.

Use the format `"grounding_model+planning_model"` to create a composed agent with any vision-enabled LiteLLM-compatible model.

## How Composed Agents Work

1. **Planning Phase**: The planning model (LLM) analyzes the task and decides what actions to take (e.g., `click("find the login button")`, `type("username")`)
2. **Grounding Phase**: The grounding model converts element descriptions to precise coordinates
3. **Execution**: Actions are performed using the predicted coordinates

## Supported Grounding Models

Any model that supports `predict_click()` can be used as the grounding component. See the full list on [Grounding Models](./grounding-models).

- OpenCUA: `huggingface-local/xlangai/OpenCUA-{7B,32B}`
- GTA1 family: `huggingface-local/HelloKKMe/GTA1-{7B,32B,72B}`
- Holo 1.5 family: `huggingface-local/Hcompany/Holo1.5-{3B,7B,72B}`
- InternVL 3.5 family: `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}`
- UI‑TARS 1.5: `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` (also supports full CU)
- OmniParser (OCR): `omniparser` (requires combination with a LiteLLM vision model)
- Moondream3: `moondream3` (requires combination with a LiteLLM vision/text model)

## Supported Planning Models

Any vision-enabled LiteLLM-compatible model can be used as the planning component:

- Any All‑in‑one CUA (planning-capable). See [All‑in‑one CUAs](./computer-use-agents).
- Any VLM via LiteLLM providers: `anthropic/*`, `openai/*`, `openrouter/*`, `gemini/*`, `vertex_ai/*`, `huggingface-local/*`, `mlx/*`, etc.
- Examples:
  - **Anthropic**: `anthropic/claude-3-5-sonnet-20241022`, `anthropic/claude-opus-4-1-20250805`
  - **OpenAI**: `openai/gpt-5`, `openai/o3`, `openai/gpt-4o`
  - **Google**: `gemini/gemini-1.5-pro`, `vertex_ai/gemini-pro-vision`
  - **Local models**: Any Hugging Face vision-language model

## Usage Examples

### GTA1 + GPT-5

Use OpenAI's GPT-5 for planning with specialized grounding:

```python
agent = ComputerAgent(
    "huggingface-local/HelloKKMe/GTA1-7B+openai/gpt-5",
    tools=[computer]
)

async for _ in agent.run("Take a screenshot, analyze the UI, and click on the most prominent button"):
    pass
```

### GTA1 + Claude 3.5 Sonnet

Combine state-of-the-art grounding with powerful reasoning:

```python
agent = ComputerAgent(
    "huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-3-5-sonnet-20241022", 
    tools=[computer]
)

async for _ in agent.run("Open Firefox, navigate to github.com, and search for 'computer-use'"):
    pass
# Success! 🎉
# - Claude 3.5 Sonnet plans the sequence of actions
# - GTA1-7B provides precise click coordinates for each UI element
```

### UI-TARS + GPT-4o

Combine two different vision models for enhanced capabilities:

```python
agent = ComputerAgent(
    "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B+openai/gpt-4o",
    tools=[computer]
)

async for _ in agent.run("Help me fill out this form with my personal information"):
    pass
```

### Moondream3 + GPT-4o

Use the built-in Moondream3 grounding with any planning model. Moondream3 will detect UI elements on the latest screenshot, label them, and provide a user message listing detected element names.

```python
from agent import ComputerAgent
from computer import Computer

agent = ComputerAgent(
    "moondream3+openai/gpt-4o",
    tools=[computer]
)

async for _ in agent.run("Close the settings window, then open the Downloads folder"):
    pass
```

## Benefits of Composed Agents

- **Specialized Grounding**: Use models optimized for click prediction accuracy
- **Flexible Planning**: Choose any LLM for task reasoning and planning
- **Cost Optimization**: Use smaller grounding models with larger planning models only when needed
- **Performance**: Leverage the strengths of different model architectures

## Capabilities

Composed agents support both full computer-use runs and direct click prediction:

```python
agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-3-5-sonnet-20241022")

# Full computer-use agent capabilities
async for _ in agent.run("Complete this online form"):
    pass

# Direct click prediction (uses grounding model only)
coords = agent.predict_click("find the submit button")
```

---

For more information on individual model capabilities, see [Computer-Use Agents](./computer-use-agents) and [Grounding Models](./grounding-models).

```

--------------------------------------------------------------------------------
/blog/composite-agents.md:
--------------------------------------------------------------------------------

```markdown
# Announcing Cua Agent framework 0.4 and Composite Agents

*Published on August 26, 2025 by Dillon DuPont*

<img src="./assets/composite-agents.png" alt="Composite Agents">

So you want to build an agent that can use a computer. Great! You've probably discovered that there are now dozens of different AI models that claim they can click GUI buttons and fill out forms. Less great: actually getting them to work together is like trying to coordinate a group project where everyone speaks a different language and has invented seventeen different ways to say "click here".

Here's the thing about new GUI models: they're all special snowflakes. One model wants you to feed it images and expects coordinates back as percentages from 0 to 1. Another wants absolute pixel coordinates. A third model has invented its own numeral system with `<|loc095|><|loc821|>` tokens inside tool calls. Some models output Python code that calls `pyautogui.click(x, y)`. Others will start hallucinating coordinates if you forget to format all previous messages within a very specific GUI system prompt.

This is the kind of problem that makes you wonder if we're building the future of computing or just recreating the Tower of Babel with more GPUs.

## What we fixed

Agent framework 0.4 solves this by doing something radical: making all these different models speak the same language. 

Instead of writing separate code for each model's peculiarities, you now just pick a model with a string like `"anthropic/claude-3-5-sonnet-20241022"` or `"huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"`, and everything else Just Works™. Behind the scenes, we handle all the coordinate normalization, token parsing, and image preprocessing so you don't have to.

```python
# This works the same whether you're using Anthropic, OpenAI, or that new model you found on Hugging Face
agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",  # or any other supported model
    tools=[computer]
)
```

The output format is consistent across all providers (OpenAI, Anthropic, Vertex, Hugging Face, OpenRouter, etc.). No more writing different parsers for each model's creative interpretation of how to represent a mouse click.

## Composite Agents: Two Brains Are Better Than One

Here's where it gets interesting. We realized that you don't actually need one model to be good at everything. Some models are excellent at understanding what's on the screen—they can reliably identify buttons and text fields and figure out where to click. Other models are great at planning and reasoning but might be a bit fuzzy on the exact pixel coordinates.

So we let you combine them with a `+` sign:

```python
agent = ComputerAgent(
    # specify the grounding model first, then the planning model
    model="huggingface-local/HelloKKMe/GTA1-7B+huggingface-local/OpenGVLab/InternVL3_5-8B",
    tools=[computer]
)
```

This creates a composite agent where one model (the "grounding" model) handles the visual understanding and precise UI interactions, while the other (the "planning" model) handles the high-level reasoning and task orchestration. It's like having a pilot and a navigator, except they're both AI models and they're trying to help you star a GitHub repository.

You can even take a model that was never designed for computer use—like GPT-4o—and give it GUI capabilities by pairing it with a specialized vision model:

```python
agent = ComputerAgent(
    model="huggingface-local/HelloKKMe/GTA1-7B+openai/gpt-4o",
    tools=[computer]
)
```

## Example notebook

For a full, ready-to-run demo (install deps, local computer using Docker, and a composed agent example), see the notebook:

- https://github.com/trycua/cua/blob/models/opencua/notebooks/composite_agents_docker_nb.ipynb

## What's next

We're building integration with HUD evals, allowing us to curate and benchmark model combinations. This will help us identify which composite agent pairs work best for different types of tasks, and provide you with tested recommendations rather than just throwing model names at the wall to see what sticks.

If you try out version 0.4.x, we'd love to hear how it goes. Join us on Discord to share your results and let us know what model combinations work best for your projects.


---

## Links

* **Composite Agent Docs:** [https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents](https://docs.trycua.com/docs/agent-sdk/supported-agents/composed-agents)
* **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai)

Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build.
```

--------------------------------------------------------------------------------
/docs/content/docs/computer-sdk/computers.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Cua Computers
description: Understanding cua computer types and connection methods
---

<Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/computer_nb.ipynb" target="_blank">Jupyter Notebook</a> and <a href="https://github.com/trycua/cua/tree/main/examples/computer-example-ts" target="_blank">NodeJS project</a> are available for this documentation.</Callout>

Before we can automate apps using AI, we need to first connect to a Computer Server to give the AI a safe environment to execute workflows in.

Cua Computers are preconfigured virtual machines running the Computer Server. They can run macOS, Linux, or Windows, and are hosted either in a cloud-native container or on your host desktop.

## Cloud Sandbox

**Easiest & safest way to get started - works on any host OS**

This is a Cloud Sandbox running the Computer Server. Get a container at [trycua.com](https://www.trycua.com/).

<Tabs items={['Python', 'TypeScript']}>
  <Tab value="Python">
    ```python
    from computer import Computer

    computer = Computer(
        os_type="linux",
        provider_type="cloud",
        name="your-sandbox-name",
        api_key="your-api-key"
    )

    await computer.run() # Connect to the sandbox
    ```

  </Tab>
  <Tab value="TypeScript">
    ```typescript
    import { Computer, OSType } from '@trycua/computer';

    const computer = new Computer({
      osType: OSType.LINUX,
      name: "your-sandbox-name",
      apiKey: "your-api-key"
    });

    await computer.run(); // Connect to the sandbox
    ```

  </Tab>
</Tabs>

## Linux on Docker

**Run Linux desktop locally on macOS, Windows, or Linux hosts**

Cua provides two Docker images for running Linux desktops:

<Tabs items={['XFCE (Lightweight)', 'KASM (Full-Featured)']}>
  <Tab value="XFCE (Lightweight)">

    **Recommended for most use cases** - lightweight XFCE desktop with Firefox

    1. Install Docker Desktop or Docker Engine

    2. Pull the CUA XFCE image

    ```bash
    docker pull --platform=linux/amd64 trycua/cua-xfce:latest
    ```

    3. Connect with Computer

    ```python
    from computer import Computer

    computer = Computer(
        os_type="linux",
        provider_type="docker",
        image="trycua/cua-xfce:latest",
        name="my-xfce-container"
    )

    await computer.run() # Launch & connect to Docker sandbox
    ```

  </Tab>
  <Tab value="KASM (Full-Featured)">

    **Full-featured Ubuntu desktop** with additional applications

    1. Install Docker Desktop or Docker Engine

    2. Build or pull the CUA KASM image

    ```bash
    # Option 1: Pull from Docker Hub
    docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest

    # Option 2: Build locally
    cd libs/kasm
    docker build -t cua-ubuntu:latest .
    ```

    3. Connect with Computer

    ```python
    from computer import Computer

    computer = Computer(
        os_type="linux",
        provider_type="docker",
        image="trycua/cua-ubuntu:latest",
        name="my-kasm-container"
    )

    await computer.run() # Launch & connect to Docker sandbox
    ```

  </Tab>
</Tabs>

## Windows Sandbox

**Windows hosts only - requires Windows 10 Pro/Enterprise or Windows 11**

1. Enable Windows Sandbox
2. Install pywinsandbox dependency

```bash
pip install -U git+https://github.com/karkason/pywinsandbox.git
```

3. Connect with Computer

```python
from computer import Computer

computer = Computer(
    os_type="windows",
    provider_type="winsandbox",
    ephemeral=True # Windows Sandbox is always ephemeral
)

await computer.run() # Launch & connect to Windows Sandbox
```

## macOS VM

**macOS hosts only - requires Lume CLI**

1. Install lume cli

```bash
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
```

2. Start a local cua macOS VM

```bash
lume run macos-sequoia-cua:latest
```

3. Connect with Computer

```python
from computer import Computer

computer = Computer(
    os_type="macos",
    provider_type="lume",
    name="macos-sequoia-cua:latest"
)

await computer.run() # Launch & connect to the sandbox
```

## Your host desktop

You can also have agents control your desktop directly by running Computer Server without any containerization layer. Beware that AI models may perform risky actions.

```bash
pip install cua-computer-server
python -m computer_server
```

Connect with:

<Tabs items={['Python']}>
  <Tab value="Python">
    ```python

    computer = Computer(use_host_computer_server=True)
    await computer.run() # Connect to the host desktop

    ```

  </Tab>
</Tabs>

```

--------------------------------------------------------------------------------
/libs/lumier/src/bin/entry.sh:
--------------------------------------------------------------------------------

```bash
#!/usr/bin/env bash
#
# Lumier container entrypoint, part 1: exports SSH-related settings, sources the
# configuration and library scripts, and fills in defaults for VM_NAME and
# HOST_STORAGE_PATH. Strict error handling is deliberately left off in this part.

# SSH-related environment: empty sshpass prompt and /bin/echo as the askpass program
export SSHPASS_PROMPT=
export SSH_ASKPASS=/bin/echo
# SSH options (no host-key checking, no known_hosts file, errors only, quiet mode)
# exported as SSHPASS_OPTS — presumably consumed by the sourced libraries; confirm
export SSHPASS_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -q"

# We'll enable strict error checking AFTER initialization
# to prevent premature exits

# Directories holding the configuration and library scripts sourced below
CONFIG_DIR="/run/config"
LIB_DIR="/run/lib"

# Source constants if available
if [ -f "${CONFIG_DIR}/constants.sh" ]; then
  source "${CONFIG_DIR}/constants.sh"
fi

# Import utilities: source every regular *.sh file in LIB_DIR.
# The -f test also skips the literal pattern when the glob matches nothing.
for lib in "${LIB_DIR}"/*.sh; do
  if [ -f "$lib" ]; then
    source "$lib"
  fi
done

# Set VM_NAME from the environment, or fall back to the contents of /etc/hostname.
# NOTE(review): the original note says this is the container name (from --name);
# Docker's default hostname is usually the container ID — confirm.
if [ -z "${VM_NAME:-}" ]; then
    VM_NAME="$(cat /etc/hostname)"
    export VM_NAME
fi

# Default HOST_STORAGE_PATH to the literal value "ephemeral" if not set
if [ -z "${HOST_STORAGE_PATH:-}" ]; then
    HOST_STORAGE_PATH="ephemeral"
    
    # Tell user that ephemeral storage is being used
    echo "Using ephemeral storage. VM state will be lost when macOS cleans up temporary files."
    
    export HOST_STORAGE_PATH
fi

# Only check and report mountpoints in debug mode (LUMIER_DEBUG=1)
if [ "${LUMIER_DEBUG:-0}" == "1" ]; then
    if mountpoint -q /storage; then
        echo "/storage is mounted"
    fi
    if mountpoint -q /shared; then
        echo "/shared is mounted"
    fi
    # if mountpoint -q /data; then
    #     echo "/data is mounted"
    # fi
fi

# Warn (but continue) if we're not running as PID 1 (important for Docker signal handling)
if [ $$ -ne 1 ]; then
    echo "Warning: This script is not running as PID 1 (current PID: $$)."
    echo "Docker signal handling may not work properly when stopped from Docker Desktop."
fi

# Log startup info
echo "Lumier VM is starting..."

# Signal handler that stops the VM on container stop and, when ephemeral storage
# is in use, deletes the VM. It does not stop the noVNC proxy itself.
# Counter for signal handling
SIGNAL_COUNT=0

# cleanup SIGNAL_NAME
#   1st signal : graceful shutdown (stop VM, delete the ephemeral VM), then exit 0.
#   2nd signal : log that shutdown is already in progress and return.
#   3rd+ signal: exit 1 immediately.
# Reads VM_NAME, HOST_STORAGE_PATH and PULL_IN_PROGRESS; calls stop_vm, lume_get
# and lume_delete, which are defined outside this file.
cleanup() {
  local signal_name=$1
  set +e  # Don't exit on error in cleanup

  # Increment signal counter
  SIGNAL_COUNT=$((SIGNAL_COUNT + 1))

  # PULL_IN_PROGRESS is set outside this file. The handler runs after
  # `set -u` is enabled, so default it instead of expanding it bare: an unset
  # variable would otherwise abort the handler before the VM is stopped.
  local pull_in_progress="${PULL_IN_PROGRESS:-0}"

  # If this is the first signal, try graceful shutdown
  if [ "$SIGNAL_COUNT" -eq 1 ]; then
    echo "[cleanup] Caught $signal_name signal, shutting down..."

    # Check if we're in the middle of an image pull
    if [[ "$pull_in_progress" == "1" ]]; then
      echo "[cleanup] Interrupted during image pull, skipping VM stop."
    else
      echo "[cleanup] Stopping VM..."
      stop_vm true
    fi

    # Attempt to clean up the VM when ephemeral storage is in use
    if [[ "$HOST_STORAGE_PATH" == "ephemeral" ]]; then
      # First check if VM actually exists
      VM_INFO=$(lume_get "$VM_NAME" "$HOST_STORAGE_PATH" "json" "false")

      # Only try VM deletion if VM exists and not in the middle of a pull
      if [[ "$pull_in_progress" != "1" && "$VM_INFO" != *"Virtual machine not found"* ]]; then
        echo "[cleanup] Cleaning up VM..."
        lume_delete "$VM_NAME" "$HOST_STORAGE_PATH" > /dev/null 2>&1
      fi
    fi

    # Exit with success for the first signal
    exit 0
  fi

  # Third (or later) signal: give up on graceful shutdown and exit immediately
  if [ "$SIGNAL_COUNT" -ge 3 ]; then
    echo "got $SIGNAL_COUNT SIGTERM/SIGINTs, forcefully exiting"
    exit 1
  fi

  # Second signal: the exit is only forced on the next one, so say so instead
  # of claiming a forced exit that does not happen yet.
  echo "[cleanup] Caught $signal_name signal again (signal #$SIGNAL_COUNT); shutdown already in progress, send once more to force exit."
}
# Ensure we catch all typical container termination signals.
# Each trap passes the signal name to cleanup for logging.
trap 'cleanup SIGTERM' SIGTERM
trap 'cleanup SIGINT' SIGINT
trap 'cleanup SIGHUP' SIGHUP

# Now enable strict error handling after initialization
set -euo pipefail

# Start the VM with error handling
if ! start_vm; then
    echo "ERROR: Failed to start VM!" >&2
    exit 1
fi

# Start noVNC for VNC access (only when both VNC_PORT and VNC_PASSWORD are set)
NOVNC_PID=""
if [ -n "${VNC_PORT:-}" ] && [ -n "${VNC_PASSWORD:-}" ]; then
  # Only show this in debug mode
  if [ "${LUMIER_DEBUG:-0}" == "1" ]; then
    echo "Starting noVNC proxy with optimized color settings..."
  fi
  # Quote the expansions so a NOVNC_PATH containing spaces is not word-split.
  "${NOVNC_PATH}/utils/novnc_proxy" --vnc "host.docker.internal:${VNC_PORT}" --listen 8006 --web "${NOVNC_PATH}" > /dev/null 2>&1 &
  NOVNC_PID=$!
  disown "$NOVNC_PID"
  # The proxy listens on container port 8006; the host port depends on how the
  # container was started, hence the PORT placeholder in the URL.
  echo "noVNC interface available at: http://localhost:PORT/vnc.html?password=${VNC_PASSWORD}&autoconnect=true (replace PORT with the port you forwarded to 8006)"
fi

echo "Lumier is running. Press Ctrl+C to stop."

# Instead of tail -f /dev/null, use a wait loop that can be interrupted by signals
while true; do
  # Sleep in small increments to make signal handling more responsive
  sleep 1 &
  wait $!
  # Break the loop if we've received a signal
  if [ "$SIGNAL_COUNT" -gt 0 ]; then
    break
  fi
done
```

--------------------------------------------------------------------------------
/libs/lume/src/Server/Requests.swift:
--------------------------------------------------------------------------------

```swift
import ArgumentParser
import Foundation
import Virtualization

/// Request body for running a VM.
struct RunVMRequest: Codable {
    let noDisplay: Bool?
    let sharedDirectories: [SharedDirectoryRequest]?
    let recoveryMode: Bool?
    let storage: String?

    /// One requested shared directory: a host path and an optional read-only flag.
    struct SharedDirectoryRequest: Codable {
        let hostPath: String
        let readOnly: Bool?
    }

    /// Converts the requested shared directories into `SharedDirectory` values.
    ///
    /// - Returns: One `SharedDirectory` per request entry, in order; an empty
    ///   array when `sharedDirectories` is `nil`.
    /// - Throws: `ValidationError` when a host path does not exist or is not a directory.
    func parse() throws -> [SharedDirectory] {
        var parsed: [SharedDirectory] = []

        for entry in sharedDirectories ?? [] {
            // The host path must exist and be a directory.
            var isDir: ObjCBool = false
            let exists = FileManager.default.fileExists(
                atPath: entry.hostPath, isDirectory: &isDir)
            if !(exists && isDir.boolValue) {
                throw ValidationError(
                    "Host path does not exist or is not a directory: \(entry.hostPath)")
            }

            // A missing readOnly flag means read-write.
            let shared = SharedDirectory(
                hostPath: entry.hostPath,
                tag: VZVirtioFileSystemDeviceConfiguration.macOSGuestAutomountTag,
                readOnly: entry.readOnly ?? false
            )
            parsed.append(shared)
        }

        return parsed
    }
}

/// Request body for pulling an image.
struct PullRequest: Codable {
    let image: String
    let name: String?
    var registry: String
    var organization: String
    let storage: String?

    enum CodingKeys: String, CodingKey {
        case image
        case name
        case registry
        case organization
        case storage
    }

    /// Decodes the request; `registry` falls back to "ghcr.io" and
    /// `organization` to "trycua" when absent.
    init(from decoder: Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)

        self.image = try values.decode(String.self, forKey: .image)
        self.name = try values.decodeIfPresent(String.self, forKey: .name)

        let requestedRegistry = try values.decodeIfPresent(String.self, forKey: .registry)
        self.registry = requestedRegistry ?? "ghcr.io"

        let requestedOrganization = try values.decodeIfPresent(
            String.self, forKey: .organization)
        self.organization = requestedOrganization ?? "trycua"

        self.storage = try values.decodeIfPresent(String.self, forKey: .storage)
    }
}

/// Request body for creating a VM.
struct CreateVMRequest: Codable {
    let name: String
    let os: String
    let cpu: Int
    let memory: String
    let diskSize: String
    let display: String
    let ipsw: String?
    let storage: String?

    /// Runs `parseSize` on the `memory` and `diskSize` strings.
    ///
    /// - Returns: The parsed memory and disk-size values.
    /// - Throws: Whatever `parseSize` throws for an unparseable string.
    func parse() throws -> (memory: UInt64, diskSize: UInt64) {
        // Memory is parsed first, so its error wins when both strings are invalid.
        let parsedMemory = try parseSize(memory)
        let parsedDiskSize = try parseSize(diskSize)
        return (memory: parsedMemory, diskSize: parsedDiskSize)
    }
}

/// Request body for changing VM settings; every field is optional.
struct SetVMRequest: Codable {
    let cpu: Int?
    let memory: String?
    let diskSize: String?
    let display: String?
    let storage: String?

    /// Parses whichever of `memory`, `diskSize` and `display` are present.
    ///
    /// - Returns: Parsed values, with `nil` for each field missing from the request.
    /// - Throws: Errors from `parseSize`, or `ValidationError` when `display`
    ///   cannot be turned into a `VMDisplayResolution`.
    func parse() throws -> (memory: UInt64?, diskSize: UInt64?, display: VMDisplayResolution?) {
        var parsedMemory: UInt64? = nil
        if let memoryText = memory {
            parsedMemory = try parseSize(memoryText)
        }

        var parsedDiskSize: UInt64? = nil
        if let diskSizeText = diskSize {
            parsedDiskSize = try parseSize(diskSizeText)
        }

        var parsedDisplay: VMDisplayResolution? = nil
        if let displayText = display {
            guard let resolution = VMDisplayResolution(string: displayText) else {
                throw ValidationError(
                    "Invalid display resolution format: \(displayText). Expected format: WIDTHxHEIGHT")
            }
            parsedDisplay = resolution
        }

        return (memory: parsedMemory, diskSize: parsedDiskSize, display: parsedDisplay)
    }
}

/// Request body for cloning a VM; it has no custom coding or validation logic.
struct CloneRequest: Codable {
    let name: String  // presumably the existing VM to clone — confirm against the handler
    let newName: String  // presumably the name given to the clone — confirm
    let sourceLocation: String?  // optional; semantics are defined by the handler, not visible here
    let destLocation: String?  // optional; semantics are defined by the handler, not visible here
}

/// Request payload for pushing a local VM image to a registry.
///
/// `dryRun` and `reassemble` are less common for the API (default to false?),
/// and `verbose` is usually handled by server logging, so none of them are
/// part of this payload.
struct PushRequest: Codable {
    /// Name of the local VM.
    let name: String
    /// Base name for the image in the registry.
    let imageName: String
    /// List of tags to push.
    let tags: [String]
    /// Registry URL.
    var registry: String
    /// Organization/user in the registry.
    var organization: String
    /// Optional VM storage location or direct path.
    let storage: String?
    /// Chunk size.
    var chunkSizeMb: Int

    enum CodingKeys: String, CodingKey {
        case name
        case imageName
        case tags
        case registry
        case organization
        case storage
        case chunkSizeMb
    }

    /// Decodes the request, filling in defaults for omitted optional keys:
    /// `registry` → `"ghcr.io"`, `organization` → `"trycua"`, `chunkSizeMb` → `512`.
    init(from decoder: Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)

        // Required keys.
        name = try values.decode(String.self, forKey: .name)
        imageName = try values.decode(String.self, forKey: .imageName)
        tags = try values.decode([String].self, forKey: .tags)

        // Optional keys.
        storage = try values.decodeIfPresent(String.self, forKey: .storage)
        registry = try values.decodeIfPresent(String.self, forKey: .registry) ?? "ghcr.io"
        organization =
            try values.decodeIfPresent(String.self, forKey: .organization) ?? "trycua"
        chunkSizeMb = try values.decodeIfPresent(Int.self, forKey: .chunkSizeMb) ?? 512
    }
}

```

--------------------------------------------------------------------------------
/libs/python/agent/benchmarks/contrib.md:
--------------------------------------------------------------------------------

```markdown
# Contributing Reference Agent Implementations

This guide explains how to add your own reference agent implementations to the benchmark system.

## Adding Reference Agent Implementations

### 1. Implement the ModelProtocol

Create a new file in the `models/` directory that implements `ModelProtocol`:

```python
from models.base import ModelProtocol
from typing import Optional, Tuple
from PIL import Image

class YourModelName(ModelProtocol):
    """Template for a reference model; fill in the three async methods below."""

    def __init__(self, model_path: str):
        self.model_path = model_path
        # Placeholder for the loaded model object; nothing is loaded until load_model() runs.
        self._model = None
    
    @property
    def model_name(self) -> str:
        # The model path doubles as the model's name.
        return self.model_path
    
    async def load_model(self) -> None:
        """Load the model into memory."""
        # Your model loading logic here
        pass
    
    async def unload_model(self) -> None:
        """Unload the model from memory."""
        # Your model cleanup logic here
        pass
    
    async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]:
        """
        Predict click coordinates for the given image and instruction.
        
        Args:
            image: PIL Image to analyze
            instruction: Text instruction describing what to click
            
        Returns:
            Tuple of (x, y) coordinates or None if prediction fails
        """
        # Your prediction logic here
        # NOTE: `x` and `y` are placeholders - the prediction logic above must define them.
        return (x, y)  # Return predicted coordinates
```

### 2. Register Your Model

Add your model to the `get_available_models()` function in `utils.py`:

```python
def get_available_models() -> List[Union[str, ModelProtocol]]:
    """Return the models to benchmark: provider model strings and reference model instances."""
    models = [
        # Computer Agent SDK providers
        "huggingface-local/HelloKKMe/GTA1-7B",
        
        # Reference implementations
        GTA1Model("HelloKKMe/GTA1-7B"),
        YourModelName("path/to/your/model"),  # Add your model here
    ]
    return models
```

### 3. Test Your Implementation

Before submitting, test your model with the interactive tool:

```bash
python interactive.py
```

This will help you verify that your model loads correctly and produces reasonable predictions.

## Example: Adding a New Model

Here's a complete example of adding a hypothetical "MyVisionModel":

1. **Create `models/my_vision_model.py`:**
```python
import torch
from transformers import AutoModel, AutoProcessor
from models.base import ModelProtocol
from typing import Optional, Tuple
from PIL import Image

class MyVisionModel(ModelProtocol):
    """Example reference model built on Hugging Face `AutoModel`/`AutoProcessor`."""

    def __init__(self, model_path: str):
        self.model_path = model_path
        # Both stay None until load_model() is awaited.
        self.model = None
        self.processor = None
    
    @property
    def model_name(self) -> str:
        return f"MyVisionModel({self.model_path})"
    
    async def load_model(self) -> None:
        """Load the model and processor."""
        self.processor = AutoProcessor.from_pretrained(self.model_path)
        self.model = AutoModel.from_pretrained(
            self.model_path,
            torch_dtype=torch.float16,
            device_map="auto"
        )
    
    async def unload_model(self) -> None:
        """Clean up model resources."""
        # Dropping the references is enough; no separate `del` is needed.
        self.model = None
        self.processor = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    
    async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]:
        """
        Predict click coordinates.

        Args:
            image: PIL Image to analyze
            instruction: Text instruction describing what to click

        Returns:
            Tuple of (x, y) coordinates, or None if the model is not loaded
            or prediction fails
        """
        if self.model is None or self.processor is None:
            print("Prediction failed: model is not loaded; call load_model() first")
            return None

        try:
            # Preprocess inputs
            inputs = self.processor(
                text=instruction,
                images=image,
                return_tensors="pt"
            )
            # The model was placed with device_map="auto", so the inputs must
            # be moved to the same device before the forward pass.
            inputs = inputs.to(self.model.device)
            
            # Run inference
            with torch.no_grad():
                outputs = self.model(**inputs)
            
            # Extract coordinates (model-specific logic)
            x, y = self._extract_coordinates(outputs)
            return (int(x), int(y))
            
        except Exception as e:
            print(f"Prediction failed: {e}")
            return None
    
    def _extract_coordinates(self, outputs):
        """Extract x, y coordinates from model outputs."""
        # Your model-specific coordinate extraction logic.
        # Raise instead of silently returning None so a forgotten
        # implementation is reported clearly by predict_click().
        raise NotImplementedError("_extract_coordinates must be implemented for your model")
```

2. **Update `models/__init__.py`:**
```python
from .gta1 import GTA1Model
from .my_vision_model import MyVisionModel

__all__ = ["GTA1Model", "MyVisionModel"]
```

3. **Update `utils.py`:**
```python
from models import GTA1Model, MyVisionModel

def get_available_models() -> List[Union[str, ModelProtocol]]:
    """Return the models to benchmark, now including the new `MyVisionModel` entry."""
    models = [
        "huggingface-local/HelloKKMe/GTA1-7B",
        GTA1Model("HelloKKMe/GTA1-7B"),
        MyVisionModel("my-org/my-vision-model"),  # Add here
    ]
    return models
```

```

--------------------------------------------------------------------------------
/libs/lume/src/FileSystem/VMConfig.swift:
--------------------------------------------------------------------------------

```swift
import ArgumentParser
import Foundation
import Virtualization

/// Configuration of one shared directory: a host path, a tag, and an access mode.
struct SharedDirectory: Codable {
    let hostPath: String
    let tag: String
    let readOnly: Bool

    /// Colon-separated form `hostPath:tag:mode`, where mode is `ro` or `rw`.
    var string: String {
        let mode = readOnly ? "ro" : "rw"
        return [hostPath, tag, mode].joined(separator: ":")
    }
}

// MARK: - VMConfig
/// Persisted configuration of a VM.
///
/// Values live in private backing properties and are exposed through computed
/// properties and `set…` mutators. Encoding and decoding are hand-written so
/// that `display` is serialized as a resolution string.
struct VMConfig: Codable {

    // MARK: - Properties
    let os: String
    private var _cpuCount: Int?
    private var _memorySize: UInt64?
    private var _diskSize: UInt64?
    private var _macAddress: String?
    private var _display: VMDisplayResolution
    private var _hardwareModel: Data?
    private var _machineIdentifier: Data?

    // MARK: - Initialization

    /// Creates a configuration.
    ///
    /// - Parameter display: Resolution string. If it cannot be parsed, the
    ///   configuration falls back to `1024x768`.
    ///
    /// The initializer is declared `throws` for source compatibility with
    /// existing `try` call sites; it does not currently throw.
    init(
        os: String,
        cpuCount: Int? = nil,
        memorySize: UInt64? = nil,
        diskSize: UInt64? = nil,
        macAddress: String? = nil,
        display: String,
        hardwareModel: Data? = nil,
        machineIdentifier: Data? = nil
    ) throws {
        self.os = os
        self._cpuCount = cpuCount
        self._memorySize = memorySize
        self._diskSize = diskSize
        self._macAddress = macAddress
        self._display = VMDisplayResolution(string: display) ?? VMDisplayResolution(string: "1024x768")!
        self._hardwareModel = hardwareModel
        self._machineIdentifier = machineIdentifier
    }

    // MARK: - Accessors

    var display: VMDisplayResolution {
        get { _display }
        set { _display = newValue }
    }

    var cpuCount: Int? {
        get { _cpuCount }
        set { _cpuCount = newValue }
    }

    var memorySize: UInt64? {
        get { _memorySize }
        set { _memorySize = newValue }
    }

    var diskSize: UInt64? {
        get { _diskSize }
        set { _diskSize = newValue }
    }

    var hardwareModel: Data? {
        get { _hardwareModel }
        set { _hardwareModel = newValue }
    }

    var machineIdentifier: Data? {
        get { _machineIdentifier }
        set { _machineIdentifier = newValue }
    }

    var macAddress: String? {
        get { _macAddress }
        set { _macAddress = newValue }
    }

    // MARK: - Mutators

    mutating func setCpuCount(_ count: Int) {
        _cpuCount = count
    }

    mutating func setMemorySize(_ size: UInt64) {
        _memorySize = size
    }

    mutating func setDiskSize(_ size: UInt64) {
        _diskSize = size
    }

    mutating func setHardwareModel(_ hardwareModel: Data) {
        _hardwareModel = hardwareModel
    }

    mutating func setMachineIdentifier(_ machineIdentifier: Data) {
        _machineIdentifier = machineIdentifier
    }

    mutating func setMacAddress(_ newMacAddress: String) {
        self._macAddress = newMacAddress
    }

    mutating func setDisplay(_ newDisplay: VMDisplayResolution) {
        self._display = newDisplay
    }

    // MARK: - Codable
    enum CodingKeys: String, CodingKey {
        case _cpuCount = "cpuCount"
        case _memorySize = "memorySize"
        case _diskSize = "diskSize"
        case macAddress
        case display
        case _hardwareModel = "hardwareModel"
        case _machineIdentifier = "machineIdentifier"
        case os
    }

    /// Decodes a configuration.
    ///
    /// - Throws: `DecodingError` when a required key (`os`, `display`) is
    ///   missing, or `DecodingError.dataCorrupted` when `display` is not a
    ///   parsable resolution string.
    init(from decoder: Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)

        os = try container.decode(String.self, forKey: .os)
        _cpuCount = try container.decodeIfPresent(Int.self, forKey: ._cpuCount)
        _memorySize = try container.decodeIfPresent(UInt64.self, forKey: ._memorySize)
        _diskSize = try container.decodeIfPresent(UInt64.self, forKey: ._diskSize)
        _macAddress = try container.decodeIfPresent(String.self, forKey: .macAddress)

        // A malformed resolution in a stored config must surface as a decoding
        // error rather than crash the process through a force unwrap.
        let displayString = try container.decode(String.self, forKey: .display)
        guard let resolution = VMDisplayResolution(string: displayString) else {
            throw DecodingError.dataCorruptedError(
                forKey: .display,
                in: container,
                debugDescription:
                    "Invalid display resolution format: \(displayString). Expected format: WIDTHxHEIGHT"
            )
        }
        _display = resolution

        _hardwareModel = try container.decodeIfPresent(Data.self, forKey: ._hardwareModel)
        _machineIdentifier = try container.decodeIfPresent(Data.self, forKey: ._machineIdentifier)
    }

    /// Encodes the configuration; optional values that are `nil` are omitted
    /// and `display` is written as its string form.
    func encode(to encoder: Encoder) throws {
        var container = encoder.container(keyedBy: CodingKeys.self)

        // `os` is non-optional, so it is always written.
        try container.encode(os, forKey: .os)
        try container.encodeIfPresent(_cpuCount, forKey: ._cpuCount)
        try container.encodeIfPresent(_memorySize, forKey: ._memorySize)
        try container.encodeIfPresent(_diskSize, forKey: ._diskSize)
        try container.encodeIfPresent(_macAddress, forKey: .macAddress)
        try container.encode(display.string, forKey: .display)
        try container.encodeIfPresent(_hardwareModel, forKey: ._hardwareModel)
        try container.encodeIfPresent(_machineIdentifier, forKey: ._machineIdentifier)
    }
}

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/base.py:
--------------------------------------------------------------------------------

```python
"""
Base callback handler interface for ComputerAgent preprocessing and postprocessing hooks.
"""

from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional, Union


class AsyncCallbackHandler(ABC):
    """
    Base class for asynchronous agent callbacks.

    Every hook has a default implementation: notification hooks do nothing,
    ``on_llm_start`` / ``on_llm_end`` return their argument unchanged, and
    ``on_run_continue`` returns ``True``. Subclasses override only the hooks
    they need.
    """

    # ------------------------------------------------------------------ run loop

    async def on_run_start(
        self,
        kwargs: Dict[str, Any],
        old_items: List[Dict[str, Any]],
    ) -> None:
        """Hook invoked when an agent run loop begins."""
        return None

    async def on_run_end(
        self,
        kwargs: Dict[str, Any],
        old_items: List[Dict[str, Any]],
        new_items: List[Dict[str, Any]],
    ) -> None:
        """Hook invoked when an agent run loop finishes."""
        return None

    async def on_run_continue(
        self,
        kwargs: Dict[str, Any],
        old_items: List[Dict[str, Any]],
        new_items: List[Dict[str, Any]],
    ) -> bool:
        """Decide whether the agent run loop should keep going.

        Args:
            kwargs: Run arguments
            old_items: Original messages
            new_items: New messages generated during run

        Returns:
            True to continue execution, False to stop. The default always
            returns True.
        """
        return True

    # ------------------------------------------------------------ pre/post-process

    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Preprocess messages before they are sent to the agent loop.

        Args:
            messages: List of message dictionaries to preprocess

        Returns:
            List of preprocessed message dictionaries (the default returns
            the input list unchanged)
        """
        return messages

    async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Postprocess output after the agent loop returns it.

        Args:
            output: List of output message dictionaries to postprocess

        Returns:
            List of postprocessed output dictionaries (the default returns
            the input list unchanged)
        """
        return output

    # ------------------------------------------------------------------ tool calls

    async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
        """
        Hook invoked just before a computer call starts.

        Args:
            item: The computer call item dictionary
        """
        return None

    async def on_computer_call_end(
        self,
        item: Dict[str, Any],
        result: List[Dict[str, Any]],
    ) -> None:
        """
        Hook invoked after a computer call has completed.

        Args:
            item: The computer call item dictionary
            result: The result of the computer call
        """
        return None

    async def on_function_call_start(self, item: Dict[str, Any]) -> None:
        """
        Hook invoked just before a function call starts.

        Args:
            item: The function call item dictionary
        """
        return None

    async def on_function_call_end(
        self,
        item: Dict[str, Any],
        result: List[Dict[str, Any]],
    ) -> None:
        """
        Hook invoked after a function call has completed.

        Args:
            item: The function call item dictionary
            result: The result of the function call
        """
        return None

    # ------------------------------------------------------------------ observation

    async def on_text(self, item: Dict[str, Any]) -> None:
        """
        Hook invoked when a text message is encountered.

        Args:
            item: The message item dictionary
        """
        return None

    async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
        """
        Hook invoked just before an API call starts.

        Args:
            kwargs: The kwargs being passed to the API call
        """
        return None

    async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
        """
        Hook invoked after an API call has completed.

        Args:
            kwargs: The kwargs that were passed to the API call
            result: The result of the API call
        """
        return None

    async def on_usage(self, usage: Dict[str, Any]) -> None:
        """
        Hook invoked when usage information is received.

        Args:
            usage: The usage information
        """
        return None

    async def on_screenshot(self, screenshot: Union[str, bytes], name: str = "screenshot") -> None:
        """
        Hook invoked when a screenshot is taken.

        Args:
            screenshot: The screenshot image
            name: The name of the screenshot
        """
        return None

    async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
        """
        Hook invoked when responses are received.

        Args:
            kwargs: The kwargs being passed to the agent loop
            responses: The responses received
        """
        return None
```

--------------------------------------------------------------------------------
/examples/agent_examples.py:
--------------------------------------------------------------------------------

```python
"""Example demonstrating the ComputerAgent capabilities with the Omni provider."""

import asyncio
import logging
import traceback
import signal

from computer import Computer, VMProviderType

# Import the unified agent class and types
from agent import ComputerAgent

# Import utility functions
from utils import load_dotenv_files, handle_sigint

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


async def run_agent_example():
    """Drive a ComputerAgent through a fixed list of tasks, printing its output.

    The tasks share one message history, so each task sees the outputs of the
    previous ones. Any exception is logged, its traceback printed, and then
    re-raised.
    """
    print("\n=== Example: ComputerAgent with different models ===")

    try:
        # Local macOS computer.
        computer = Computer(os_type="macos", verbosity=logging.DEBUG)

        # Alternative: a remote Linux computer with Cua
        # computer = Computer(
        #     os_type="linux",
        #     api_key=os.getenv("CUA_API_KEY"),
        #     name=os.getenv("CUA_CONTAINER_NAME"),
        #     provider_type=VMProviderType.CLOUD,
        # )

        # Build the agent. Swap `model` for any of the commented alternatives.
        agent = ComputerAgent(
            # == OpenAI CUA (computer-use-preview) ==
            model="openai/computer-use-preview",

            # == Anthropic CUA (Claude > 3.5) ==
            # model="anthropic/claude-opus-4-20250514",
            # model="anthropic/claude-sonnet-4-20250514",
            # model="anthropic/claude-3-7-sonnet-20250219",
            # model="anthropic/claude-3-5-sonnet-20241022",

            # == UI-TARS ==
            # model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
            # model="mlx/mlx-community/UI-TARS-1.5-7B-6bit",
            # model="ollama_chat/0000/ui-tars-1.5-7b",

            # == Omniparser + Any LLM ==
            # model="omniparser+anthropic/claude-opus-4-20250514",
            # model="omniparser+ollama_chat/gemma3:12b-it-q4_K_M",

            tools=[computer],
            only_n_most_recent_images=3,
            verbosity=logging.DEBUG,
            trajectory_dir="trajectories",
            use_prompt_caching=True,
            max_trajectory_budget=1.0,
        )

        # Tasks executed in order against the same conversation.
        tasks = [
            "Look for a repository named trycua/cua on GitHub.",
            "Check the open issues, open the most recent one and read it.",
            "Clone the repository in users/lume/projects if it doesn't exist yet.",
            "Open the repository with an app named Cursor (on the dock, black background and white cube icon).",
            "From Cursor, open Composer if not already open.",
            "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.",
        ]
        total = len(tasks)

        # Message-based conversation history shared by all tasks.
        history = []

        for number, task in enumerate(tasks, start=1):
            print(f"\nExecuting task {number}/{total}: {task}")

            # The task becomes the next user message.
            history.append({"role": "user", "content": task})

            async for result in agent.run(history, stream=False):
                outputs = result.get("output", [])

                # Feed the agent's outputs back into the conversation.
                history.extend(outputs)

                # Echo each output item for debugging.
                for item in outputs:
                    kind = item.get("type")
                    if kind == "message":
                        for part in item.get("content", []):
                            text = part.get("text")
                            if text:
                                print(f"Agent: {text}")
                    elif kind == "computer_call":
                        action = item.get("action", {})
                        print(f"Computer Action: {action.get('type', '')}({action})")
                    elif kind == "computer_call_output":
                        print("Computer Output: [Screenshot/Result]")

            print(f"✅ Task {number}/{total} completed: {task}")

    except Exception as exc:
        logger.error(f"Error in run_agent_example: {exc}")
        traceback.print_exc()
        raise


def main():
    """Script entry point: run the ComputerAgent example.

    Calls ``load_dotenv_files()``, installs ``handle_sigint`` as the SIGINT
    handler, then runs ``run_agent_example()`` to completion. Any exception is
    printed with its traceback instead of propagating.
    """
    try:
        load_dotenv_files()

        # Install the SIGINT handler so Ctrl+C exits gracefully.
        signal.signal(signal.SIGINT, handle_sigint)

        asyncio.run(run_agent_example())
    except Exception as exc:
        print(f"Error running example: {exc}")
        traceback.print_exc()

if __name__ == "__main__":
    main()

```

--------------------------------------------------------------------------------
/libs/lume/src/Virtualization/DarwinImageLoader.swift:
--------------------------------------------------------------------------------

```swift
import Foundation
import Virtualization

/// Handles loading and validation of macOS restore images (IPSW files).
/// Provides functionality to:
/// - Fetch the latest supported macOS restore image URL
/// - Load and validate image requirements for VM creation
/// - Extract hardware model and auxiliary storage configuration
protocol ImageLoader: Sendable {
    /// Requirements type shared with the concrete `DarwinImageLoader`.
    typealias ImageRequirements = DarwinImageLoader.ImageRequirements

    /// Returns the URL of the latest supported restore image.
    func fetchLatestSupportedURL() async throws -> URL

    /// Loads the restore image at `url` and returns its requirements
    /// (hardware model, minimum CPU count, minimum memory size).
    func loadImageRequirements(from url: URL) async throws -> ImageRequirements

    /// Downloads the latest supported restore image and returns the path of the downloaded file.
    func downloadLatestImage() async throws -> Path
}

/// `ImageLoader` implementation backed by `VZMacOSRestoreImage` and a
/// delegate-based `URLSession` download.
///
/// The class acts as its own `URLSessionDownloadDelegate`: `downloadLatestImage()`
/// stores a completion handler that the delegate callbacks invoke once.
final class DarwinImageLoader: NSObject, ImageLoader, @unchecked Sendable, URLSessionDownloadDelegate {
    /// Requirements taken from a restore image's most featureful supported configuration.
    struct ImageRequirements: Sendable {
        let hardwareModel: Data
        let minimumSupportedCPUCount: Int
        let minimumSupportedMemorySize: UInt64
    }

    enum ImageError: Error {
        case invalidImage
        case unsupportedConfiguration
        case downloadFailed
    }

    private var lastLoggedProgress: Double = 0.0
    private var progressLogger = ProgressLogger()
    /// Handler for the download currently in flight; cleared as soon as it fires.
    private var completionHandler: ((URL?, Error?) -> Void)?

    /// Asks the Virtualization framework for the latest supported restore image
    /// and returns its URL.
    func fetchLatestSupportedURL() async throws -> URL {
        try await withCheckedThrowingContinuation { continuation in
            VZMacOSRestoreImage.fetchLatestSupported { result in
                switch result {
                case .success(let image):
                    continuation.resume(returning: image.url)
                case .failure(let error):
                    continuation.resume(throwing: error)
                }
            }
        }
    }

    /// Loads the restore image at `url` and extracts its requirements.
    ///
    /// - Throws: `ImageError.unsupportedConfiguration` when the image has no
    ///   supported configuration, or any error raised while loading the image.
    func loadImageRequirements(from url: URL) async throws -> ImageRequirements {
        let image = try await VZMacOSRestoreImage.image(from: url)
        guard let requirements = image.mostFeaturefulSupportedConfiguration else {
            throw ImageError.unsupportedConfiguration
        }

        return ImageRequirements(
            hardwareModel: requirements.hardwareModel.dataRepresentation,
            minimumSupportedCPUCount: requirements.minimumSupportedCPUCount,
            minimumSupportedMemorySize: requirements.minimumSupportedMemorySize
        )
    }

    /// Downloads the latest supported restore image to `latest.ipsw` in the
    /// temporary directory, replacing any existing file, and returns its path.
    ///
    /// - Throws: The transport error reported by `URLSession`,
    ///   `ImageError.downloadFailed` when the server answers with a non-2xx
    ///   status or no file is delivered, or a file-system error from moving
    ///   the downloaded file.
    func downloadLatestImage() async throws -> Path {
        let url = try await fetchLatestSupportedURL()
        let tempDir = FileManager.default.temporaryDirectory
        let downloadPath = tempDir.appendingPathComponent("latest.ipsw")

        // Reset progress logger state
        progressLogger = ProgressLogger(threshold: 0.01)

        // Create a continuation to wait for download completion
        return try await withCheckedThrowingContinuation { continuation in
            let session = URLSession(configuration: .default, delegate: self, delegateQueue: nil)
            let task = session.downloadTask(with: url)

            // Invoked (at most once) from the delegate callbacks below.
            self.completionHandler = { location, error in
                if let error = error {
                    continuation.resume(throwing: error)
                    return
                }

                guard let location = location else {
                    continuation.resume(throwing: ImageError.downloadFailed)
                    return
                }

                do {
                    // Remove existing file if it exists
                    if FileManager.default.fileExists(atPath: downloadPath.path) {
                        try FileManager.default.removeItem(at: downloadPath)
                    }

                    // The temporary file is only valid until the delegate
                    // callback returns, so it is moved synchronously here.
                    try FileManager.default.moveItem(at: location, to: downloadPath)
                    Logger.info("Download completed and moved to: \(downloadPath.path)")
                    continuation.resume(returning: Path(downloadPath.path))
                } catch {
                    continuation.resume(throwing: error)
                }
            }

            task.resume()

            // A session keeps a strong reference to its delegate until it is
            // invalidated. This lets the running task finish and then releases
            // the session and the delegate reference.
            session.finishTasksAndInvalidate()
        }
    }

    /// Delivers the download outcome to the pending handler at most once.
    ///
    /// Clearing the handler first prevents a second delegate callback from
    /// resuming the continuation twice and releases the captured continuation.
    private func finishDownload(location: URL?, error: Error?) {
        guard let handler = completionHandler else { return }
        completionHandler = nil
        handler(location, error)
    }

    func urlSession(_ session: URLSession, downloadTask: URLSessionDownloadTask, didWriteData bytesWritten: Int64, totalBytesWritten: Int64, totalBytesExpectedToWrite: Int64) {
        // The expected size is not positive when the server does not report a
        // content length; skip logging instead of producing a negative or
        // non-finite progress value.
        guard totalBytesExpectedToWrite > 0 else { return }
        let progress = Double(totalBytesWritten) / Double(totalBytesExpectedToWrite)
        progressLogger.logProgress(current: progress, context: "Downloading IPSW")
    }

    func urlSession(_ session: URLSession, downloadTask: URLSessionDownloadTask, didFinishDownloadingTo location: URL) {
        // An HTTP error response still "finishes" the download with the error
        // body as the file; do not treat that as a valid image.
        if let http = downloadTask.response as? HTTPURLResponse,
            !(200...299).contains(http.statusCode)
        {
            finishDownload(location: nil, error: ImageError.downloadFailed)
            return
        }

        finishDownload(location: location, error: nil)
    }

    func urlSession(_ session: URLSession, task: URLSessionTask, didCompleteWithError error: Error?) {
        // Successful completion is already reported by didFinishDownloadingTo;
        // only failures are forwarded from here.
        if let error = error {
            finishDownload(location: nil, error: error)
        }
    }
}
```

--------------------------------------------------------------------------------
/.github/workflows/pypi-publish-computer.yml:
--------------------------------------------------------------------------------

```yaml
name: Publish Computer Package

# Triggers:
#   - push of a tag matching "computer-v*"
#   - manual dispatch with an explicit version
#   - call from another workflow with an explicit version
on:
  push:
    tags:
      - "computer-v*"
  workflow_dispatch:
    inputs:
      version:
        description: "Version to publish (without v prefix)"
        required: true
        default: "0.1.0"
  workflow_call:
    inputs:
      version:
        description: "Version to publish"
        required: true
        type: string

# Adding permissions at workflow level
permissions:
  contents: write

jobs:
  # Resolves the version to publish and pins the cua-core dependency range in
  # libs/python/computer/pyproject.toml to the latest release found on PyPI.
  prepare:
    runs-on: macos-latest
    outputs:
      version: ${{ steps.get-version.outputs.version }}
      core_version: ${{ steps.update-deps.outputs.core_version }}
    steps:
      - uses: actions/checkout@v4

      - name: Determine version
        id: get-version
        # Event data is passed through the environment instead of being
        # interpolated into the script text, so user-supplied values (the
        # `version` input, the tag ref) are never parsed as shell code.
        env:
          EVENT_NAME: ${{ github.event_name }}
          GIT_REF: ${{ github.ref }}
          DISPATCH_VERSION: ${{ github.event.inputs.version }}
          CALL_VERSION: ${{ inputs.version }}
        run: |
          if [ "$EVENT_NAME" == "push" ]; then
            # Extract version from tag (for package-specific tags)
            if [[ "$GIT_REF" =~ ^refs/tags/computer-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then
              VERSION=${BASH_REMATCH[1]}
            else
              echo "Invalid tag format for computer"
              exit 1
            fi
          elif [ "$EVENT_NAME" == "workflow_dispatch" ]; then
            # Use version from workflow dispatch
            VERSION=$DISPATCH_VERSION
          else
            # Use version from workflow_call
            VERSION=$CALL_VERSION
          fi
          echo "VERSION=$VERSION"
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      - name: Update dependencies to latest versions
        id: update-deps
        run: |
          cd libs/python/computer
          # Install required package for PyPI API access
          pip install requests

          # Create a more robust Python script for PyPI version checking
          cat > get_latest_versions.py << 'EOF'
          import requests
          import json
          import sys

          def get_package_version(package_name, fallback="0.1.0"):
              try:
                  response = requests.get(f'https://pypi.org/pypi/{package_name}/json')
                  print(f"API Response Status for {package_name}: {response.status_code}", file=sys.stderr)
                  
                  if response.status_code != 200:
                      print(f"API request failed for {package_name}, using fallback version", file=sys.stderr)
                      return fallback
                  
                  data = json.loads(response.text)
                  
                  if 'info' not in data:
                      print(f"Missing 'info' key in API response for {package_name}, using fallback version", file=sys.stderr)
                      return fallback
                      
                  return data['info']['version']
              except Exception as e:
                  print(f"Error fetching version for {package_name}: {str(e)}", file=sys.stderr)
                  return fallback

          # Get latest versions
          print(get_package_version('cua-core'))
          EOF

          # Execute the script to get the versions
          VERSIONS=($(python get_latest_versions.py))
          LATEST_CORE=${VERSIONS[0]}

          echo "Latest cua-core version: $LATEST_CORE"

          # Output the versions for the next job
          echo "core_version=$LATEST_CORE" >> $GITHUB_OUTPUT

          # Determine major version for version constraint
          CORE_MAJOR=$(echo $LATEST_CORE | cut -d. -f1)
          NEXT_CORE_MAJOR=$((CORE_MAJOR + 1))

          # Update dependencies in pyproject.toml
          if [[ "$OSTYPE" == "darwin"* ]]; then
            # macOS version of sed needs an empty string for -i
            sed -i '' "s/\"cua-core>=.*,<.*\"/\"cua-core>=$LATEST_CORE,<$NEXT_CORE_MAJOR.0.0\"/" pyproject.toml
          else
            # Linux version
            sed -i "s/\"cua-core>=.*,<.*\"/\"cua-core>=$LATEST_CORE,<$NEXT_CORE_MAJOR.0.0\"/" pyproject.toml
          fi

          # Display the updated dependencies
          echo "Updated dependencies in pyproject.toml:"
          grep -E "cua-core" pyproject.toml

  # Delegates the build and upload to the shared reusable PyPI workflow,
  # using the version resolved by the `prepare` job.
  publish:
    needs: prepare
    uses: ./.github/workflows/pypi-reusable-publish.yml
    with:
      package_name: "computer"
      package_dir: "libs/python/computer"
      version: ${{ needs.prepare.outputs.version }}
      is_lume_package: false
      base_package_name: "cua-computer"
    secrets:
      PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}

  # Runs after publishing and records the resolved cua-core version in $GITHUB_ENV.
  # NOTE(review): values appended to $GITHUB_ENV are only visible to later steps
  # of this same job, and this job has no further steps - confirm whether the
  # job is still needed or whether a job output was intended.
  set-env-variables:
    needs: [prepare, publish]
    runs-on: macos-latest
    steps:
      - name: Set environment variables for use in other jobs
        run: |
          echo "CORE_VERSION=${{ needs.prepare.outputs.core_version }}" >> $GITHUB_ENV

```

--------------------------------------------------------------------------------
/libs/xfce/Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
# CUA Docker XFCE Container
# Vanilla XFCE desktop with noVNC and computer-server

FROM ubuntu:22.04

# Avoid prompts from apt
ENV DEBIAN_FRONTEND=noninteractive

# Set environment variables
ENV HOME=/home/cua
ENV DISPLAY=:1
ENV VNC_PORT=5901
ENV NOVNC_PORT=6901
ENV API_PORT=8000
ENV VNC_RESOLUTION=1024x768
ENV VNC_COL_DEPTH=24

# Install system dependencies first (including sudo)
RUN apt-get update && apt-get install -y \
    # System utilities
    sudo \
    # Desktop environment
    xfce4 \
    xfce4-terminal \
    dbus-x11 \
    # VNC server
    tigervnc-standalone-server \
    tigervnc-common \
    # noVNC dependencies
    python3 \
    python3-pip \
    python3-numpy \
    git \
    net-tools \
    netcat \
    supervisor \
    # Computer-server dependencies
    python3-tk \
    python3-dev \
    gnome-screenshot \
    wmctrl \
    ffmpeg \
    socat \
    xclip \
    # Browser
    wget \
    software-properties-common \
    # Build tools
    build-essential \
    libncursesw5-dev \
    libssl-dev \
    libsqlite3-dev \
    tk-dev \
    libgdbm-dev \
    libc6-dev \
    libbz2-dev \
    libffi-dev \
    zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Remove screensavers and power manager to avoid popups and lock screens
RUN apt-get remove -y \
    xfce4-power-manager \
    xfce4-power-manager-data \
    xfce4-power-manager-plugins \
    xfce4-screensaver \
    light-locker \
    xscreensaver \
    xscreensaver-data || true

# Create user after sudo is installed
RUN useradd -m -s /bin/bash -G sudo cua && \
    echo "cua:cua" | chpasswd && \
    echo "cua ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers

# Install Firefox from Mozilla PPA (snap-free) - inline to avoid script issues
# The apt pin (priority 1001) makes the PPA build win over the distro's
# snap-transition package. The preferences file suppresses first-run pages and
# telemetry prompts.
# printf is used instead of echo for the multi-line files: whether echo expands
# "\n" is implementation-defined (dash does, bash's builtin does not by
# default), whereas printf always interprets the escapes in its format string.
RUN apt-get update && \
    add-apt-repository -y ppa:mozillateam/ppa && \
    printf 'Package: *\nPin: release o=LP-PPA-mozillateam\nPin-Priority: 1001\n' > /etc/apt/preferences.d/mozilla-firefox && \
    apt-get update && \
    apt-get install -y firefox && \
    printf '%s\n' \
        'pref("datareporting.policy.firstRunURL", "");' \
        'pref("datareporting.policy.dataSubmissionEnabled", false);' \
        'pref("datareporting.healthreport.service.enabled", false);' \
        'pref("datareporting.healthreport.uploadEnabled", false);' \
        'pref("trailhead.firstrun.branches", "nofirstrun-empty");' \
        'pref("browser.aboutwelcome.enabled", false);' \
        > /usr/lib/firefox/browser/defaults/preferences/firefox.js && \
    update-alternatives --install /usr/bin/x-www-browser x-www-browser /usr/bin/firefox 100 && \
    update-alternatives --install /usr/bin/gnome-www-browser gnome-www-browser /usr/bin/firefox 100 && \
    rm -rf /var/lib/apt/lists/*

# Install noVNC
# Shallow clones (--depth 1) fetch only the tip commit: the checked-out files
# are the same as with a full clone, but the layer does not carry the full git
# history. index.html is symlinked to vnc.html so the web root serves the
# client directly.
RUN git clone --depth 1 https://github.com/novnc/noVNC.git /opt/noVNC && \
    git clone --depth 1 https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \
    ln -s /opt/noVNC/vnc.html /opt/noVNC/index.html

# Pre-create cache directory with correct ownership before pip install
RUN mkdir -p /home/cua/.cache && \
    chown -R cua:cua /home/cua/.cache

# Install computer-server
RUN pip3 install cua-computer-server

# Fix any cache files created by pip
RUN chown -R cua:cua /home/cua/.cache

# Copy startup scripts
COPY src/supervisor/ /etc/supervisor/conf.d/
COPY src/scripts/ /usr/local/bin/

# Make scripts executable
RUN chmod +x /usr/local/bin/*.sh

# Setup VNC
USER cua
WORKDIR /home/cua

# Create VNC directory (no password needed with SecurityTypes None)
RUN mkdir -p $HOME/.vnc

# Configure XFCE for first start
RUN mkdir -p $HOME/.config/xfce4/xfconf/xfce-perchannel-xml $HOME/.config/xfce4 $HOME/.config/autostart

# Copy XFCE config to disable browser launching and welcome screens
COPY --chown=cua:cua src/xfce-config/helpers.rc $HOME/.config/xfce4/helpers.rc
COPY --chown=cua:cua src/xfce-config/xfce4-session.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-session.xml
COPY --chown=cua:cua src/xfce-config/xfce4-power-manager.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-power-manager.xml

# Disable autostart for screensaver, lock screen, and power manager
# Each file is a per-user autostart override containing "Hidden=true".
# printf is used instead of echo because whether echo expands "\n" is
# implementation-defined (it depends on which shell backs /bin/sh); printf
# always interprets the escape, so every file reliably gets two lines.
RUN for entry in xfce4-tips-autostart xfce4-screensaver light-locker xfce4-power-manager; do \
        printf '[Desktop Entry]\nHidden=true\n' > "$HOME/.config/autostart/$entry.desktop"; \
    done && \
    chown -R cua:cua $HOME/.config

# Create storage and shared directories, and Firefox cache directory
RUN mkdir -p $HOME/storage $HOME/shared $HOME/.cache/dconf $HOME/.mozilla/firefox && \
    chown -R cua:cua $HOME/storage $HOME/shared $HOME/.cache $HOME/.mozilla $HOME/.vnc

USER root

# Expose ports
EXPOSE $VNC_PORT $NOVNC_PORT $API_PORT

# Start services via supervisor
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"]

```

--------------------------------------------------------------------------------
/libs/python/computer-server/computer_server/cli.py:
--------------------------------------------------------------------------------

```python
"""
Command-line interface for the Computer API server.
"""

import argparse
import asyncio
import logging
import os
import sys
import threading
from typing import List, Optional

from .server import Server

logger = logging.getLogger(__name__)


def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
    """Build the argument parser for the server CLI and parse ``args``.

    Args:
        args: Argument strings to parse. When ``None``, argparse falls back
            to the process's own command line.

    Returns:
        The populated ``argparse.Namespace`` with ``host``, ``port``,
        ``log_level``, ``ssl_keyfile``, ``ssl_certfile``, ``watchdog``,
        ``watchdog_interval`` and ``no_restart`` attributes.
    """
    # (flag, keyword arguments) pairs, registered in this order so the
    # generated --help output keeps the same option ordering.
    option_table = (
        (
            "--host",
            {"default": "0.0.0.0", "help": "Host to bind the server to (default: 0.0.0.0)"},
        ),
        (
            "--port",
            {"type": int, "default": 8000, "help": "Port to bind the server to (default: 8000)"},
        ),
        (
            "--log-level",
            {
                "choices": ["debug", "info", "warning", "error", "critical"],
                "default": "info",
                "help": "Logging level (default: info)",
            },
        ),
        (
            "--ssl-keyfile",
            {"type": str, "help": "Path to SSL private key file (enables HTTPS)"},
        ),
        (
            "--ssl-certfile",
            {"type": str, "help": "Path to SSL certificate file (enables HTTPS)"},
        ),
        (
            "--watchdog",
            {
                "action": "store_true",
                "help": "Enable watchdog monitoring (automatically enabled if CONTAINER_NAME env var is set)",
            },
        ),
        (
            "--watchdog-interval",
            {
                "type": int,
                "default": 30,
                "help": "Watchdog ping interval in seconds (default: 30)",
            },
        ),
        (
            "--no-restart",
            {"action": "store_true", "help": "Disable automatic server restart in watchdog"},
        ),
    )

    cli = argparse.ArgumentParser(description="Start the Computer API server")
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    return cli.parse_args(args)


def main() -> None:
    """Main entry point for the CLI.

    Parses the command line, configures logging, optionally starts the
    watchdog in a daemon thread, and then starts the server.

    The watchdog is requested when ``--watchdog`` is passed or the
    ``CONTAINER_NAME`` environment variable is set. It is never started when
    ``sys.platform`` starts with ``"win"``; in that case a warning is logged
    instead of the "enabling watchdog" message.

    Exits the process with status 0 when the server is interrupted with
    ``KeyboardInterrupt`` and with status 1 when ``server.start()`` raises any
    other exception.
    """
    args = parse_args()

    # Configure logging
    logging.basicConfig(
        level=getattr(logging, args.log_level.upper()),
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    # Check if watchdog should be enabled
    container_name = os.environ.get("CONTAINER_NAME")
    watchdog_requested = args.watchdog or bool(container_name)
    watchdog_supported = not sys.platform.startswith("win")
    enable_watchdog = watchdog_requested and watchdog_supported

    if watchdog_requested and not watchdog_supported:
        # Previously the "enabling watchdog" messages were logged here even
        # though the watchdog is never started on Windows.
        logger.warning("Watchdog was requested but is disabled on Windows; continuing without it")
    elif container_name:
        logger.info(f"Container environment detected (CONTAINER_NAME={container_name}), enabling watchdog")
    elif args.watchdog:
        logger.info("Watchdog explicitly enabled via --watchdog flag")

    # Start watchdog if enabled
    if enable_watchdog:
        logger.info(f"Starting watchdog monitoring with {args.watchdog_interval}s interval")

        def run_watchdog_thread():
            """Run watchdog in a separate thread with its own event loop."""
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                # Create CLI args dict for watchdog
                cli_args = {
                    'host': args.host,
                    'port': args.port,
                    'log_level': args.log_level,
                    'ssl_keyfile': args.ssl_keyfile,
                    'ssl_certfile': args.ssl_certfile
                }

                # Create watchdog with restart settings
                # (imported lazily so the module is only loaded when needed)
                from .watchdog import Watchdog
                watchdog = Watchdog(
                    cli_args=cli_args,
                    ping_interval=args.watchdog_interval
                )
                watchdog.restart_enabled = not args.no_restart

                loop.run_until_complete(watchdog.start_monitoring())
            except Exception as e:
                # Best effort: a watchdog failure must not take the server down.
                logger.error(f"Watchdog error: {e}")
            finally:
                loop.close()

        # Start watchdog in background thread; daemon=True so it never blocks
        # interpreter shutdown.
        watchdog_thread = threading.Thread(
            target=run_watchdog_thread,
            daemon=True,
            name="watchdog"
        )
        watchdog_thread.start()

    # Create and start the server
    logger.info(f"Starting CUA Computer API server on {args.host}:{args.port}...")

    # Handle SSL configuration: HTTPS needs both the key and the certificate.
    ssl_args = {}
    if args.ssl_keyfile and args.ssl_certfile:
        ssl_args = {
            "ssl_keyfile": args.ssl_keyfile,
            "ssl_certfile": args.ssl_certfile,
        }
        logger.info("HTTPS mode enabled with SSL certificates")
    elif args.ssl_keyfile or args.ssl_certfile:
        logger.warning("Both --ssl-keyfile and --ssl-certfile are required for HTTPS. Running in HTTP mode.")
    else:
        logger.info("HTTP mode (no SSL certificates provided)")

    server = Server(host=args.host, port=args.port, log_level=args.log_level, **ssl_args)

    try:
        server.start()
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
        sys.exit(0)
    except Exception as e:
        logger.error(f"Error starting server: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/computers/cua.py:
--------------------------------------------------------------------------------

```python
"""
Computer handler implementation for OpenAI computer-use-preview protocol.
"""

import base64
from typing import Dict, List, Any, Literal, Union, Optional
from .base import AsyncComputerHandler
from computer import Computer

class cuaComputerHandler(AsyncComputerHandler):
    """Computer handler that implements the Computer protocol using the computer interface.

    Wraps a ``Computer`` and forwards each action to ``Computer.interface``.
    ``_initialize()`` must be awaited before any method that touches the
    interface; those methods assert that ``self.interface`` has been set.
    """
    
    def __init__(self, cua_computer: Computer):
        """Initialize with a computer interface (from tool schema)."""
        self.cua_computer = cua_computer
        # Populated by _initialize(); None until then.
        self.interface = None

    async def _initialize(self):
        """Run the computer if it reports itself as not initialized, then cache its interface."""
        if hasattr(self.cua_computer, '_initialized') and not self.cua_computer._initialized:
            await self.cua_computer.run()
        self.interface = self.cua_computer.interface

    @staticmethod
    def _split_key_combo(combo: str) -> List[str]:
        """Split a key-combination string such as ``"ctrl+c"`` or ``"ctrl-c"`` into key names.

        Both ``+`` and ``-`` act as separators. A separator with no key name
        pending before it is taken as a literal key, so ``"-"`` yields
        ``["-"]`` and ``"ctrl++"`` yields ``["ctrl", "+"]`` rather than
        producing empty key names. An empty string yields ``[]``.
        """
        parts: List[str] = []
        pending = ""
        for ch in combo:
            if ch in "+-":
                if pending:
                    parts.append(pending)
                    pending = ""
                else:
                    # Nothing before this separator: it is the key itself.
                    parts.append(ch)
            else:
                pending += ch
        if pending:
            parts.append(pending)
        return parts
    
    # ==== Computer-Use-Preview Action Space ==== 

    async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
        """Get the current environment type (currently always ``"linux"``)."""
        # TODO: detect actual environment
        return "linux"

    async def get_dimensions(self) -> tuple[int, int]:
        """Get screen dimensions as (width, height)."""
        assert self.interface is not None
        screen_size = await self.interface.get_screen_size()
        return screen_size["width"], screen_size["height"]
    
    async def screenshot(self) -> str:
        """Take a screenshot and return it as a base64-encoded UTF-8 string."""
        assert self.interface is not None
        screenshot_bytes = await self.interface.screenshot()
        return base64.b64encode(screenshot_bytes).decode('utf-8')
    
    async def click(self, x: int, y: int, button: str = "left") -> None:
        """Click at coordinates with specified button.

        Only ``"right"`` triggers a right click; every other value,
        including unknown buttons, results in a left click.
        """
        assert self.interface is not None
        if button == "right":
            await self.interface.right_click(x, y)
        else:
            # "left" and any unknown button both map to a left click.
            await self.interface.left_click(x, y)
    
    async def double_click(self, x: int, y: int) -> None:
        """Double click at coordinates."""
        assert self.interface is not None
        await self.interface.double_click(x, y)
    
    async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Move the cursor to (x, y), then scroll by the specified amounts."""
        assert self.interface is not None
        await self.interface.move_cursor(x, y)
        await self.interface.scroll(scroll_x, scroll_y)
    
    async def type(self, text: str) -> None:
        """Type text."""
        assert self.interface is not None
        await self.interface.type_text(text)
    
    async def wait(self, ms: int = 1000) -> None:
        """Wait for specified milliseconds."""
        assert self.interface is not None
        import asyncio
        await asyncio.sleep(ms / 1000.0)
    
    async def move(self, x: int, y: int) -> None:
        """Move cursor to coordinates."""
        assert self.interface is not None
        await self.interface.move_cursor(x, y)
    
    async def keypress(self, keys: Union[List[str], str]) -> None:
        """Press a key or key combination.

        Args:
            keys: Either a list of key names, or a string in which ``+`` or
                ``-`` separates key names (``"ctrl+c"``, ``"ctrl-c"``). A
                lone ``"+"`` or ``"-"`` is treated as that key itself.

        A single key is sent with ``press_key``; several keys are sent
        together with ``hotkey``. An empty combination is a no-op.
        """
        assert self.interface is not None
        if isinstance(keys, str):
            keys = self._split_key_combo(keys)
        if not keys:
            # Nothing to press; avoid calling hotkey() with no keys.
            return
        if len(keys) == 1:
            await self.interface.press_key(keys[0])
        else:
            # Handle key combinations
            await self.interface.hotkey(*keys)
    
    async def drag(self, path: List[Dict[str, int]]) -> None:
        """Drag along specified path.

        Presses the mouse at the first point, moves through the remaining
        points in order, and releases at the last point. An empty path is a
        no-op.
        """
        assert self.interface is not None
        if not path:
            return
        
        # Start drag from first point
        start = path[0]
        await self.interface.mouse_down(start["x"], start["y"])
        
        # Move through path
        for point in path[1:]:
            await self.interface.move_cursor(point["x"], point["y"])
        
        # End drag at last point
        end = path[-1]
        await self.interface.mouse_up(end["x"], end["y"])
    
    async def get_current_url(self) -> str:
        """Get current URL (for browser environments); currently always ``""``."""
        # This would need to be implemented based on the specific browser interface
        # For now, return empty string
        return ""

    # ==== Anthropic Computer Action Space ==== 
    async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
        """Left mouse down at coordinates."""
        assert self.interface is not None
        await self.interface.mouse_down(x, y, button="left")
    
    async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
        """Left mouse up at coordinates."""
        assert self.interface is not None
        await self.interface.mouse_up(x, y, button="left")
```

--------------------------------------------------------------------------------
/docs/content/docs/computer-sdk/cloud-vm-management.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Cloud VM Management
description: Manage your Cua Cloud sandboxes (VMs) via Python SDK or HTTP API
---

import { Tab, Tabs } from 'fumadocs-ui/components/tabs';

Use these concise examples to manage your cloud sandboxes. Pick either the Python SDK or plain HTTP (curl) for each action.

> You need a CUA Database API key. Set it in the `CUA_API_KEY` environment variable.

## Status values
- `pending` – VM deployment in progress
- `running` – VM is active and accessible
- `stopped` – VM is stopped but not terminated
- `terminated` – VM has been permanently destroyed
- `failed` – VM deployment or operation failed

---

## List VMs

<Tabs items={["Python", "curl"]}>
  <Tab value="Python">

  ```python
  import os
  import asyncio
  from computer.providers.cloud.provider import CloudProvider

  async def main():
      api_key = os.getenv("CUA_API_KEY") or "your-api-key"
      # Optional: point to a different API base
      # os.environ["CUA_API_BASE"] = "https://api.cua.ai"

      provider = CloudProvider(api_key=api_key, verbose=False)
      async with provider:
          vms = await provider.list_vms()
          for vm in vms:
              print({
                  "name": vm["name"],
                  "status": vm["status"],
                  "api_url": vm.get("api_url"),
                  "vnc_url": vm.get("vnc_url"),
              })

  if __name__ == "__main__":
      asyncio.run(main())
  ```

  </Tab>
  <Tab value="curl">

  ```bash
  curl -H "Authorization: Bearer $CUA_API_KEY" \
       "https://api.cua.ai/v1/vms"
  ```

  Example response:
  ```json
  [
    {
      "name": "s-windows-x4snp46ebf",
      "status": "running"
    }
  ]
  ```

  </Tab>
</Tabs>

---

## Start a VM
Provide the name of the VM you want to start.

<Tabs items={["Python", "curl"]}>
  <Tab value="Python">

  ```python
  import os
  import asyncio
  from computer.providers.cloud.provider import CloudProvider

  async def main():
      api_key = os.getenv("CUA_API_KEY") or "your-api-key"
      name = "my-vm-name"  # e.g., "m-linux-96lcxd2c2k"

      provider = CloudProvider(api_key=api_key)
      async with provider:
          resp = await provider.run_vm(name)
          print(resp)  # { "name": name, "status": "starting" }

  if __name__ == "__main__":
      asyncio.run(main())
  ```

  </Tab>
  <Tab value="curl">

  ```bash
  curl -X POST \
       -H "Authorization: Bearer $CUA_API_KEY" \
       "https://api.cua.ai/v1/vms/my-vm-name/start" -i
  ```

  Example response headers (no body):
  ```text
  HTTP/1.1 204 No Content
  ```

  </Tab>
</Tabs>

---

## Stop a VM
Stops the VM asynchronously.

<Tabs items={["Python", "curl"]}>
  <Tab value="Python">

  ```python
  import os
  import asyncio
  from computer.providers.cloud.provider import CloudProvider

  async def main():
      api_key = os.getenv("CUA_API_KEY") or "your-api-key"
      name = "my-vm-name"

      provider = CloudProvider(api_key=api_key)
      async with provider:
          resp = await provider.stop_vm(name)
          print(resp)  # { "name": name, "status": "stopping" }

  if __name__ == "__main__":
      asyncio.run(main())
  ```

  </Tab>
  <Tab value="curl">

  ```bash
  curl -X POST \
       -H "Authorization: Bearer $CUA_API_KEY" \
       "https://api.cua.ai/v1/vms/my-vm-name/stop"
  ```

  Example response:
  ```json
  { "status": "stopping" }
  ```

  </Tab>
</Tabs>

---

## Restart a VM
Restarts the VM asynchronously.

<Tabs items={["Python", "curl"]}>
  <Tab value="Python">

  ```python
  import os
  import asyncio
  from computer.providers.cloud.provider import CloudProvider

  async def main():
      api_key = os.getenv("CUA_API_KEY") or "your-api-key"
      name = "my-vm-name"

      provider = CloudProvider(api_key=api_key)
      async with provider:
          resp = await provider.restart_vm(name)
          print(resp)  # { "name": name, "status": "restarting" }

  if __name__ == "__main__":
      asyncio.run(main())
  ```

  </Tab>
  <Tab value="curl">

  ```bash
  curl -X POST \
       -H "Authorization: Bearer $CUA_API_KEY" \
       "https://api.cua.ai/v1/vms/my-vm-name/restart"
  ```

  Example response:
  ```json
  { "status": "restarting" }
  ```

  </Tab>
</Tabs>

---

## Query a VM by name
Query the computer-server running on the VM. Useful for checking details like status or OS type.

<Tabs items={["Python", "curl"]}>
  <Tab value="Python">

  ```python
  import os
  import asyncio
  from computer.providers.cloud.provider import CloudProvider

  async def main():
      api_key = os.getenv("CUA_API_KEY") or "your-api-key"
      name = "my-vm-name"

      provider = CloudProvider(api_key=api_key)
      async with provider:
          info = await provider.get_vm(name)
          print(info)

  if __name__ == "__main__":
      asyncio.run(main())
  ```

  </Tab>
  <Tab value="curl">

  ```bash
  curl "https://my-vm-name.containers.cloud.cua.ai:8443/status"
  ```

  Example response:
  ```json
  { "status": "ok", "os_type": "linux", "features": ["agent"] }
  ```

  </Tab>
</Tabs>

```

--------------------------------------------------------------------------------
/examples/computer_examples_windows.py:
--------------------------------------------------------------------------------

```python
import os
import asyncio
from pathlib import Path
import sys
import traceback

# Load environment variables from .env file
project_root = Path(__file__).parent.parent
env_file = project_root / ".env"
print(f"Loading environment from: {env_file}")
from computer.helpers import sandboxed
from dotenv import load_dotenv

load_dotenv(env_file)

# Add paths to sys.path if needed
# PYTHONPATH entries are separated by os.pathsep (";" on Windows, ":" on
# POSIX). Splitting on a hard-coded ":" would cut Windows paths at the
# drive letter (e.g. "C:\libs" -> "C", "\libs").
pythonpath = os.environ.get("PYTHONPATH", "")
for path in pythonpath.split(os.pathsep):
    if path and path not in sys.path:
        sys.path.insert(0, path)  # Insert at beginning to prioritize
        print(f"Added to sys.path: {path}")

from computer.computer import Computer
from computer.providers.base import VMProviderType
from computer.logger import LogLevel

# ANSI color codes
RED = '\033[91m'
RESET = '\033[0m'

async def main():
    """Walk through the computer interface against a cloud Windows machine.

    Creates a ``Computer`` with the cloud provider, runs it, then exercises
    keyboard, mouse, sandboxed-function, command, screenshot and clipboard
    calls in sequence before dropping into an interactive command REPL.
    Any exception is printed with its traceback rather than propagated.
    """
    try:
        print("\n=== Using direct initialization ===")

        # Create a remote Windows computer with Cua
        # The API key and machine name come from the environment; the name
        # falls back to an empty string when CONTAINER_NAME is unset.
        computer = Computer(
            os_type="windows",
            api_key=os.getenv("CUA_API_KEY"),
            name=os.getenv("CONTAINER_NAME") or "",
            provider_type=VMProviderType.CLOUD,
        )
        
        try:
            # Run the computer with default parameters
            await computer.run()
            
            # Create output directory if it doesn't exist
            # (local directory, used below for the saved screenshot)
            output_dir = Path("./output")
            output_dir.mkdir(exist_ok=True)
            
            # Keyboard Actions Examples
            print("\n=== Keyboard Actions ===")
            await computer.interface.type_text("Hello, World!")
            await computer.interface.press_key("enter")

            # Mouse Actions Examples
            print("\n=== Mouse Actions ===")
            await computer.interface.move_cursor(100, 100)
            await computer.interface.left_click()
            await computer.interface.double_click(400, 400)
            await computer.interface.right_click(300, 300)

            print("\n=== RPC ===")
            # Install the `mss` package into a virtual environment named "demo_venv".
            await computer.venv_install("demo_venv", ["mss"])

            # NOTE(review): presumably @sandboxed executes the function inside
            # "demo_venv" on the remote computer -- confirm against
            # computer.helpers. The imports live inside the function body.
            @sandboxed("demo_venv")
            def greet_and_print(name):
                from mss import mss
                import os
                # get username
                username = os.getlogin()
                print(f"Hello from inside the container, {name}!")
                print("Username:", username)
                print("Screens:", mss().monitors)

                # take a screenshot
                # NOTE: the output path hard-codes the "azureuser" account.
                with mss() as sct:
                    filename = sct.shot(mon=-1, output='C:/Users/azureuser/Desktop/fullscreen.png')
                    print(filename)
                
                return {"greeted": name, "username": username}

            # Call the sandboxed function with a single positional argument;
            # the decorated function is awaited.
            result = await greet_and_print("John Doe")
            print("Result from sandboxed function:", result)

            # Command Actions Examples
            print("\n=== Command Actions ===")
            result = await computer.interface.run_command("notepad")
            print("Result from command:", result)

            # Capture the screen and write the returned data to a local file.
            screenshot = await computer.interface.screenshot()
            screenshot_path = output_dir / "screenshot.png"
            with open(screenshot_path, "wb") as f:
                f.write(screenshot)
            print(f"Screenshot saved to: {screenshot_path.absolute()}")
            
            # Clipboard Actions Examples
            print("\n=== Clipboard Actions ===")
            await computer.interface.set_clipboard("Test clipboard")
            # The value returned by copy_to_clipboard() is printed as the
            # clipboard content.
            content = await computer.interface.copy_to_clipboard()
            print(f"Clipboard content: {content}")


            # Simple REPL Loop
            print("\n=== Command REPL ===")
            print("Enter commands to run on the remote computer.")
            print("Type 'exit' or 'quit' to leave the REPL.\n")
            
            while True:
                try:
                    # Get command from user
                    # (input() is a blocking call inside this coroutine)
                    command = input("command> ").strip()
                    
                    # Check for exit commands
                    # An empty line also leaves the loop, but without the
                    # "Exiting REPL..." message.
                    if command.lower() in ['exit', 'quit', '']:
                        if command.lower() in ['exit', 'quit']:
                            print("Exiting REPL...")
                        break
                    
                    # Run the command
                    result = await computer.interface.run_command(command)
                    
                    # stdout is always printed; stderr only when non-empty, in red.
                    print(result.stdout)
                    if result.stderr:
                        print(f"{RED}{result.stderr}{RESET}")
                except KeyboardInterrupt:
                    print("\nExiting REPL...")
                    break
                except Exception as e:
                    # Report the failure and keep the REPL running.
                    print(f"{RED}Error running command: {e}{RESET}")


        finally:
            # Important to clean up resources
            # NOTE: the stop() call is commented out, so this example does
            # not stop the computer on exit.
            # await computer.stop()
            pass
    except Exception as e:
        print(f"Error in main: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    asyncio.run(main())

```

--------------------------------------------------------------------------------
/libs/lume/src/VNC/VNCService.swift:
--------------------------------------------------------------------------------

```swift
import Foundation
import Dynamic
import Virtualization

/// Protocol defining the interface for VNC server operations.
///
/// The protocol is annotated `@MainActor`, so its requirements are isolated
/// to the main actor.
@MainActor
protocol VNCService {
    /// URL of the VNC session as a string, or `nil` when none is available.
    var url: String? { get }

    /// Starts the VNC server.
    ///
    /// - Parameters:
    ///   - port: Port to serve on. NOTE(review): the default implementation
    ///     in this file appears to treat `0` as "auto-assign" — confirm.
    ///   - virtualMachine: The virtual machine to expose. Typed as `Any?`;
    ///     the default implementation in this file casts it to
    ///     `VZVirtualMachine`.
    /// - Throws: An error when the server cannot be started.
    func start(port: Int, virtualMachine: Any?) async throws

    /// Stops the VNC server.
    func stop()

    /// Opens a VNC client for the given URL.
    ///
    /// - Parameter url: The VNC URL to open.
    /// - Throws: An error when the client cannot be opened.
    func openClient(url: String) async throws
}

/// Default implementation of VNCService
///
/// Drives the private `_VZVNCServer` class through `Dynamic` and persists the
/// resulting connection URL as a `VNCSession` in the VM directory.
@MainActor
final class DefaultVNCService: VNCService {
    /// The running `_VZVNCServer` (a `Dynamic` wrapper), or `nil` when stopped.
    private var vncServer: Any?
    private let vmDirectory: VMDirectory
    
    init(vmDirectory: VMDirectory) {
        self.vmDirectory = vmDirectory
    }
    
    /// URL stored in the saved session, or `nil` if no session can be loaded.
    var url: String? {
        return try? vmDirectory.loadSession().url
    }
    
    /// Starts a password-protected VNC server and saves its local URL as the session.
    ///
    /// - Parameters:
    ///   - port: Port to bind; `0` lets the server auto-assign one.
    ///   - virtualMachine: Attached to the server when it is a `VZVirtualMachine`.
    /// - Throws: `VMError.vncPortBindingFailed` when a specific port was requested
    ///   but not obtained, `VMError.internalError` on timeout with an auto-assigned
    ///   port, or any error from sleeping / saving the session. On every failure
    ///   the server is stopped and discarded so the port is not left bound.
    func start(port: Int, virtualMachine: Any?) async throws {
        let password = Array(PassphraseGenerator().prefix(4)).joined(separator: "-")
        let securityConfiguration = Dynamic._VZVNCAuthenticationSecurityConfiguration(password: password)
        
        // Create VNC server with specified port
        let server = Dynamic._VZVNCServer(port: port, queue: DispatchQueue.main,
                                      securityConfiguration: securityConfiguration)
        
        if let vm = virtualMachine as? VZVirtualMachine {
            server.virtualMachine = vm
        }
        server.start()
        
        vncServer = server
        
        do {
            let assignedPort = try await waitForAssignedPort(of: server, requested: port)
            
            // Get the local IP address for the URL - prefer IPv4
            let hostIP = try getLocalIPAddress() ?? "127.0.0.1"
            let url = "vnc://:\(password)@127.0.0.1:\(assignedPort)"  // Use localhost for local connections
            let externalUrl = "vnc://:\(password)@\(hostIP):\(assignedPort)"  // External URL for remote connections
            
            Logger.info("VNC server started", metadata: [
                "local": url,
                "external": externalUrl
            ])
            
            // Save session information with local URL for the client
            let session = VNCSession(url: url)
            try vmDirectory.saveSession(session)
        } catch {
            // Start-up failed: shut the server down instead of leaking a
            // half-started instance that keeps the port occupied.
            server.stop()
            vncServer = nil
            throw error
        }
    }
    
    /// Polls the server until it reports a non-zero port.
    ///
    /// Checks up to 20 times with a 50ms pause between checks (about 1 second).
    /// - Returns: The port the server actually bound.
    /// - Throws: `VMError.vncPortBindingFailed` if a specific port was requested
    ///   and a different one (or none, reported as `-1`) was obtained;
    ///   `VMError.internalError` if an auto-assigned port never appeared.
    private func waitForAssignedPort(of server: Dynamic, requested port: Int) async throws -> UInt16 {
        let maxAttempts = 20  // 1 second total wait time
        
        for attempt in 1...maxAttempts {
            if let assignedPort: UInt16 = server.port.asUInt16, assignedPort != 0 {
                // For specific port requests, verify we got the requested port
                if port != 0 && Int(assignedPort) != port {
                    throw VMError.vncPortBindingFailed(requested: port, actual: Int(assignedPort))
                }
                return assignedPort
            }
            
            if attempt < maxAttempts {
                try await Task.sleep(nanoseconds: 50_000_000)  // 50ms delay between checks
            }
        }
        
        // If we've timed out and we requested a specific port, it likely means binding failed
        if port != 0 {
            throw VMError.vncPortBindingFailed(requested: port, actual: -1)
        }
        throw VMError.internalError("Timeout waiting for VNC server to start")
    }
    
    /// Returns the numeric IPv4 address of interface `en0`, or `nil` if it
    /// cannot be determined.
    ///
    /// Kept as `throws` for source compatibility with existing call sites; the
    /// current implementation never throws.
    private func getLocalIPAddress() throws -> String? {
        var ifaddr: UnsafeMutablePointer<ifaddrs>?
        guard getifaddrs(&ifaddr) == 0 else {
            return nil
        }
        defer { freeifaddrs(ifaddr) }
        
        var cursor = ifaddr
        while let entry = cursor {
            defer { cursor = entry.pointee.ifa_next }
            
            // ifa_addr may be NULL for some interfaces, so never force-unwrap it.
            // Only look for IPv4 addresses on the primary interface.
            guard let addr = entry.pointee.ifa_addr,
                  addr.pointee.sa_family == UInt8(AF_INET),
                  let namePointer = entry.pointee.ifa_name,
                  String(cString: namePointer) == "en0"
            else {
                continue
            }
            
            var hostname = [CChar](repeating: 0, count: Int(NI_MAXHOST))
            let status = getnameinfo(addr,
                                     socklen_t(addr.pointee.sa_len),
                                     &hostname,
                                     socklen_t(hostname.count),
                                     nil,
                                     0,
                                     NI_NUMERICHOST)
            // A failed lookup leaves the buffer empty; skip it rather than
            // returning "" and producing an unusable external URL.
            guard status == 0 else {
                continue
            }
            return String(cString: hostname, encoding: .utf8)
        }
        
        return nil
    }
    
    /// Stops the server if one is running and clears the saved session.
    func stop() {
        if let server = vncServer as? Dynamic {
            server.stop()
        }
        vncServer = nil
        vmDirectory.clearSession()
    }
    
    /// Hands the URL to `/usr/bin/open` so the system's handler for it launches.
    func openClient(url: String) async throws {
        let processRunner = DefaultProcessRunner()
        try processRunner.run(executable: "/usr/bin/open", arguments: [url])
    }
} 
```

--------------------------------------------------------------------------------
/libs/typescript/agent/examples/playground-example.html:
--------------------------------------------------------------------------------

```html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>CUA Agent Playground Example</title>
</head>
<body>
    <h1>CUA Agent Playground Example</h1>
    
    <div>
        <h2>Configuration</h2>
        <label for="url">Agent URL:</label><br>
        <input type="text" id="url" placeholder="https://localhost:8000 or peer://peer-id" value="https://localhost:8000" style="width: 400px;"><br><br>
        
        <label for="model">Model:</label><br>
        <input type="text" id="model" placeholder="anthropic/claude-opus-4-1-20250805" value="anthropic/claude-opus-4-1-20250805" style="width: 400px;"><br><br>
    </div>

    <div>
        <h2>Chat</h2>
        <label for="message">Message:</label><br>
        <input type="text" id="message" placeholder="Enter your message here..." style="width: 400px;"><br><br>
        
        <button onclick="sendMessage()">Send Message</button>
        <!-- <button onclick="checkHealth()">Check Health</button> -->
        <button onclick="clearOutput()">Clear Output</button><br><br>
        
        <label for="output">Output:</label><br>
        <textarea id="output" rows="20" cols="80" readonly></textarea>
    </div>

    <script src="https://unpkg.com/[email protected]/dist/peerjs.min.js"></script>
    <script type="module">
        // Import the AgentClient from the built library
        import AgentClient from '/dist/index.js';
        
        let client = null;
        
        // Make functions available globally
        window.sendMessage = sendMessage;
        window.checkHealth = checkHealth;
        window.clearOutput = clearOutput;
        
        /**
         * Append a timestamped line to the output textarea and keep the
         * view scrolled to the newest entry.
         */
        function log(message) {
            const outputBox = document.getElementById('output');
            const stamp = new Date().toLocaleTimeString();
            const line = `[${stamp}] ${message}\n`;
            outputBox.value += line;
            outputBox.scrollTop = outputBox.scrollHeight;
        }
        
        /**
         * Return an AgentClient for the URL currently typed in the form.
         * The cached client is reused while the URL is unchanged; otherwise a
         * new one is built. Returns null (after logging) when the URL is empty
         * or construction fails.
         */
        function getClient() {
            const requestedUrl = document.getElementById('url').value.trim();
            if (!requestedUrl) {
                log('ERROR: Please enter a URL');
                return null;
            }
            
            // Reuse the existing client when it already targets this URL
            const reusable = client && client.url === requestedUrl;
            if (reusable) {
                return client;
            }
            
            try {
                client = new AgentClient(requestedUrl);
                client.url = requestedUrl; // Remember the URL for the next comparison
                log(`Created new client for: ${requestedUrl}`);
            } catch (error) {
                log(`ERROR creating client: ${error.message}`);
                return null;
            }
            
            return client;
        }
        
        /**
         * Read the message and model fields, send them to the agent through
         * `responses.create`, and log the JSON response. Validation problems
         * and request errors are written to the output log.
         */
        async function sendMessage() {
            const messageField = document.getElementById('message');
            const modelField = document.getElementById('model');
            
            const message = messageField.value.trim();
            const model = modelField.value.trim();
            
            // Both fields are mandatory
            if (!message) {
                log('ERROR: Please enter a message');
                return;
            }
            if (!model) {
                log('ERROR: Please enter a model');
                return;
            }
            
            const agentClient = getClient();
            if (!agentClient) {
                return;
            }
            
            try {
                log(`Sending message: "${message}"`);
                log(`Using model: ${model}`);
                
                const payload = { model, input: message };
                
                log('Sending request...');
                const reply = await agentClient.responses.create(payload);
                
                log('Response received:');
                log(JSON.stringify(reply, null, 2));
                
                // Reset the input only after a successful round trip
                messageField.value = '';
            } catch (error) {
                log(`ERROR: ${error.message}`);
            }
        }
        
        /**
         * Call the agent's health endpoint and log the reported status,
         * or the error message if the call fails.
         */
        async function checkHealth() {
            const agentClient = getClient();
            if (!agentClient) {
                return;
            }
            
            try {
                log('Checking health...');
                const report = await agentClient.health();
                log(`Health status: ${report.status}`);
            } catch (error) {
                log(`ERROR checking health: ${error.message}`);
            }
        }
        
        /** Empty the output textarea. */
        function clearOutput() {
            const outputBox = document.getElementById('output');
            outputBox.value = '';
        }
        
        // Allow sending message with Enter key.
        // Uses `keydown` because `keypress` is deprecated.
        document.getElementById('message').addEventListener('keydown', function (event) {
            // Skip Enter presses that only confirm an IME composition, and
            // auto-repeat events from a held key (which would resend the message).
            if (event.key !== 'Enter' || event.isComposing || event.repeat) {
                return;
            }
            sendMessage();
        });
        
        // Log initial message
        log('CUA Agent Client Browser Example loaded');
        log('Enter a URL (HTTP/HTTPS or peer://) and model, then send a message');
    </script>
</body>
</html>

```

--------------------------------------------------------------------------------
/docs/src/assets/logo-black.svg:
--------------------------------------------------------------------------------

```
<?xml version="1.0" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN"
 "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
<svg version="1.0" xmlns="http://www.w3.org/2000/svg"
 width="1000.000000pt" height="1000.000000pt" viewBox="0 0 1000.000000 1000.000000"
 preserveAspectRatio="xMidYMid meet">

<g transform="translate(0.000000,1000.000000) scale(0.100000,-0.100000)"
fill="#000000" stroke="none">
<path d="M4934 9086 c-40 -14 -62 -33 -80 -69 -22 -42 -21 -994 1 -1037 38
-73 174 -101 243 -50 19 14 43 42 53 62 18 35 19 65 19 510 0 471 0 473 -23
513 -38 69 -133 101 -213 71z"/>
<path d="M3702 8472 c-52 -28 -82 -81 -82 -147 0 -67 8 -80 125 -210 44 -49
107 -121 139 -160 165 -196 233 -268 278 -291 58 -29 66 -30 124 -2 67 31 104
86 104 154 0 60 -14 82 -149 235 -42 47 -95 108 -117 135 -23 27 -52 61 -65
75 -13 14 -57 65 -98 112 -41 47 -89 93 -107 102 -42 20 -111 19 -152 -3z"/>
<path d="M6145 8472 c-29 -18 -136 -133 -235 -252 -53 -64 -190 -222 -230
-265 -37 -41 -70 -108 -70 -142 0 -16 10 -49 23 -73 17 -36 33 -51 79 -73 57
-29 57 -29 107 -12 44 14 63 31 149 128 54 62 122 141 151 177 30 36 57 67 60
70 12 10 157 175 179 204 33 43 31 150 -2 188 -56 64 -151 86 -211 50z"/>
<path d="M2245 7400 c-188 -14 -374 -75 -585 -191 -222 -123 -464 -366 -577
-579 -13 -25 -28 -52 -33 -60 -74 -123 -137 -348 -161 -580 -10 -106 1 -310
22 -384 5 -17 9 -44 9 -60 0 -72 116 -366 181 -458 11 -14 19 -29 19 -33 0
-33 296 -355 326 -355 7 0 14 -4 16 -10 5 -17 139 -99 243 -150 106 -52 216
-91 303 -109 98 -20 92 -7 92 -215 0 -176 26 -472 50 -571 5 -22 12 -56 15
-75 8 -44 31 -129 56 -201 10 -31 19 -62 19 -69 0 -8 8 -32 19 -54 10 -23 30
-70 45 -106 76 -182 189 -363 319 -515 296 -344 701 -603 1162 -743 216 -66
521 -126 730 -143 335 -27 467 -31 653 -19 103 6 237 15 297 19 120 8 282 32
415 62 47 10 98 19 113 19 16 0 37 5 48 11 11 5 48 16 82 24 34 7 85 21 112
31 104 36 161 58 201 76 22 10 43 18 47 18 12 0 185 85 263 131 44 25 116 71
159 100 43 30 87 61 99 68 107 74 344 310 444 444 40 53 72 98 72 101 0 2 17
31 38 63 68 104 202 390 202 431 0 10 4 22 9 28 12 12 53 168 80 304 30 149
43 293 48 538 l5 214 33 14 c18 7 53 16 77 20 23 4 48 10 53 14 6 4 28 13 50
19 91 27 214 86 318 152 224 141 416 353 524 580 98 206 129 320 153 562 19
189 -20 467 -92 657 -144 382 -420 674 -811 859 -48 22 -93 41 -101 41 -7 0
-35 8 -62 19 -27 10 -92 29 -144 41 -84 20 -119 23 -325 22 -212 0 -238 -2
-330 -25 -55 -14 -131 -37 -170 -52 -38 -15 -84 -32 -101 -39 -18 -6 -38 -16
-45 -22 -8 -6 -27 -18 -44 -26 -79 -40 -121 -67 -205 -134 -69 -54 -225 -212
-255 -257 -21 -32 -26 -33 -84 -6 -25 12 -64 29 -86 40 -183 84 -514 183 -705
209 -41 6 -91 15 -110 20 -50 13 -318 30 -470 30 -159 0 -363 -16 -450 -35
-36 -8 -87 -17 -115 -20 -48 -7 -178 -36 -240 -55 -84 -26 -222 -71 -240 -79
-11 -4 -47 -19 -80 -31 -77 -30 -162 -66 -198 -85 -32 -17 -67 -20 -67 -6 0
16 -211 230 -274 279 -96 74 -124 92 -237 149 -204 102 -346 139 -569 146 -85
2 -200 1 -255 -3z m396 -331 c163 -33 302 -93 433 -184 97 -68 232 -206 299
-307 32 -48 70 -94 85 -104 38 -25 155 -24 185 3 28 24 183 99 302 146 180 70
201 77 214 77 8 0 39 8 70 19 77 26 221 57 376 82 111 17 173 20 418 20 159 0
305 -5 325 -10 21 -5 71 -14 112 -21 178 -28 372 -81 590 -161 65 -24 225
-102 279 -137 48 -30 63 -34 118 -34 78 1 105 20 179 131 65 97 213 245 301
303 74 48 228 128 248 128 6 0 25 6 41 14 61 30 229 56 359 56 202 0 365 -39
550 -131 285 -142 521 -410 616 -699 108 -331 69 -692 -109 -995 -79 -134
-217 -274 -366 -369 -63 -40 -221 -116 -242 -116 -8 0 -28 -7 -44 -15 -16 -8
-55 -19 -87 -24 -230 -37 -274 -55 -306 -124 -15 -30 -16 -58 -7 -238 18 -382
-25 -716 -128 -994 -63 -171 -182 -380 -298 -523 -59 -74 -186 -204 -244 -251
-25 -20 -54 -44 -65 -54 -26 -24 -178 -128 -235 -161 -25 -14 -88 -46 -140
-72 -52 -25 -106 -51 -120 -58 -34 -18 -216 -80 -315 -107 -114 -31 -197 -48
-410 -85 -126 -21 -452 -46 -625 -48 -376 -3 -837 62 -1105 155 -16 6 -50 17
-75 24 -72 21 -256 98 -320 135 -8 5 -40 21 -70 36 -63 31 -172 103 -277 181
-199 148 -392 374 -504 588 -118 228 -190 479 -220 775 -11 113 -7 483 7 597
5 42 2 62 -15 96 -37 77 -60 86 -318 127 -29 4 -67 15 -84 24 -18 9 -41 16
-52 16 -10 0 -36 8 -56 18 -20 10 -58 30 -86 43 -139 67 -301 202 -395 329
-150 203 -229 445 -230 705 0 331 117 613 355 850 175 176 364 280 615 339 96
22 103 23 243 25 95 1 154 -4 228 -20z"/>
<path d="M3464 5185 c-17 -8 -43 -28 -58 -45 l-26 -32 0 -265 c0 -249 1 -268
20 -298 38 -62 51 -65 244 -65 l175 0 36 34 37 35 -4 283 c-4 378 13 353 -253
362 -108 4 -147 2 -171 -9z"/>
<path d="M6174 5171 c-12 -5 -31 -22 -43 -37 -22 -28 -22 -32 -19 -309 l3
-281 25 -31 25 -32 189 0 188 -1 41 40 40 40 -5 253 c-6 260 -10 288 -53 342
-15 18 -29 20 -193 22 -97 1 -187 -2 -198 -6z"/>
<path d="M4935 5079 c-199 -25 -341 -112 -454 -278 -49 -71 -134 -238 -151
-296 -7 -22 -21 -59 -31 -83 -11 -23 -19 -50 -19 -60 0 -9 -7 -37 -15 -60 -9
-24 -20 -69 -25 -100 -5 -32 -16 -93 -25 -137 -12 -59 -16 -144 -17 -325 -1
-238 0 -247 25 -321 63 -188 164 -313 318 -394 86 -45 137 -61 274 -85 236
-42 492 -10 651 81 238 137 348 357 348 699 0 89 -21 335 -34 390 -6 25 -15
70 -20 100 -5 30 -15 71 -21 90 -6 19 -15 51 -19 70 -24 100 -107 282 -186
406 -59 94 -167 193 -265 242 -46 23 -93 42 -104 42 -12 0 -25 4 -30 9 -15 13
-132 19 -200 10z"/>
</g>
</svg>

```

--------------------------------------------------------------------------------
/docs/src/assets/logo-white.svg:
--------------------------------------------------------------------------------

```
<?xml version="1.0" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN"
 "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
<svg version="1.0" xmlns="http://www.w3.org/2000/svg"
 width="1000.000000pt" height="1000.000000pt" viewBox="0 0 1000.000000 1000.000000"
 preserveAspectRatio="xMidYMid meet">

<g transform="translate(0.000000,1000.000000) scale(0.100000,-0.100000)"
fill="#ffffff" stroke="none">
<path d="M4934 9086 c-40 -14 -62 -33 -80 -69 -22 -42 -21 -994 1 -1037 38
-73 174 -101 243 -50 19 14 43 42 53 62 18 35 19 65 19 510 0 471 0 473 -23
513 -38 69 -133 101 -213 71z"/>
<path d="M3702 8472 c-52 -28 -82 -81 -82 -147 0 -67 8 -80 125 -210 44 -49
107 -121 139 -160 165 -196 233 -268 278 -291 58 -29 66 -30 124 -2 67 31 104
86 104 154 0 60 -14 82 -149 235 -42 47 -95 108 -117 135 -23 27 -52 61 -65
75 -13 14 -57 65 -98 112 -41 47 -89 93 -107 102 -42 20 -111 19 -152 -3z"/>
<path d="M6145 8472 c-29 -18 -136 -133 -235 -252 -53 -64 -190 -222 -230
-265 -37 -41 -70 -108 -70 -142 0 -16 10 -49 23 -73 17 -36 33 -51 79 -73 57
-29 57 -29 107 -12 44 14 63 31 149 128 54 62 122 141 151 177 30 36 57 67 60
70 12 10 157 175 179 204 33 43 31 150 -2 188 -56 64 -151 86 -211 50z"/>
<path d="M2245 7400 c-188 -14 -374 -75 -585 -191 -222 -123 -464 -366 -577
-579 -13 -25 -28 -52 -33 -60 -74 -123 -137 -348 -161 -580 -10 -106 1 -310
22 -384 5 -17 9 -44 9 -60 0 -72 116 -366 181 -458 11 -14 19 -29 19 -33 0
-33 296 -355 326 -355 7 0 14 -4 16 -10 5 -17 139 -99 243 -150 106 -52 216
-91 303 -109 98 -20 92 -7 92 -215 0 -176 26 -472 50 -571 5 -22 12 -56 15
-75 8 -44 31 -129 56 -201 10 -31 19 -62 19 -69 0 -8 8 -32 19 -54 10 -23 30
-70 45 -106 76 -182 189 -363 319 -515 296 -344 701 -603 1162 -743 216 -66
521 -126 730 -143 335 -27 467 -31 653 -19 103 6 237 15 297 19 120 8 282 32
415 62 47 10 98 19 113 19 16 0 37 5 48 11 11 5 48 16 82 24 34 7 85 21 112
31 104 36 161 58 201 76 22 10 43 18 47 18 12 0 185 85 263 131 44 25 116 71
159 100 43 30 87 61 99 68 107 74 344 310 444 444 40 53 72 98 72 101 0 2 17
31 38 63 68 104 202 390 202 431 0 10 4 22 9 28 12 12 53 168 80 304 30 149
43 293 48 538 l5 214 33 14 c18 7 53 16 77 20 23 4 48 10 53 14 6 4 28 13 50
19 91 27 214 86 318 152 224 141 416 353 524 580 98 206 129 320 153 562 19
189 -20 467 -92 657 -144 382 -420 674 -811 859 -48 22 -93 41 -101 41 -7 0
-35 8 -62 19 -27 10 -92 29 -144 41 -84 20 -119 23 -325 22 -212 0 -238 -2
-330 -25 -55 -14 -131 -37 -170 -52 -38 -15 -84 -32 -101 -39 -18 -6 -38 -16
-45 -22 -8 -6 -27 -18 -44 -26 -79 -40 -121 -67 -205 -134 -69 -54 -225 -212
-255 -257 -21 -32 -26 -33 -84 -6 -25 12 -64 29 -86 40 -183 84 -514 183 -705
209 -41 6 -91 15 -110 20 -50 13 -318 30 -470 30 -159 0 -363 -16 -450 -35
-36 -8 -87 -17 -115 -20 -48 -7 -178 -36 -240 -55 -84 -26 -222 -71 -240 -79
-11 -4 -47 -19 -80 -31 -77 -30 -162 -66 -198 -85 -32 -17 -67 -20 -67 -6 0
16 -211 230 -274 279 -96 74 -124 92 -237 149 -204 102 -346 139 -569 146 -85
2 -200 1 -255 -3z m396 -331 c163 -33 302 -93 433 -184 97 -68 232 -206 299
-307 32 -48 70 -94 85 -104 38 -25 155 -24 185 3 28 24 183 99 302 146 180 70
201 77 214 77 8 0 39 8 70 19 77 26 221 57 376 82 111 17 173 20 418 20 159 0
305 -5 325 -10 21 -5 71 -14 112 -21 178 -28 372 -81 590 -161 65 -24 225
-102 279 -137 48 -30 63 -34 118 -34 78 1 105 20 179 131 65 97 213 245 301
303 74 48 228 128 248 128 6 0 25 6 41 14 61 30 229 56 359 56 202 0 365 -39
550 -131 285 -142 521 -410 616 -699 108 -331 69 -692 -109 -995 -79 -134
-217 -274 -366 -369 -63 -40 -221 -116 -242 -116 -8 0 -28 -7 -44 -15 -16 -8
-55 -19 -87 -24 -230 -37 -274 -55 -306 -124 -15 -30 -16 -58 -7 -238 18 -382
-25 -716 -128 -994 -63 -171 -182 -380 -298 -523 -59 -74 -186 -204 -244 -251
-25 -20 -54 -44 -65 -54 -26 -24 -178 -128 -235 -161 -25 -14 -88 -46 -140
-72 -52 -25 -106 -51 -120 -58 -34 -18 -216 -80 -315 -107 -114 -31 -197 -48
-410 -85 -126 -21 -452 -46 -625 -48 -376 -3 -837 62 -1105 155 -16 6 -50 17
-75 24 -72 21 -256 98 -320 135 -8 5 -40 21 -70 36 -63 31 -172 103 -277 181
-199 148 -392 374 -504 588 -118 228 -190 479 -220 775 -11 113 -7 483 7 597
5 42 2 62 -15 96 -37 77 -60 86 -318 127 -29 4 -67 15 -84 24 -18 9 -41 16
-52 16 -10 0 -36 8 -56 18 -20 10 -58 30 -86 43 -139 67 -301 202 -395 329
-150 203 -229 445 -230 705 0 331 117 613 355 850 175 176 364 280 615 339 96
22 103 23 243 25 95 1 154 -4 228 -20z"/>
<path d="M3464 5185 c-17 -8 -43 -28 -58 -45 l-26 -32 0 -265 c0 -249 1 -268
20 -298 38 -62 51 -65 244 -65 l175 0 36 34 37 35 -4 283 c-4 378 13 353 -253
362 -108 4 -147 2 -171 -9z"/>
<path d="M6174 5171 c-12 -5 -31 -22 -43 -37 -22 -28 -22 -32 -19 -309 l3
-281 25 -31 25 -32 189 0 188 -1 41 40 40 40 -5 253 c-6 260 -10 288 -53 342
-15 18 -29 20 -193 22 -97 1 -187 -2 -198 -6z"/>
<path d="M4935 5079 c-199 -25 -341 -112 -454 -278 -49 -71 -134 -238 -151
-296 -7 -22 -21 -59 -31 -83 -11 -23 -19 -50 -19 -60 0 -9 -7 -37 -15 -60 -9
-24 -20 -69 -25 -100 -5 -32 -16 -93 -25 -137 -12 -59 -16 -144 -17 -325 -1
-238 0 -247 25 -321 63 -188 164 -313 318 -394 86 -45 137 -61 274 -85 236
-42 492 -10 651 81 238 137 348 357 348 699 0 89 -21 335 -34 390 -6 25 -15
70 -20 100 -5 30 -15 71 -21 90 -6 19 -15 51 -19 70 -24 100 -107 282 -186
406 -59 94 -167 193 -265 242 -46 23 -93 42 -104 42 -12 0 -25 4 -30 9 -15 13
-132 19 -200 10z"/>
</g>
</svg>

```

--------------------------------------------------------------------------------
/scripts/build.ps1:
--------------------------------------------------------------------------------

```
# PowerShell Build Script for CUA
# Exit on error
# NOTE(review): this preference governs PowerShell errors; whether a non-zero
# exit code from a native tool (conda, pip) also stops the script depends on
# the PowerShell version and settings - confirm.
$ErrorActionPreference = "Stop"

# Colors for output (used as -ForegroundColor values by the Print-* helpers)
$RED = "Red"
$GREEN = "Green"
$BLUE = "Blue"

# Write a progress line ("==> <message>") in the step color
function Print-Step {
    param(
        [string]$Message
    )

    $line = "==> " + $Message
    Write-Host -Object $line -ForegroundColor $BLUE
}

# Write a success line ("==> Success: <message>") in the success color
function Print-Success {
    param(
        [string]$Message
    )

    $line = "==> Success: " + $Message
    Write-Host -Object $line -ForegroundColor $GREEN
}

# Write an error line ("==> Error: <message>") in the error color
function Print-Error {
    param(
        [string]$Message
    )

    $line = "==> Error: " + $Message
    Write-Host -Object $line -ForegroundColor $RED
}

# Get the script's directory and project root
# (the project root is taken to be the parent of the directory holding this script)
$SCRIPT_DIR = Split-Path -Parent $MyInvocation.MyCommand.Path
$PROJECT_ROOT = Split-Path -Parent $SCRIPT_DIR

# Change to project root so the relative paths used below resolve from there
Set-Location $PROJECT_ROOT

# Load environment variables from .env.local
# Each KEY=VALUE line is exported into the current process. Blank lines and
# comment lines (including indented ones) are skipped, and whitespace around
# the key and value is trimmed so "KEY = value" does not create a variable
# named "KEY ".
if (Test-Path ".env.local") {
    Print-Step "Loading environment variables from .env.local..."
    Get-Content ".env.local" | ForEach-Object {
        $line = $_.Trim()
        if ($line -eq "" -or $line.StartsWith("#")) {
            return  # acts like 'continue' inside ForEach-Object
        }
        if ($line -match "^([^=]+?)\s*=\s*(.*)$") {
            [Environment]::SetEnvironmentVariable($matches[1], $matches[2], "Process")
        }
    }
    Print-Success "Environment variables loaded"
} else {
    Print-Error ".env.local file not found"
    exit 1
}

# Verify conda can be invoked; abort with a hint if the command cannot be run
try {
    $null = conda --version
    Print-Success "Conda is available"
}
catch {
    Print-Error "Conda is not available. Please install Anaconda or Miniconda first."
    exit 1
}

# Create or update conda environment
# A failing native command only sets $LASTEXITCODE and does not throw, so the
# exit code is checked explicitly; otherwise the catch block would never run
# and the script would report success after a failed create/update.
Print-Step "Creating/updating conda environment 'cua' with Python 3.12..."
try {
    # Check if environment exists
    $envExists = conda env list | Select-String "^cua\s"
    if ($envExists) {
        Print-Step "Environment 'cua' already exists. Updating..."
        conda env update -n cua -f environment.yml --prune
    } else {
        Print-Step "Creating new environment 'cua'..."
        conda create -n cua python=3.12 -y
    }
    if ($LASTEXITCODE -ne 0) {
        throw "conda exited with code $LASTEXITCODE"
    }
    Print-Success "Conda environment 'cua' ready"
} catch {
    Print-Error "Failed to create/update conda environment"
    exit 1
}

# Activate conda environment
# 'conda activate' may fail without raising a PowerShell error (for example
# when the shell hook is not installed), so confirm the switch by inspecting
# CONDA_PREFIX instead of relying on the catch block alone.
Print-Step "Activating conda environment 'cua'..."
try {
    conda activate cua
    if (-not $env:CONDA_PREFIX -or (Split-Path -Leaf $env:CONDA_PREFIX) -ne "cua") {
        throw "conda activate did not switch to the 'cua' environment"
    }
    Print-Success "Environment activated"
} catch {
    Print-Error "Failed to activate conda environment 'cua'"
    Print-Step "Please run: conda activate cua"
    Print-Step "Then re-run this script"
    exit 1
}

# Clean up existing environments and cache
# The matches for each pattern are collected before anything is deleted, and
# each path is re-checked first: a directory nested inside one that was already
# removed would otherwise raise an error and abort the script.
Print-Step "Cleaning up existing environments..."
foreach ($pattern in @("__pycache__", ".pytest_cache", "dist", "*.egg-info")) {
    $targets = @(
        Get-ChildItem -Path . -Recurse -Directory -Filter $pattern |
            Select-Object -ExpandProperty FullName
    )
    foreach ($target in $targets) {
        # -LiteralPath keeps characters such as [ ] from being read as wildcards
        if (Test-Path -LiteralPath $target) {
            Remove-Item -LiteralPath $target -Recurse -Force
        }
    }
}

# Function to install a package and its dependencies
#
# Installs the package in $PackageDir in editable mode with pip, optionally
# with the given extras, and always returns to $PROJECT_ROOT afterwards.
#
# Returns exactly one boolean: $true on success, $false when pyproject.toml is
# missing or pip exits with a non-zero code. pip's output is piped to the host
# so it is shown to the user and does not become part of the return value
# (any extra output would make the caller's "-not (...)" test always false).
function Install-Package {
    param(
        [string]$PackageDir,
        [string]$PackageName,
        [string]$Extras = ""
    )
    
    Print-Step "Installing $PackageName..."
    Set-Location $PackageDir
    
    try {
        if (-not (Test-Path "pyproject.toml")) {
            Print-Error "No pyproject.toml found in $PackageDir"
            return $false
        }
        
        $target = if ($Extras) { ".[$Extras]" } else { "." }
        pip install -e $target | Out-Host
        
        if ($LASTEXITCODE -ne 0) {
            Print-Error "pip install failed for $PackageName (exit code $LASTEXITCODE)"
            return $false
        }
        
        return $true
    } finally {
        Set-Location $PROJECT_ROOT
    }
}

# Install packages in order of dependency
Print-Step "Installing packages in development mode..."

# Listed in dependency order; Extras is empty when no extras are requested.
$packages = @(
    @{ Dir = "libs/python/core";            Name = "core";            Extras = ""    },  # base package with telemetry support
    @{ Dir = "libs/python/pylume";          Name = "pylume";          Extras = ""    },  # base dependency
    @{ Dir = "libs/python/computer";        Name = "computer";        Extras = "all" },  # with all its dependencies and extras
    @{ Dir = "libs/python/som";             Name = "som";             Extras = ""    },  # omniparser
    @{ Dir = "libs/python/agent";           Name = "agent";           Extras = "all" },  # with all its dependencies and extras
    @{ Dir = "libs/python/computer-server"; Name = "computer-server"; Extras = ""    },
    @{ Dir = "libs/python/mcp-server";      Name = "mcp-server";      Extras = ""    }
)

foreach ($pkg in $packages) {
    if (-not (Install-Package $pkg.Dir $pkg.Name $pkg.Extras)) { exit 1 }
}

# Install development tools from root project
Print-Step "Installing development dependencies..."
pip install -e ".[dev,test,docs]"
# pip failures only set $LASTEXITCODE, so check it explicitly
if ($LASTEXITCODE -ne 0) {
    Print-Error "Failed to install development dependencies"
    exit 1
}

# Create a .env file for VS Code to use the virtual environment
Print-Step "Creating .env file for VS Code..."
$pythonPath = (
    @("core", "computer", "agent", "som", "pylume", "computer-server", "mcp-server") |
        ForEach-Object { "$PROJECT_ROOT/libs/python/$_" }
) -join ";"

# Write UTF-8 without a byte-order mark: Out-File -Encoding UTF8 adds a BOM on
# Windows PowerShell, which dotenv readers treat as part of the first key.
# An absolute path is required because .NET's working directory is not kept in
# sync with Set-Location.
$envFile = Join-Path $PROJECT_ROOT ".env"
$utf8NoBom = New-Object System.Text.UTF8Encoding($false)
[System.IO.File]::WriteAllText($envFile, "PYTHONPATH=$pythonPath" + [Environment]::NewLine, $utf8NoBom)

Print-Success "All packages installed successfully!"
Print-Step "Your conda environment 'cua' is ready. To activate it:"
Write-Host "  conda activate cua" -ForegroundColor Yellow

```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/integrations/hud.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: HUD Evals
description: Use ComputerAgent with HUD for benchmarking and evaluation
---

<Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/eval_osworld.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.</Callout>

The HUD integration allows an agent to be benchmarked using the [HUD framework](https://www.hud.so/). Through the HUD integration, the agent controls a computer inside HUD, where tests are run to evaluate the success of each task.

## Installation

First, install the required package:

```bash
pip install "cua-agent[hud]"
# or install hud-python directly
# pip install hud-python==0.4.12
```

## Environment Variables

Before running any evaluations, you’ll need to set up your environment variables for HUD and your model providers:

```bash
# HUD access
export HUD_API_KEY="your_hud_api_key"

# Model provider keys (at least one required)
export OPENAI_API_KEY="your_openai_key"
export ANTHROPIC_API_KEY="your_anthropic_key"
```

## Running a Single Task

You can run a single task from a HUD dataset for quick verification.

### Example

```python
from agent.integrations.hud import run_single_task

await run_single_task(
    dataset="hud-evals/OSWorld-Verified",   # or another HUD dataset
    model="openai/computer-use-preview+openai/gpt-5-nano",  # any supported model string
    task_id=155,  # e.g., reopen last closed tab
)
```

### Parameters

- `task_id` (`int`): Default: `0`
  Index of the task to run from the dataset.

## Running a Full Dataset

To benchmark your agent at scale, you can run an entire dataset (or a subset) in parallel.

### Example

```python
from agent.integrations.hud import run_full_dataset

results = await run_full_dataset(
    dataset="hud-evals/OSWorld-Verified",   # can also pass a Dataset or list[dict]
    model="openai/computer-use-preview",
    split="train[:3]",           # try a few tasks to start
    max_concurrent=20,            # tune to your infra
    max_steps=50                  # safety cap per task
)
```

### Parameters

- `job_name` (`str` | `None`):
  Optional human-readable name for the evaluation job (shows up in HUD UI).
- `max_concurrent` (`int`): Default: `30`
  Number of tasks to run in parallel. Scale this based on your infra.
- `max_steps` (`int`): Default: `50`
  Safety cap on steps per task to prevent infinite loops.
- `split` (`str`): Default: `"train"`
  Dataset split or subset to run. Uses the [Hugging Face split format](https://huggingface.co/docs/datasets/v1.11.0/splits.html), e.g., `"train[:10]"` for the first 10 tasks.

## Additional Parameters

Both single-task and full-dataset runs share a common set of configuration options. These let you fine-tune how the evaluation runs.

- `dataset` (`str` | `Dataset` | `list[dict]`): **Required**
  HUD dataset name (e.g. `"hud-evals/OSWorld-Verified"`), a loaded `Dataset`, or a list of tasks.
- `model` (`str`): Default: `"computer-use-preview"`
  Model string, e.g. `"openai/computer-use-preview+openai/gpt-5-nano"`. Supports composition with `+` (planning + grounding).
- `allowed_tools` (`list[str]`): Default: `["openai_computer"]`
  Restrict which tools the agent may use.
- `tools` (`list[Any]`):
  Extra tool configs to inject.
- `custom_loop` (`Callable`):
  Optional custom agent loop function. If provided, overrides automatic loop selection.
- `only_n_most_recent_images` (`int`): Default: `5` for full dataset, `None` for single task.
  Retain only the last N screenshots in memory.
- `callbacks` (`list[Any]`):
  Hook functions for logging, telemetry, or side effects.
- `verbosity` (`int`):
  Logging level. Set `2` for debugging every call/action.
- `trajectory_dir` (`str` | `dict`):
  Save local copies of trajectories for replay/analysis.
- `max_retries` (`int`): Default: `3`
  Number of retries for failed model/tool calls.
- `screenshot_delay` (`float` | `int`): Default: `0.5`
  Delay (seconds) between screenshots to avoid race conditions.
- `use_prompt_caching` (`bool`): Default: `False`
  Cache repeated prompts to reduce API calls.
- `max_trajectory_budget` (`float` | `dict`):
  Limit on trajectory size/budget (e.g., tokens, steps).
- `telemetry_enabled` (`bool`): Default: `True`
  Whether to send telemetry/traces to HUD.
- `**kwargs` (`any`):
  Any additional keyword arguments are passed through to the agent loop or model provider.

## Available Benchmarks

HUD provides multiple benchmark datasets for realistic evaluation.

1. **[OSWorld-Verified](/agent-sdk/benchmarks/osworld-verified)** – Benchmark on 369+ real-world desktop tasks across Chrome, LibreOffice, GIMP, VS Code, etc.
   *Best for*: evaluating full computer-use agents in realistic environments.
   *Verified variant*: fixes 300+ issues from earlier versions for reliability.

**Coming soon:** SheetBench (spreadsheet automation) and other specialized HUD datasets.

See the [HUD docs](https://docs.hud.so/environment-creation) for more eval environments.

## Tips

* **Debugging:** set `verbosity=2` to see every model call and tool action.
* **Performance:** lower `screenshot_delay` for faster runs; raise it if you see race conditions.
* **Safety:** always set `max_steps` (defaults to 50) to prevent runaway loops.
* **Custom tools:** pass extra `tools=[...]` into the agent config if you need tools beyond `openai_computer`.
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/message-format.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Message Format
---

This page documents the Python message and response schema used by the Agent SDK.
It mirrors the structure shown in Chat History and provides precise type definitions you can target in your own code.

All examples below use Python type hints with `TypedDict` and `Literal` from the standard `typing` module.

## Response

The agent yields response chunks as an async generator of objects with `output` and `usage`.

```python
from typing import List, TypedDict

# Usage accounting attached to a response chunk.
# Declared with total=False, so any of these keys may be absent.
class Usage(TypedDict, total=False):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    response_cost: float  # USD cost if available

# One chunk yielded by the agent: the messages produced plus their usage.
# Both keys are required (default total=True).
class AgentResponse(TypedDict):
    output: List["AgentMessage"]  # forward reference; AgentMessage is defined under "Messages"
    usage: Usage
```

## Messages

Agent messages represent the state of the conversation and the agent's actions.

```python
from typing import List, Literal, Optional, TypedDict, Union

# Union of all message variants
# Members are written as strings (forward references) because the classes
# are declared further down in this snippet.
AgentMessage = Union[
    "UserMessage",
    "AssistantMessage",
    "ReasoningMessage",
    "ComputerCallMessage",
    "ComputerCallOutputMessage",
    "FunctionCallMessage",
    "FunctionCallOutputMessage",
]

# Input message (role: user/system/developer)
# total=False: every key, including `type`, may be omitted.
class UserMessage(TypedDict, total=False):
    type: Literal["message"]  # optional for user input
    role: Literal["user", "system", "developer"]
    content: Union[str, List["InputContent"]]  # plain text or a list of content items

# Output message (assistant text)
# Shares type "message" with UserMessage; `role` tells them apart.
class AssistantMessage(TypedDict):
    type: Literal["message"]
    role: Literal["assistant"]
    content: List["OutputContent"]

# Output reasoning/thinking message
class ReasoningMessage(TypedDict):
    type: Literal["reasoning"]
    summary: List["SummaryContent"]

# Output computer action call (agent intends to act)
class ComputerCallMessage(TypedDict):
    type: Literal["computer_call"]
    call_id: str  # presumably matched by the call_id of the later computer_call_output -- confirm
    status: Literal["completed", "failed", "pending"]
    action: "ComputerAction"  # one of the action variants in the "Actions" section

# Output computer action result (always a screenshot)
class ComputerCallOutputMessage(TypedDict):
    type: Literal["computer_call_output"]
    call_id: str
    output: "ComputerResultContent"

# Output function call (agent calls a Python tool)
class FunctionCallMessage(TypedDict):
    type: Literal["function_call"]
    call_id: str  # presumably matched by the call_id of the later function_call_output -- confirm
    status: Literal["completed", "failed", "pending"]
    name: str
    arguments: str  # JSON-serialized kwargs

# Output function call result (text)
class FunctionCallOutputMessage(TypedDict):
    type: Literal["function_call_output"]
    call_id: str
    output: str
```

## Message Content

These content items appear inside `content` arrays for the message types above.

```python
# Input content kinds
# NOTE(review): as declared (total=True) both `text` and `image_url` are
# required keys whose value may be None. If real payloads omit the key that
# does not apply to their `type`, this should be total=False -- confirm
# against the SDK's actual type definitions.
class InputContent(TypedDict):
    type: Literal["input_image", "input_text"]
    text: Optional[str]
    image_url: Optional[str]  # e.g., data URL

# Assistant output content
class OutputContent(TypedDict):
    type: Literal["output_text"]
    text: str

# Reasoning/summary output content
class SummaryContent(TypedDict):
    type: Literal["summary_text"]
    text: str

# Computer call outputs (screenshots)
# Two `type` values are accepted; both carry the image in `image_url`.
class ComputerResultContent(TypedDict):
    type: Literal["computer_screenshot", "input_image"]
    image_url: str  # data URL (e.g., "data:image/png;base64,....")
```

## Actions

Computer actions represent concrete operations the agent will perform on the computer.

Two broad families exist depending on the provider: OpenAI-style and Anthropic-style.

```python
# Union of all supported computer actions
# Members are forward references to the classes declared below; the `type`
# literal on each class identifies the variant.
ComputerAction = Union[
    "ClickAction",
    "DoubleClickAction",
    "DragAction",
    "KeyPressAction",
    "MoveAction",
    "ScreenshotAction",
    "ScrollAction",
    "TypeAction",
    "WaitAction",
    # Anthropic variants
    "LeftMouseDownAction",
    "LeftMouseUpAction",
]

# OpenAI Computer Actions
# Single click; all four keys are required.
class ClickAction(TypedDict):
    type: Literal["click"]
    button: Literal["left", "right", "wheel", "back", "forward"]
    x: int
    y: int

# NOTE(review): declared total=False, so every key (including `type`) is
# optional here, unlike ClickAction -- confirm this difference is intentional.
class DoubleClickAction(TypedDict, total=False):
    type: Literal["double_click"]
    button: Literal["left", "right", "wheel", "back", "forward"]
    x: int
    y: int

# NOTE(review): also total=False (all keys optional) -- confirm.
class DragAction(TypedDict, total=False):
    type: Literal["drag"]
    button: Literal["left", "right", "wheel", "back", "forward"]
    path: List[tuple[int, int]]  # [(x1, y1), (x2, y2), ...]

class KeyPressAction(TypedDict):
    type: Literal["keypress"]
    keys: List[str]  # e.g., ["ctrl", "a"]

class MoveAction(TypedDict):
    type: Literal["move"]
    x: int
    y: int

# Carries no payload beyond its `type`.
class ScreenshotAction(TypedDict):
    type: Literal["screenshot"]

# Has both a position (x, y) and a scroll amount (scroll_x, scroll_y).
class ScrollAction(TypedDict):
    type: Literal["scroll"]
    scroll_x: int
    scroll_y: int
    x: int
    y: int

class TypeAction(TypedDict):
    type: Literal["type"]
    text: str

# Carries no payload beyond its `type`; no duration field is declared.
class WaitAction(TypedDict):
    type: Literal["wait"]

# Anthropic Computer Actions
class LeftMouseDownAction(TypedDict):
    type: Literal["left_mouse_down"]
    x: int
    y: int

class LeftMouseUpAction(TypedDict):
    type: Literal["left_mouse_up"]
    x: int
    y: int
```

## Notes

- The agent runtime may add provider-specific fields when available (e.g., usage cost). Unknown fields should be ignored for forward compatibility.
- Computer action outputs are screenshots as data URLs. For security and storage, some serializers may redact or omit large fields in persisted metadata.
- The message flow typically alternates between reasoning, actions, screenshots, and concluding assistant text. See [Chat History](./chat-history) for a step-by-step example.

```

--------------------------------------------------------------------------------
/libs/typescript/agent/src/client.ts:
--------------------------------------------------------------------------------

```typescript
import {Peer}  from "peerjs";
import type {
  AgentRequest,
  AgentResponse,
  ConnectionType,
  AgentClientOptions,
} from "./types";

/**
 * Client for sending agent requests over HTTP(S) or a PeerJS data channel.
 *
 * The transport is chosen from the URL scheme passed to the constructor:
 * `http://` / `https://` use `fetch`, `peer://<id>` uses a PeerJS connection
 * to the peer with that id.
 */
export class AgentClient {
  private url: string;
  private connectionType: ConnectionType;
  private options: AgentClientOptions;
  private peer?: Peer;
  private connection?: any;
  // In-flight connection attempt, shared so concurrent first requests wait for
  // the same data channel instead of failing or opening a second one.
  private connecting?: Promise<any>;

  /**
   * @param url     Target endpoint; must start with http://, https:// or peer://.
   * @param options Client options; `timeout` defaults to 30000 ms and
   *                `retries` to 3 unless overridden.
   * @throws Error when the URL uses any other scheme.
   */
  constructor(url: string, options: AgentClientOptions = {}) {
    this.url = url;
    this.options = {
      timeout: 30000,
      retries: 3,
      ...options,
    };

    // Determine connection type from URL
    if (url.startsWith("http://") || url.startsWith("https://")) {
      this.connectionType = url.startsWith("https://") ? "https" : "http";
    } else if (url.startsWith("peer://")) {
      this.connectionType = "peer";
    } else {
      throw new Error(
        "Invalid URL format. Must start with http://, https://, or peer://"
      );
    }
  }

  // Main responses API matching the desired usage pattern
  public responses = {
    create: async (request: AgentRequest): Promise<AgentResponse> => {
      return this.sendRequest(request);
    },
  };

  /** Dispatches the request to the transport selected in the constructor. */
  private async sendRequest(request: AgentRequest): Promise<AgentResponse> {
    switch (this.connectionType) {
      case "http":
      case "https":
        return this.sendHttpRequest(request);
      case "peer":
        return this.sendPeerRequest(request);
      default:
        throw new Error(`Unsupported connection type: ${this.connectionType}`);
    }
  }

  /**
   * POSTs the request as JSON to `<url>/responses`.
   *
   * The request is aborted after `options.timeout` ms. The timer is released
   * in `finally`, so it is cleared on every exit path and also bounds reading
   * the response body.
   *
   * @throws Error prefixed with "Failed to send HTTP request:" on network
   *         failure, abort, non-2xx status or an unparsable body.
   */
  private async sendHttpRequest(request: AgentRequest): Promise<AgentResponse> {
    const controller = new AbortController();
    const timeoutId = setTimeout(
      () => controller.abort(),
      this.options.timeout
    );

    try {
      const headers: Record<string, string> = {
        "Content-Type": "application/json",
      };
      if (this.options.apiKey) {
        headers["X-API-Key"] = this.options.apiKey;
      }

      const response = await fetch(`${this.url}/responses`, {
        method: "POST",
        headers,
        body: JSON.stringify(request),
        signal: controller.signal,
      });

      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }

      const data = await response.json();
      return data as AgentResponse;
    } catch (error) {
      if (error instanceof Error) {
        throw new Error(`Failed to send HTTP request: ${error.message}`);
      }
      throw error;
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Sends the request over the peer data channel, opening it first if needed.
   *
   * Connecting and the request/response exchange each get their own
   * `options.timeout` window.
   */
  private async sendPeerRequest(request: AgentRequest): Promise<AgentResponse> {
    const connection = await this.ensurePeerConnection();
    return this.exchangeOverPeer(connection, request);
  }

  /**
   * Returns an open data connection, reusing the current one when possible.
   *
   * A failed attempt tears the peer down again, so the next request starts
   * from scratch instead of finding a peer with no usable connection.
   */
  private ensurePeerConnection(): Promise<any> {
    if (this.connection && this.connection.open) {
      return Promise.resolve(this.connection);
    }
    if (this.connecting) {
      return this.connecting;
    }

    // Extract peer ID from peer:// URL
    const peerId = this.url.replace("peer://", "");

    // Drop any stale peer/connection left over from a closed channel.
    this.resetPeer();

    // Initialize peer connection with default options
    const peer = new Peer();
    this.peer = peer;

    const attempt = new Promise<any>((resolve, reject) => {
      let settled = false;

      const fail = (error: Error) => {
        if (settled) {
          return;
        }
        settled = true;
        clearTimeout(timer);
        // Only tear down if this attempt still owns the client's peer;
        // disconnect() or a newer attempt may have replaced it already.
        if (this.peer === peer) {
          this.resetPeer();
        }
        reject(error);
      };

      const timer = setTimeout(
        () => fail(new Error("Peer connection timeout")),
        this.options.timeout
      );

      peer.on("open", () => {
        if (settled) {
          return;
        }
        // Connect to the target peer
        const connection = peer.connect(peerId);
        this.connection = connection;

        connection.on("open", () => {
          if (settled) {
            return;
          }
          settled = true;
          clearTimeout(timer);
          resolve(connection);
        });

        connection.on("error", (error: any) => {
          fail(new Error(`Peer connection error: ${error}`));
        });
      });

      peer.on("error", (error: any) => {
        fail(new Error(`Peer error: ${error}`));
      });
    });

    // Forget the in-flight marker once the attempt settles either way.
    const forget = () => {
      if (this.connecting === tracked) {
        this.connecting = undefined;
      }
    };
    const tracked: Promise<any> = attempt.then(
      (connection) => {
        forget();
        return connection;
      },
      (error) => {
        forget();
        throw error;
      }
    );
    this.connecting = tracked;
    return tracked;
  }

  /**
   * Sends one request on an open connection and resolves with the next
   * message received. Listeners and the timer are removed on every outcome so
   * they cannot accumulate across requests.
   *
   * NOTE(review): the wire protocol shown here carries no request id, so
   * overlapping requests on one connection cannot be told apart -- callers
   * should issue peer requests sequentially.
   */
  private exchangeOverPeer(
    connection: any,
    request: AgentRequest
  ): Promise<AgentResponse> {
    return new Promise<AgentResponse>((resolve, reject) => {
      const cleanup = () => {
        clearTimeout(timer);
        connection.off("data", onData);
        connection.off("error", onError);
        connection.off("close", onClose);
      };

      const onData = (data: any) => {
        cleanup();
        try {
          const response = typeof data === "string" ? JSON.parse(data) : data;
          resolve(response as AgentResponse);
        } catch {
          reject(new Error("Failed to parse peer response"));
        }
      };

      const onError = (error: any) => {
        cleanup();
        reject(new Error(`Peer connection error: ${error}`));
      };

      const onClose = () => {
        cleanup();
        reject(new Error("Peer connection not available"));
      };

      const timer = setTimeout(() => {
        cleanup();
        reject(new Error("Peer request timeout"));
      }, this.options.timeout);

      // Subscribe before sending so a fast reply cannot be missed.
      connection.on("data", onData);
      connection.on("error", onError);
      connection.on("close", onClose);

      try {
        connection.send(JSON.stringify(request));
      } catch (error) {
        cleanup();
        reject(new Error(`Peer connection error: ${error}`));
      }
    });
  }

  /** Closes the data connection and destroys the peer, if present. */
  private resetPeer(): void {
    if (this.connection) {
      this.connection.close();
      this.connection = undefined;
    }
    if (this.peer) {
      this.peer.destroy();
      this.peer = undefined;
    }
  }

  /**
   * Health check.
   *
   * For peer URLs, reports "connected"/"disconnected" from the local peer's
   * `open` flag. For HTTP(S), GETs `<url>/health` and maps the outcome to
   * "healthy", "unhealthy" (non-2xx) or "unreachable" (fetch threw).
   */
  async health(): Promise<{ status: string }> {
    if (this.connectionType === "peer") {
      return { status: this.peer?.open ? "connected" : "disconnected" };
    }

    try {
      const response = await fetch(`${this.url}/health`);
      if (response.ok) {
        return { status: "healthy" };
      }
      return { status: "unhealthy" };
    } catch {
      return { status: "unreachable" };
    }
  }

  /** Clean up resources: closes the peer connection and destroys the peer. */
  async disconnect(): Promise<void> {
    // Stop handing out a connection attempt that is about to be torn down.
    this.connecting = undefined;
    this.resetPeer();
  }
}

```

--------------------------------------------------------------------------------
/scripts/build-uv.sh:
--------------------------------------------------------------------------------

```bash
#!/bin/bash

# Exit on error
set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Print a progress line ("==> <message>") in blue on stdout.
print_step() {
    local message="$1"
    echo -e "${BLUE}==> ${message}${NC}"
}

# Print a success line ("==> Success: <message>") in green on stdout.
print_success() {
    local message="$1"
    echo -e "${GREEN}==> Success: ${message}${NC}"
}

# Print an error line ("==> Error: <message>") in red on stderr.
print_error() {
    local message="$1"
    echo -e "${RED}==> Error: ${message}${NC}" 1>&2
}

# Print a warning line ("==> Warning: <message>") in yellow on stdout.
print_warning() {
    local message="$1"
    echo -e "${YELLOW}==> Warning: ${message}${NC}"
}

# Report whether the `uv` executable is on PATH.
# Returns 0 (after printing a success line and the uv version) when found,
# 1 otherwise.
check_uv() {
    # Guard clause: nothing to report when uv cannot be resolved.
    command -v uv > /dev/null 2>&1 || return 1

    print_success "UV is already installed"
    uv --version
    return 0
}

# Install UV with the official installer script and make it available on PATH
# for the rest of this script.
# Exits the script with status 1 when the platform is unsupported or when uv
# still cannot be found after installation.
install_uv() {
    print_step "UV not found. Installing UV..."

    # Detect OS ("linux"* also covers non-glibc values such as linux-musl)
    if [[ "$OSTYPE" == "linux"* ]] || [[ "$OSTYPE" == "darwin"* ]]; then
        print_step "Installing UV for Unix-like system..."
        curl -LsSf https://astral.sh/uv/install.sh | sh

        # Add UV to PATH for current session.
        # Current installers place the binary in $XDG_BIN_HOME or ~/.local/bin;
        # older releases used ~/.cargo/bin, so keep that as a fallback.
        export PATH="${XDG_BIN_HOME:-$HOME/.local/bin}:$HOME/.cargo/bin:$PATH"
        # Forget any cached command lookups from before PATH changed.
        hash -r 2>/dev/null || true

        # Check if installation was successful
        if command -v uv &> /dev/null; then
            print_success "UV installed successfully"
            uv --version
        else
            print_error "UV installation failed"
            print_step "Please restart your terminal and try again, or install manually:"
            echo "  curl -LsSf https://astral.sh/uv/install.sh | sh"
            exit 1
        fi
    elif [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]]; then
        print_error "For Windows, please use PowerShell and run:"
        echo "  powershell -ExecutionPolicy ByPass -c \"irm https://astral.sh/uv/install.ps1 | iex\""
        exit 1
    else
        print_error "Unsupported operating system: $OSTYPE"
        print_step "Please install UV manually from: https://docs.astral.sh/uv/getting-started/installation/"
        exit 1
    fi
}

# Get the script's directory
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_ROOT="$( cd "${SCRIPT_DIR}/.." && pwd )"

# Change to project root
cd "$PROJECT_ROOT"

# Check if UV is installed, install if not
if ! check_uv; then
    install_uv
fi

# Load environment variables from .env.local
if [ -f .env.local ]; then
    print_step "Loading environment variables from .env.local..."
    # set -a exports every variable assigned while the file is sourced
    set -a
    source .env.local
    set +a
    print_success "Environment variables loaded"
else
    print_error ".env.local file not found"
    exit 1
fi

# Clean up existing environments and cache
print_step "Cleaning up existing environments..."
# Single pass over the tree:
#  * node_modules and .git are pruned first so that directories that merely
#    share a name (a package's "dist", a "dist/..." branch ref) are never removed
#  * matched directories are pruned as well, so find does not try to descend
#    into something it is about to delete
find . \( -name "node_modules" -o -name ".git" \) -prune -o \
    -type d \( \
        -name "__pycache__" -o \
        -name ".pytest_cache" -o \
        -name "dist" -o \
        -name ".venv" -o \
        -name "*.egg-info" \
    \) -prune -exec rm -rf {} + 2>/dev/null || true
print_success "Environment cleanup complete"

# Install Python 3.12 using UV
print_step "Installing Python 3.12 using UV..."
uv python install 3.12
print_success "Python 3.12 installed"

# Create virtual environment using UV
print_step "Creating virtual environment with UV..."
uv venv .venv --python 3.12
print_success "Virtual environment created"

# Activate virtual environment
print_step "Activating virtual environment..."
source .venv/bin/activate
print_success "Virtual environment activated"

# Install a package in editable mode with UV.
#   $1  package directory (relative to the current directory, or absolute)
#   $2  display name used in log output
#   $3  optional comma-separated extras, e.g. "all"
# Returns 1 when the directory has no pyproject.toml; otherwise returns the
# status of `uv pip install`. The caller's working directory is never changed.
install_package() {
    local package_dir=$1
    local package_name=$2
    local extras=$3
    print_step "Installing ${package_name} with UV..."

    if [ ! -f "${package_dir}/pyproject.toml" ]; then
        print_error "No pyproject.toml found in ${package_dir}"
        return 1
    fi

    local target="."
    if [ -n "$extras" ]; then
        target=".[${extras}]"
    fi

    # Run inside a subshell so the directory change cannot leak to the caller,
    # even when the install fails part-way.
    ( cd "$package_dir" && uv pip install -e "$target" )
}

# Install packages in order of dependency
print_step "Installing packages in development mode with UV..."

# One entry per package: "<directory>|<display name>|<extras>".
# Order matters: core first (base package with telemetry support), then
# pylume (base dependency), computer, som (omniparser), agent, and finally
# the two servers.
package_specs=(
    "libs/python/core|core|"
    "libs/python/pylume|pylume|"
    "libs/python/computer|computer|all"
    "libs/python/som|som|"
    "libs/python/agent|agent|all"
    "libs/python/computer-server|computer-server|"
    "libs/python/mcp-server|mcp-server|"
)

for spec in "${package_specs[@]}"; do
    IFS='|' read -r spec_dir spec_name spec_extras <<< "$spec"
    install_package "$spec_dir" "$spec_name" "$spec_extras"
done

# Install development tools from root project
print_step "Installing development dependencies with UV..."
uv pip install -e ".[dev,test,docs]"

# Create a .env file for VS Code to use the virtual environment
print_step "Creating .env file for VS Code..."
pythonpath_members=(core computer agent som pylume computer-server mcp-server)
pythonpath_value=""
for member in "${pythonpath_members[@]}"; do
    # ${var:+:} inserts the ':' separator only once something has been added
    pythonpath_value+="${pythonpath_value:+:}${PROJECT_ROOT}/libs/python/${member}"
done
echo "PYTHONPATH=${pythonpath_value}" > .env

print_success "All packages installed successfully with UV!"
print_step "Your virtual environment is ready. To activate it:"
echo "  source .venv/bin/activate"
print_step "UV provides fast dependency resolution and installation."
print_step "You can also use 'uv run' to run commands in the virtual environment without activation."

```

--------------------------------------------------------------------------------
/libs/python/computer/computer/providers/winsandbox/setup_script.ps1:
--------------------------------------------------------------------------------

```
# Setup script for Windows Sandbox CUA Computer provider
# This script runs when the sandbox starts

Write-Host "Starting CUA Computer setup in Windows Sandbox..."

# Locate a usable Python interpreter inside the sandbox.
# Search order, per folder on the sandbox desktop: python.exe in the folder
# root, in Scripts\ and in bin\, then python.exe directly inside each
# subfolder. If nothing is found in any folder, the commands "python", "py"
# and "python3" are tried. Returns the path (or command name) of the first
# candidate whose --version output mentions "Python"; throws when none works.
function Find-MappedPython {
    Write-Host "Looking for mapped Python installation from pywinsandbox..."

    # pywinsandbox maps the host Python installation to the sandbox
    # Look for mapped shared folders on the desktop (common pywinsandbox pattern)
    $desktopPath = "C:\Users\WDAGUtilityAccount\Desktop"
    $sharedFolders = Get-ChildItem -Path $desktopPath -Directory -ErrorAction SilentlyContinue

    foreach ($shared in $sharedFolders) {
        # Collect every candidate for this folder up front, tagged with the
        # message prefix to print when it turns out to be usable.
        $candidates = @()
        foreach ($relative in @("python.exe", "Scripts\python.exe", "bin\python.exe")) {
            $candidates += [pscustomobject]@{
                Path  = "$($shared.FullName)\$relative"
                Label = "Found mapped Python"
            }
        }
        $children = Get-ChildItem -Path $shared.FullName -Directory -ErrorAction SilentlyContinue
        foreach ($child in $children) {
            $candidates += [pscustomobject]@{
                Path  = "$($child.FullName)\python.exe"
                Label = "Found mapped Python in subdirectory"
            }
        }

        foreach ($candidate in $candidates) {
            $exePath = $candidate.Path
            if (-not (Test-Path $exePath)) {
                continue
            }
            try {
                $reported = & $exePath --version 2>&1
                if ($reported -match "Python") {
                    Write-Host "$($candidate.Label): $exePath - $reported"
                    return $exePath
                }
            } catch {
                # Not runnable; fall through to the next candidate
            }
        }
    }

    # Fallback: try common Python commands that might be available
    foreach ($commandName in @("python", "py", "python3")) {
        try {
            $reported = & $commandName --version 2>&1
            if ($reported -match "Python") {
                Write-Host "Found Python via command '$commandName': $reported"
                return $commandName
            }
        } catch {
            # Command missing or failed; try the next name
        }
    }

    throw "Could not find any Python installation (mapped or otherwise)"
}

# Main setup sequence: find Python, create/reuse a venv, install
# cua-computer-server into it and launch the server in the background.
# Any failure is reported and the window is kept open until a key is pressed.
try {
    # Step 1: Find the mapped Python installation
    Write-Host "Step 1: Finding mapped Python installation..."
    $pythonExe = Find-MappedPython
    Write-Host "Using Python: $pythonExe"
    
    # Verify Python works and show version
    $pythonVersion = & $pythonExe --version 2>&1
    Write-Host "Python version: $pythonVersion"

    # Step 2: Create a dedicated virtual environment in mapped Desktop folder (persistent)
    Write-Host "Step 2: Creating virtual environment (if needed)..."
    $cachePath = "C:\Users\WDAGUtilityAccount\Desktop\wsb_cache"
    $venvPath = "C:\Users\WDAGUtilityAccount\Desktop\wsb_cache\venv"
    if (!(Test-Path $venvPath)) {
        Write-Host "Creating venv at: $venvPath"
        & $pythonExe -m venv $venvPath
        # Native commands do not throw on failure; check the exit code explicitly.
        if ($LASTEXITCODE -ne 0) {
            throw "Failed to create virtual environment at $venvPath (exit code $LASTEXITCODE)"
        }
    } else {
        Write-Host "Venv already exists at: $venvPath"
    }
    # Hide the folder to keep Desktop clean (best effort; failures are ignored)
    try {
        $item = Get-Item $cachePath -ErrorAction SilentlyContinue
        if ($item) {
            if (-not ($item.Attributes -band [IO.FileAttributes]::Hidden)) {
                $item.Attributes = $item.Attributes -bor [IO.FileAttributes]::Hidden
            }
        }
    } catch { }
    $venvPython = Join-Path $venvPath "Scripts\python.exe"
    if (!(Test-Path $venvPython)) {
        throw "Virtual environment Python not found at $venvPython"
    }
    Write-Host "Using venv Python: $venvPython"

    # Step 3: Install cua-computer-server into the venv
    Write-Host "Step 3: Installing cua-computer-server..."
    
    Write-Host "Upgrading pip..."
    & $venvPython -m pip install --upgrade pip --quiet
    if ($LASTEXITCODE -ne 0) {
        # An outdated pip can usually still install the package, so only warn.
        Write-Warning "pip upgrade failed (exit code $LASTEXITCODE); continuing with the existing pip"
    }
    
    Write-Host "Installing cua-computer-server..."
    & $venvPython -m pip install cua-computer-server
    if ($LASTEXITCODE -ne 0) {
        # Fail here rather than later with a misleading "server failed to start".
        throw "pip install cua-computer-server failed (exit code $LASTEXITCODE)"
    }
    
    Write-Host "cua-computer-server installation completed."

    # Step 4: Start computer server in background using the venv Python
    Write-Host "Step 4: Starting computer server in background..."
    Write-Host "Starting computer server with: $venvPython"
    
    # Start the computer server in the background
    $serverProcess = Start-Process -FilePath $venvPython -ArgumentList "-m", "computer_server.main" -WindowStyle Hidden -PassThru
    Write-Host "Computer server started in background with PID: $($serverProcess.Id)"
    
    # Give it a moment to start
    Start-Sleep -Seconds 3
    
    # Check if the process is still running
    if (Get-Process -Id $serverProcess.Id -ErrorAction SilentlyContinue) {
        Write-Host "Computer server is running successfully in background"
    } else {
        throw "Computer server failed to start or exited immediately"
    }

} catch {
    Write-Error "Setup failed: $_"
    Write-Host "Error details: $($_.Exception.Message)"
    Write-Host "Stack trace: $($_.ScriptStackTrace)"
    Write-Host ""
    Write-Host "Press any key to close this window..."
    $null = $Host.UI.RawUI.ReadKey("NoEcho,IncludeKeyDown")
    exit 1
}

Write-Host ""
Write-Host "Setup completed successfully!"
Write-Host "Press any key to close this window..."
$null = $Host.UI.RawUI.ReadKey("NoEcho,IncludeKeyDown")

```

--------------------------------------------------------------------------------
/libs/python/som/som/ocr.py:
--------------------------------------------------------------------------------

```python
from typing import List, Dict, Any, Tuple, Union
import logging
import signal
from contextlib import contextmanager
from pathlib import Path
import easyocr
from PIL import Image
import numpy as np
import torch

logger = logging.getLogger(__name__)


class TimeoutException(Exception):
    """Raised by the ``timeout`` context manager when the alarm fires."""


@contextmanager
def timeout(seconds: int):
    """Abort the enclosed block with ``TimeoutException`` after ``seconds``.

    The limit is enforced with ``SIGALRM``, which only works in the main
    thread of a process on platforms that provide that signal. Anywhere else
    (worker threads, Windows) the block runs without a time limit and a
    warning is logged.

    Args:
        seconds: Whole number of seconds before the alarm fires. ``0`` schedules
            no alarm, i.e. no time limit.

    Raises:
        TimeoutException: inside the ``with`` body when the alarm fires.
    """
    import threading

    if threading.current_thread() is not threading.main_thread():
        # In a non-main thread, we can't use signal
        logger.warning("Timeout function called from non-main thread; signal-based timeout disabled")
        yield
        return

    if not hasattr(signal, "SIGALRM"):
        # e.g. Windows: the signal module has no SIGALRM / alarm()
        logger.warning("SIGALRM is not available on this platform; signal-based timeout disabled")
        yield
        return

    def timeout_handler(signum, frame):
        raise TimeoutException("OCR process timed out")

    original_handler = signal.signal(signal.SIGALRM, timeout_handler)
    try:
        # Scheduled inside the try so the previous handler is restored even if
        # alarm() itself rejects the argument.
        signal.alarm(seconds)
        yield
    finally:
        signal.alarm(0)  # cancel any pending alarm
        signal.signal(signal.SIGALRM, original_handler)


class OCRProcessor:
    """Text detection on images using EasyOCR.

    The EasyOCR reader is created on first use and cached on the class, so
    all instances in the process share it.
    """

    _shared_reader = None  # Class-level shared reader instance

    def __init__(self):
        """Initialize the OCR processor and pick the compute device.

        Prefers CUDA, then Apple MPS, and falls back to CPU. The reader itself
        is not created here.
        """
        self.reader = None
        # Determine best available device
        self.device = "cpu"
        if torch.cuda.is_available():
            self.device = "cuda"
        elif (
            hasattr(torch, "backends")
            and hasattr(torch.backends, "mps")
            and torch.backends.mps.is_available()
        ):
            self.device = "mps"
        logger.info(f"OCR processor initialized with device: {self.device}")

    def _ensure_reader(self):
        """Ensure ``self.reader`` holds an EasyOCR reader.

        Reuses the class-level cached reader when one exists; otherwise builds
        an English reader (GPU-enabled when the device is cuda or mps) and
        caches it on the class.

        Raises:
            RuntimeError: if EasyOCR fails to initialize.
        """
        # First check if we already have a class-level reader
        if OCRProcessor._shared_reader is not None:
            self.reader = OCRProcessor._shared_reader
            return

        if self.reader is not None:
            return

        try:
            logger.info("Initializing EasyOCR reader...")

            # Use GPU if available
            use_gpu = self.device in ("cuda", "mps")
            self.reader = easyocr.Reader(["en"], gpu=use_gpu)

            # Cache the reader at class level
            OCRProcessor._shared_reader = self.reader

            logger.info(f"EasyOCR reader initialized successfully with GPU={use_gpu}")
        except Exception as e:
            logger.error(f"Failed to initialize EasyOCR reader: {str(e)}")
            # Leave the instance without a reader so a later call retries
            self.reader = None
            raise RuntimeError(f"EasyOCR initialization failed: {str(e)}") from e

    def detect_text(
        self, image: Image.Image, confidence_threshold: float = 0.5, timeout_seconds: int = 5
    ) -> List[Dict[str, Any]]:
        """Detect text in an image using EasyOCR.

        Never raises: every failure (reader initialization, timeout, OCR
        error) is logged and reported as an empty list.

        Args:
            image: PIL Image to process
            confidence_threshold: Minimum confidence for a detection to be
                returned; also passed to EasyOCR as ``text_threshold``
            timeout_seconds: Maximum time to wait for OCR

        Returns:
            List of detection dictionaries with keys ``type`` ("text"),
            ``bbox`` ([x1, y1, x2, y2] divided by the image width/height),
            ``content``, ``confidence`` (a plain ``float``) and
            ``interactivity`` (always ``False``).
        """
        try:
            # Try to initialize reader, catch any exceptions
            try:
                self._ensure_reader()
            except Exception as e:
                logger.error(f"Failed to initialize OCR reader: {str(e)}")
                return []

            # Ensure reader was properly initialized
            if self.reader is None:
                logger.error("OCR reader is None after initialization")
                return []

            img_width, img_height = image.size
            if img_width == 0 or img_height == 0:
                # Nothing to read, and normalizing boxes would divide by zero
                return []

            # Convert PIL Image to numpy array
            image_np = np.array(image)

            try:
                with timeout(timeout_seconds):
                    results = self.reader.readtext(
                        image_np, paragraph=False, text_threshold=confidence_threshold
                    )
            except TimeoutException:
                logger.warning("OCR timed out")
                return []
            except Exception as e:
                logger.warning(f"OCR failed: {str(e)}")
                return []

            detections = []

            for box, text, conf in results:
                # Ensure conf is float
                conf_float = float(conf)
                if conf_float < confidence_threshold:
                    continue

                # Convert the corner points to an axis-aligned [x1, y1, x2, y2]
                # box, scaled by the image dimensions
                xs = [float(point[0]) for point in box]
                ys = [float(point[1]) for point in box]
                x1 = min(xs) / img_width
                y1 = min(ys) / img_height
                x2 = max(xs) / img_width
                y2 = max(ys) / img_height

                detections.append(
                    {
                        "type": "text",
                        "bbox": [x1, y1, x2, y2],
                        "content": text,
                        # Store the converted value so callers get a plain float
                        "confidence": conf_float,
                        "interactivity": False,  # Text is typically non-interactive
                    }
                )

            return detections
        except Exception as e:
            logger.error(f"Unexpected error in OCR processing: {str(e)}")
            return []

```

--------------------------------------------------------------------------------
/.github/workflows/pypi-publish-mcp-server.yml:
--------------------------------------------------------------------------------

```yaml
name: Publish MCP Server Package

on:
  push:
    tags:
      - "mcp-server-v*"
  workflow_dispatch:
    inputs:
      version:
        description: "Version to publish (without v prefix)"
        required: true
        default: "0.1.0"
  workflow_call:
    inputs:
      version:
        description: "Version to publish"
        required: true
        type: string
    outputs:
      version:
        description: "The version that was published"
        value: ${{ jobs.prepare.outputs.version }}

# Adding permissions at workflow level
permissions:
  contents: write

jobs:
  prepare:
    runs-on: macos-latest
    outputs:
      version: ${{ steps.get-version.outputs.version }}
      agent_version: ${{ steps.update-deps.outputs.agent_version }}
      computer_version: ${{ steps.update-deps.outputs.computer_version }}
    steps:
      - uses: actions/checkout@v4

      - name: Determine version
        id: get-version
        # Event data is passed through environment variables instead of being
        # interpolated into the script text, so a crafted tag name or version
        # input cannot inject shell commands.
        env:
          EVENT_NAME: ${{ github.event_name }}
          GIT_REF: ${{ github.ref }}
          DISPATCH_VERSION: ${{ github.event.inputs.version }}
          CALL_VERSION: ${{ inputs.version }}
        run: |
          if [ "$EVENT_NAME" == "push" ]; then
            # Extract version from tag (for package-specific tags)
            if [[ "$GIT_REF" =~ ^refs/tags/mcp-server-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then
              VERSION=${BASH_REMATCH[1]}
            else
              echo "Invalid tag format for mcp-server"
              exit 1
            fi
          elif [ "$EVENT_NAME" == "workflow_dispatch" ]; then
            # Use version from workflow dispatch
            VERSION=$DISPATCH_VERSION
          else
            # Use version from workflow_call
            VERSION=$CALL_VERSION
          fi
          # Fail early instead of publishing with an empty version
          if [ -z "$VERSION" ]; then
            echo "Could not determine version"
            exit 1
          fi
          echo "VERSION=$VERSION"
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      # Look up the newest cua-agent and cua-computer releases on PyPI, expose
      # them as step outputs (agent_version, computer_version), and rewrite the
      # matching requirement strings in pyproject.toml to
      # ">=<latest>,<<next major>.0.0".
      # NOTE(review): the pyproject.toml edit lives in this job's checkout only;
      # confirm the publish workflow actually sees or re-applies it.
      - name: Update dependencies to latest versions
        id: update-deps
        run: |
          cd libs/python/mcp-server

          # Install required package for PyPI API access
          pip install requests

          # Create a Python script for PyPI version checking
          cat > get_latest_versions.py << 'EOF'
          import requests
          import sys

          # Seconds to wait for PyPI before giving up; without a timeout a
          # stalled connection would block the job until the runner kills it.
          REQUEST_TIMEOUT = 10

          def get_package_version(package_name, fallback="0.1.0"):
              """Return the latest version PyPI reports for package_name.

              Diagnostics go to stderr so stdout carries only the version.
              Any failure (network error, timeout, non-200 status, malformed
              payload) yields `fallback` instead of raising.
              """
              try:
                  response = requests.get(
                      f'https://pypi.org/pypi/{package_name}/json',
                      timeout=REQUEST_TIMEOUT,
                  )
                  print(f"API Response Status for {package_name}: {response.status_code}", file=sys.stderr)

                  if response.status_code != 200:
                      print(f"API request failed for {package_name}, using fallback version", file=sys.stderr)
                      return fallback

                  data = response.json()

                  if 'info' not in data:
                      print(f"Missing 'info' key in API response for {package_name}, using fallback version", file=sys.stderr)
                      return fallback

                  return data['info']['version']
              except Exception as e:
                  print(f"Error fetching version for {package_name}: {str(e)}", file=sys.stderr)
                  return fallback

          # Get latest versions (one per line, in this fixed order)
          print(get_package_version('cua-agent'))
          print(get_package_version('cua-computer'))
          EOF

          # Execute the script to get the versions, then remove it so the
          # helper is not left behind in the package directory
          VERSIONS=($(python get_latest_versions.py))
          rm -f get_latest_versions.py
          LATEST_AGENT=${VERSIONS[0]}
          LATEST_COMPUTER=${VERSIONS[1]}

          echo "Latest cua-agent version: $LATEST_AGENT"
          echo "Latest cua-computer version: $LATEST_COMPUTER"

          # Determine major version for version constraint
          AGENT_MAJOR=${LATEST_AGENT%%.*}
          COMPUTER_MAJOR=${LATEST_COMPUTER%%.*}

          # Fail loudly on anything that is not a plain integer: shell
          # arithmetic would otherwise treat it as 0 (or error obscurely) and
          # write a wrong upper bound into pyproject.toml.
          for major in "$AGENT_MAJOR" "$COMPUTER_MAJOR"; do
            if ! [[ "$major" =~ ^[0-9]+$ ]]; then
              echo "Could not parse a numeric major version from PyPI lookup (got '$major')" >&2
              exit 1
            fi
          done

          # Output the versions for the next job
          echo "agent_version=$LATEST_AGENT" >> "$GITHUB_OUTPUT"
          echo "computer_version=$LATEST_COMPUTER" >> "$GITHUB_OUTPUT"

          NEXT_AGENT_MAJOR=$((AGENT_MAJOR + 1))
          NEXT_COMPUTER_MAJOR=$((COMPUTER_MAJOR + 1))

          # Update dependencies in pyproject.toml.
          # `-i.bak` (suffix attached) is accepted by both GNU and BSD/macOS
          # sed, so no per-OS branch is needed. The [^\"] classes stop each
          # match at the closing quote of the requirement being replaced.
          sed -i.bak \
            -e "s/\"cua-agent\[all\]>=[^\",]*,<[^\"]*\"/\"cua-agent[all]>=$LATEST_AGENT,<$NEXT_AGENT_MAJOR.0.0\"/" \
            -e "s/\"cua-computer>=[^\",]*,<[^\"]*\"/\"cua-computer>=$LATEST_COMPUTER,<$NEXT_COMPUTER_MAJOR.0.0\"/" \
            pyproject.toml
          rm -f pyproject.toml.bak

          # Display the updated dependencies
          echo "Updated dependencies in pyproject.toml:"
          grep -E "cua-agent|cua-computer" pyproject.toml

  # Runs after `prepare` and delegates to the shared reusable publish
  # workflow, passing the version computed by `prepare` and forwarding the
  # PyPI token secret.
  # NOTE(review): presumably the reusable workflow builds and uploads the
  # package from `package_dir` — confirm in pypi-reusable-publish.yml.
  publish:
    needs: prepare
    uses: ./.github/workflows/pypi-reusable-publish.yml
    with:
      package_name: "mcp-server"
      package_dir: "libs/python/mcp-server"
      version: ${{ needs.prepare.outputs.version }}
      is_lume_package: false
      base_package_name: "cua-mcp-server"
    secrets:
      PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}

  # Runs after `prepare` and `publish`; records the dependency versions
  # resolved by `prepare` into this job's GITHUB_ENV file.
  # NOTE(review): values written to GITHUB_ENV are visible only to later steps
  # of the same job, and this job has no later steps — confirm whether any
  # consumer actually relies on this job.
  set-env-variables:
    needs: [prepare, publish]
    runs-on: macos-latest
    steps:
      - name: Set environment variables for use in other jobs
        # Pass the job outputs through `env:` instead of interpolating
        # `${{ }}` into the script text, so their contents are never parsed
        # as shell code.
        env:
          AGENT_VERSION: ${{ needs.prepare.outputs.agent_version }}
          COMPUTER_VERSION: ${{ needs.prepare.outputs.computer_version }}
        run: |
          echo "AGENT_VERSION=$AGENT_VERSION" >> "$GITHUB_ENV"
          echo "COMPUTER_VERSION=$COMPUTER_VERSION" >> "$GITHUB_ENV"

```