#
tokens: 48711/50000 36/501 files (page 4/21)
lines: on (toggle) GitHub
raw markdown copy reset
This is page 4 of 21. Use http://codebase.md/trycua/cua?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .all-contributorsrc
├── .cursorignore
├── .devcontainer
│   ├── devcontainer.json
│   ├── post-install.sh
│   └── README.md
├── .dockerignore
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── scripts
│   │   ├── get_pyproject_version.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── README.md
│   │       └── test_get_pyproject_version.py
│   └── workflows
│       ├── ci-lume.yml
│       ├── docker-publish-kasm.yml
│       ├── docker-publish-xfce.yml
│       ├── docker-reusable-publish.yml
│       ├── npm-publish-computer.yml
│       ├── npm-publish-core.yml
│       ├── publish-lume.yml
│       ├── pypi-publish-agent.yml
│       ├── pypi-publish-computer-server.yml
│       ├── pypi-publish-computer.yml
│       ├── pypi-publish-core.yml
│       ├── pypi-publish-mcp-server.yml
│       ├── pypi-publish-pylume.yml
│       ├── pypi-publish-som.yml
│       ├── pypi-reusable-publish.yml
│       └── test-validation-script.yml
├── .gitignore
├── .vscode
│   ├── docs.code-workspace
│   ├── launch.json
│   ├── libs-ts.code-workspace
│   ├── lume.code-workspace
│   ├── lumier.code-workspace
│   └── py.code-workspace
├── blog
│   ├── app-use.md
│   ├── assets
│   │   ├── composite-agents.png
│   │   ├── docker-ubuntu-support.png
│   │   ├── hack-booth.png
│   │   ├── hack-closing-ceremony.jpg
│   │   ├── hack-cua-ollama-hud.jpeg
│   │   ├── hack-leaderboard.png
│   │   ├── hack-the-north.png
│   │   ├── hack-winners.jpeg
│   │   ├── hack-workshop.jpeg
│   │   ├── hud-agent-evals.png
│   │   └── trajectory-viewer.jpeg
│   ├── bringing-computer-use-to-the-web.md
│   ├── build-your-own-operator-on-macos-1.md
│   ├── build-your-own-operator-on-macos-2.md
│   ├── composite-agents.md
│   ├── cua-hackathon.md
│   ├── hack-the-north.md
│   ├── hud-agent-evals.md
│   ├── human-in-the-loop.md
│   ├── introducing-cua-cloud-containers.md
│   ├── lume-to-containerization.md
│   ├── sandboxed-python-execution.md
│   ├── training-computer-use-models-trajectories-1.md
│   ├── trajectory-viewer.md
│   ├── ubuntu-docker-support.md
│   └── windows-sandbox.md
├── CONTRIBUTING.md
├── Development.md
├── Dockerfile
├── docs
│   ├── .gitignore
│   ├── .prettierrc
│   ├── content
│   │   └── docs
│   │       ├── agent-sdk
│   │       │   ├── agent-loops.mdx
│   │       │   ├── benchmarks
│   │       │   │   ├── index.mdx
│   │       │   │   ├── interactive.mdx
│   │       │   │   ├── introduction.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── osworld-verified.mdx
│   │       │   │   ├── screenspot-pro.mdx
│   │       │   │   └── screenspot-v2.mdx
│   │       │   ├── callbacks
│   │       │   │   ├── agent-lifecycle.mdx
│   │       │   │   ├── cost-saving.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── logging.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── pii-anonymization.mdx
│   │       │   │   └── trajectories.mdx
│   │       │   ├── chat-history.mdx
│   │       │   ├── custom-computer-handlers.mdx
│   │       │   ├── custom-tools.mdx
│   │       │   ├── customizing-computeragent.mdx
│   │       │   ├── integrations
│   │       │   │   ├── hud.mdx
│   │       │   │   └── meta.json
│   │       │   ├── message-format.mdx
│   │       │   ├── meta.json
│   │       │   ├── migration-guide.mdx
│   │       │   ├── prompt-caching.mdx
│   │       │   ├── supported-agents
│   │       │   │   ├── composed-agents.mdx
│   │       │   │   ├── computer-use-agents.mdx
│   │       │   │   ├── grounding-models.mdx
│   │       │   │   ├── human-in-the-loop.mdx
│   │       │   │   └── meta.json
│   │       │   ├── supported-model-providers
│   │       │   │   ├── index.mdx
│   │       │   │   └── local-models.mdx
│   │       │   └── usage-tracking.mdx
│   │       ├── computer-sdk
│   │       │   ├── cloud-vm-management.mdx
│   │       │   ├── commands.mdx
│   │       │   ├── computer-ui.mdx
│   │       │   ├── computers.mdx
│   │       │   ├── meta.json
│   │       │   └── sandboxed-python.mdx
│   │       ├── index.mdx
│   │       ├── libraries
│   │       │   ├── agent
│   │       │   │   └── index.mdx
│   │       │   ├── computer
│   │       │   │   └── index.mdx
│   │       │   ├── computer-server
│   │       │   │   ├── Commands.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── REST-API.mdx
│   │       │   │   └── WebSocket-API.mdx
│   │       │   ├── core
│   │       │   │   └── index.mdx
│   │       │   ├── lume
│   │       │   │   ├── cli-reference.mdx
│   │       │   │   ├── faq.md
│   │       │   │   ├── http-api.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── prebuilt-images.mdx
│   │       │   ├── lumier
│   │       │   │   ├── building-lumier.mdx
│   │       │   │   ├── docker-compose.mdx
│   │       │   │   ├── docker.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   └── meta.json
│   │       │   ├── mcp-server
│   │       │   │   ├── client-integrations.mdx
│   │       │   │   ├── configuration.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── llm-integrations.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── tools.mdx
│   │       │   │   └── usage.mdx
│   │       │   └── som
│   │       │       ├── configuration.mdx
│   │       │       └── index.mdx
│   │       ├── meta.json
│   │       ├── quickstart-cli.mdx
│   │       ├── quickstart-devs.mdx
│   │       └── telemetry.mdx
│   ├── next.config.mjs
│   ├── package-lock.json
│   ├── package.json
│   ├── pnpm-lock.yaml
│   ├── postcss.config.mjs
│   ├── public
│   │   └── img
│   │       ├── agent_gradio_ui.png
│   │       ├── agent.png
│   │       ├── cli.png
│   │       ├── computer.png
│   │       ├── som_box_threshold.png
│   │       └── som_iou_threshold.png
│   ├── README.md
│   ├── source.config.ts
│   ├── src
│   │   ├── app
│   │   │   ├── (home)
│   │   │   │   ├── [[...slug]]
│   │   │   │   │   └── page.tsx
│   │   │   │   └── layout.tsx
│   │   │   ├── api
│   │   │   │   └── search
│   │   │   │       └── route.ts
│   │   │   ├── favicon.ico
│   │   │   ├── global.css
│   │   │   ├── layout.config.tsx
│   │   │   ├── layout.tsx
│   │   │   ├── llms.mdx
│   │   │   │   └── [[...slug]]
│   │   │   │       └── route.ts
│   │   │   └── llms.txt
│   │   │       └── route.ts
│   │   ├── assets
│   │   │   ├── discord-black.svg
│   │   │   ├── discord-white.svg
│   │   │   ├── logo-black.svg
│   │   │   └── logo-white.svg
│   │   ├── components
│   │   │   ├── iou.tsx
│   │   │   └── mermaid.tsx
│   │   ├── lib
│   │   │   ├── llms.ts
│   │   │   └── source.ts
│   │   └── mdx-components.tsx
│   └── tsconfig.json
├── examples
│   ├── agent_examples.py
│   ├── agent_ui_examples.py
│   ├── cloud_api_examples.py
│   ├── computer_examples_windows.py
│   ├── computer_examples.py
│   ├── computer_ui_examples.py
│   ├── computer-example-ts
│   │   ├── .env.example
│   │   ├── .gitignore
│   │   ├── .prettierrc
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── README.md
│   │   ├── src
│   │   │   ├── helpers.ts
│   │   │   └── index.ts
│   │   └── tsconfig.json
│   ├── docker_examples.py
│   ├── evals
│   │   ├── hud_eval_examples.py
│   │   └── wikipedia_most_linked.txt
│   ├── pylume_examples.py
│   ├── sandboxed_functions_examples.py
│   ├── som_examples.py
│   ├── utils.py
│   └── winsandbox_example.py
├── img
│   ├── agent_gradio_ui.png
│   ├── agent.png
│   ├── cli.png
│   ├── computer.png
│   ├── logo_black.png
│   └── logo_white.png
├── libs
│   ├── kasm
│   │   ├── Dockerfile
│   │   ├── LICENSE
│   │   ├── README.md
│   │   └── src
│   │       └── ubuntu
│   │           └── install
│   │               └── firefox
│   │                   ├── custom_startup.sh
│   │                   ├── firefox.desktop
│   │                   └── install_firefox.sh
│   ├── lume
│   │   ├── .cursorignore
│   │   ├── CONTRIBUTING.md
│   │   ├── Development.md
│   │   ├── img
│   │   │   └── cli.png
│   │   ├── Package.resolved
│   │   ├── Package.swift
│   │   ├── README.md
│   │   ├── resources
│   │   │   └── lume.entitlements
│   │   ├── scripts
│   │   │   ├── build
│   │   │   │   ├── build-debug.sh
│   │   │   │   ├── build-release-notarized.sh
│   │   │   │   └── build-release.sh
│   │   │   └── install.sh
│   │   ├── src
│   │   │   ├── Commands
│   │   │   │   ├── Clone.swift
│   │   │   │   ├── Config.swift
│   │   │   │   ├── Create.swift
│   │   │   │   ├── Delete.swift
│   │   │   │   ├── Get.swift
│   │   │   │   ├── Images.swift
│   │   │   │   ├── IPSW.swift
│   │   │   │   ├── List.swift
│   │   │   │   ├── Logs.swift
│   │   │   │   ├── Options
│   │   │   │   │   └── FormatOption.swift
│   │   │   │   ├── Prune.swift
│   │   │   │   ├── Pull.swift
│   │   │   │   ├── Push.swift
│   │   │   │   ├── Run.swift
│   │   │   │   ├── Serve.swift
│   │   │   │   ├── Set.swift
│   │   │   │   └── Stop.swift
│   │   │   ├── ContainerRegistry
│   │   │   │   ├── ImageContainerRegistry.swift
│   │   │   │   ├── ImageList.swift
│   │   │   │   └── ImagesPrinter.swift
│   │   │   ├── Errors
│   │   │   │   └── Errors.swift
│   │   │   ├── FileSystem
│   │   │   │   ├── Home.swift
│   │   │   │   ├── Settings.swift
│   │   │   │   ├── VMConfig.swift
│   │   │   │   ├── VMDirectory.swift
│   │   │   │   └── VMLocation.swift
│   │   │   ├── LumeController.swift
│   │   │   ├── Main.swift
│   │   │   ├── Server
│   │   │   │   ├── Handlers.swift
│   │   │   │   ├── HTTP.swift
│   │   │   │   ├── Requests.swift
│   │   │   │   ├── Responses.swift
│   │   │   │   └── Server.swift
│   │   │   ├── Utils
│   │   │   │   ├── CommandRegistry.swift
│   │   │   │   ├── CommandUtils.swift
│   │   │   │   ├── Logger.swift
│   │   │   │   ├── NetworkUtils.swift
│   │   │   │   ├── Path.swift
│   │   │   │   ├── ProcessRunner.swift
│   │   │   │   ├── ProgressLogger.swift
│   │   │   │   ├── String.swift
│   │   │   │   └── Utils.swift
│   │   │   ├── Virtualization
│   │   │   │   ├── DarwinImageLoader.swift
│   │   │   │   ├── DHCPLeaseParser.swift
│   │   │   │   ├── ImageLoaderFactory.swift
│   │   │   │   └── VMVirtualizationService.swift
│   │   │   ├── VM
│   │   │   │   ├── DarwinVM.swift
│   │   │   │   ├── LinuxVM.swift
│   │   │   │   ├── VM.swift
│   │   │   │   ├── VMDetails.swift
│   │   │   │   ├── VMDetailsPrinter.swift
│   │   │   │   ├── VMDisplayResolution.swift
│   │   │   │   └── VMFactory.swift
│   │   │   └── VNC
│   │   │       ├── PassphraseGenerator.swift
│   │   │       └── VNCService.swift
│   │   └── tests
│   │       ├── Mocks
│   │       │   ├── MockVM.swift
│   │       │   ├── MockVMVirtualizationService.swift
│   │       │   └── MockVNCService.swift
│   │       ├── VM
│   │       │   └── VMDetailsPrinterTests.swift
│   │       ├── VMTests.swift
│   │       ├── VMVirtualizationServiceTests.swift
│   │       └── VNCServiceTests.swift
│   ├── lumier
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   └── src
│   │       ├── bin
│   │       │   └── entry.sh
│   │       ├── config
│   │       │   └── constants.sh
│   │       ├── hooks
│   │       │   └── on-logon.sh
│   │       └── lib
│   │           ├── utils.sh
│   │           └── vm.sh
│   ├── python
│   │   ├── agent
│   │   │   ├── agent
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── adapters
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── huggingfacelocal_adapter.py
│   │   │   │   │   ├── human_adapter.py
│   │   │   │   │   ├── mlxvlm_adapter.py
│   │   │   │   │   └── models
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── generic.py
│   │   │   │   │       ├── internvl.py
│   │   │   │   │       ├── opencua.py
│   │   │   │   │       └── qwen2_5_vl.py
│   │   │   │   ├── agent.py
│   │   │   │   ├── callbacks
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── budget_manager.py
│   │   │   │   │   ├── image_retention.py
│   │   │   │   │   ├── logging.py
│   │   │   │   │   ├── operator_validator.py
│   │   │   │   │   ├── pii_anonymization.py
│   │   │   │   │   ├── prompt_instructions.py
│   │   │   │   │   ├── telemetry.py
│   │   │   │   │   └── trajectory_saver.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── computers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cua.py
│   │   │   │   │   └── custom.py
│   │   │   │   ├── decorators.py
│   │   │   │   ├── human_tool
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   ├── server.py
│   │   │   │   │   └── ui.py
│   │   │   │   ├── integrations
│   │   │   │   │   └── hud
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── agent.py
│   │   │   │   │       └── proxy.py
│   │   │   │   ├── loops
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── anthropic.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── composed_grounded.py
│   │   │   │   │   ├── gemini.py
│   │   │   │   │   ├── glm45v.py
│   │   │   │   │   ├── gta1.py
│   │   │   │   │   ├── holo.py
│   │   │   │   │   ├── internvl.py
│   │   │   │   │   ├── model_types.csv
│   │   │   │   │   ├── moondream3.py
│   │   │   │   │   ├── omniparser.py
│   │   │   │   │   ├── openai.py
│   │   │   │   │   ├── opencua.py
│   │   │   │   │   └── uitars.py
│   │   │   │   ├── proxy
│   │   │   │   │   ├── examples.py
│   │   │   │   │   └── handlers.py
│   │   │   │   ├── responses.py
│   │   │   │   ├── types.py
│   │   │   │   └── ui
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __main__.py
│   │   │   │       └── gradio
│   │   │   │           ├── __init__.py
│   │   │   │           ├── app.py
│   │   │   │           └── ui_components.py
│   │   │   ├── benchmarks
│   │   │   │   ├── .gitignore
│   │   │   │   ├── contrib.md
│   │   │   │   ├── interactive.py
│   │   │   │   ├── models
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   └── gta1.py
│   │   │   │   ├── README.md
│   │   │   │   ├── ss-pro.py
│   │   │   │   ├── ss-v2.py
│   │   │   │   └── utils.py
│   │   │   ├── example.py
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── computer
│   │   │   ├── computer
│   │   │   │   ├── __init__.py
│   │   │   │   ├── computer.py
│   │   │   │   ├── diorama_computer.py
│   │   │   │   ├── helpers.py
│   │   │   │   ├── interface
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── logger.py
│   │   │   │   ├── models.py
│   │   │   │   ├── providers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cloud
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── docker
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── lume
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── lume_api.py
│   │   │   │   │   ├── lumier
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── types.py
│   │   │   │   │   └── winsandbox
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── provider.py
│   │   │   │   │       └── setup_script.ps1
│   │   │   │   ├── ui
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   └── gradio
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── app.py
│   │   │   │   └── utils.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── computer-server
│   │   │   ├── computer_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── diorama
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── diorama_computer.py
│   │   │   │   │   ├── diorama.py
│   │   │   │   │   ├── draw.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── safezone.py
│   │   │   │   ├── handlers
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── main.py
│   │   │   │   ├── server.py
│   │   │   │   └── watchdog.py
│   │   │   ├── examples
│   │   │   │   ├── __init__.py
│   │   │   │   └── usage_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   ├── run_server.py
│   │   │   └── test_connection.py
│   │   ├── core
│   │   │   ├── core
│   │   │   │   ├── __init__.py
│   │   │   │   └── telemetry
│   │   │   │       ├── __init__.py
│   │   │   │       └── posthog.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── mcp-server
│   │   │   ├── mcp_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   └── server.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── scripts
│   │   │       ├── install_mcp_server.sh
│   │   │       └── start_mcp_server.sh
│   │   ├── pylume
│   │   │   ├── __init__.py
│   │   │   ├── pylume
│   │   │   │   ├── __init__.py
│   │   │   │   ├── client.py
│   │   │   │   ├── exceptions.py
│   │   │   │   ├── lume
│   │   │   │   ├── models.py
│   │   │   │   ├── pylume.py
│   │   │   │   └── server.py
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   └── som
│   │       ├── LICENSE
│   │       ├── poetry.toml
│   │       ├── pyproject.toml
│   │       ├── README.md
│   │       ├── som
│   │       │   ├── __init__.py
│   │       │   ├── detect.py
│   │       │   ├── detection.py
│   │       │   ├── models.py
│   │       │   ├── ocr.py
│   │       │   ├── util
│   │       │   │   └── utils.py
│   │       │   └── visualization.py
│   │       └── tests
│   │           └── test_omniparser.py
│   ├── typescript
│   │   ├── .gitignore
│   │   ├── .nvmrc
│   │   ├── agent
│   │   │   ├── examples
│   │   │   │   ├── playground-example.html
│   │   │   │   └── README.md
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── client.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   └── client.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── biome.json
│   │   ├── computer
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── computer
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── providers
│   │   │   │   │   │   ├── base.ts
│   │   │   │   │   │   ├── cloud.ts
│   │   │   │   │   │   └── index.ts
│   │   │   │   │   └── types.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── base.ts
│   │   │   │   │   ├── factory.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── linux.ts
│   │   │   │   │   ├── macos.ts
│   │   │   │   │   └── windows.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   ├── computer
│   │   │   │   │   └── cloud.test.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── factory.test.ts
│   │   │   │   │   ├── index.test.ts
│   │   │   │   │   ├── linux.test.ts
│   │   │   │   │   ├── macos.test.ts
│   │   │   │   │   └── windows.test.ts
│   │   │   │   └── setup.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── core
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── index.ts
│   │   │   │   └── telemetry
│   │   │   │       ├── clients
│   │   │   │       │   ├── index.ts
│   │   │   │       │   └── posthog.ts
│   │   │   │       └── index.ts
│   │   │   ├── tests
│   │   │   │   └── telemetry.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── pnpm-workspace.yaml
│   │   └── README.md
│   └── xfce
│       ├── .dockerignore
│       ├── .gitignore
│       ├── Dockerfile
│       ├── README.md
│       └── src
│           ├── scripts
│           │   ├── resize-display.sh
│           │   ├── start-computer-server.sh
│           │   ├── start-novnc.sh
│           │   ├── start-vnc.sh
│           │   └── xstartup.sh
│           ├── supervisor
│           │   └── supervisord.conf
│           └── xfce-config
│               ├── helpers.rc
│               ├── xfce4-power-manager.xml
│               └── xfce4-session.xml
├── LICENSE.md
├── notebooks
│   ├── agent_nb.ipynb
│   ├── blog
│   │   ├── build-your-own-operator-on-macos-1.ipynb
│   │   └── build-your-own-operator-on-macos-2.ipynb
│   ├── composite_agents_docker_nb.ipynb
│   ├── computer_nb.ipynb
│   ├── computer_server_nb.ipynb
│   ├── customizing_computeragent.ipynb
│   ├── eval_osworld.ipynb
│   ├── ollama_nb.ipynb
│   ├── pylume_nb.ipynb
│   ├── README.md
│   ├── sota_hackathon_cloud.ipynb
│   └── sota_hackathon.ipynb
├── pdm.lock
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── samples
│   └── community
│       ├── global-online
│       │   └── README.md
│       └── hack-the-north
│           └── README.md
├── scripts
│   ├── build-uv.sh
│   ├── build.ps1
│   ├── build.sh
│   ├── cleanup.sh
│   ├── playground-docker.sh
│   ├── playground.sh
│   └── run-docker-dev.sh
└── tests
    ├── pytest.ini
    ├── shell_cmd.py
    ├── test_files.py
    ├── test_shell_bash.py
    ├── test_telemetry.py
    ├── test_venv.py
    └── test_watchdog.py
```

# Files

--------------------------------------------------------------------------------
/libs/typescript/computer/src/computer/providers/base.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import os from "node:os";
  2 | import { Telemetry } from "@trycua/core";
  3 | import pino from "pino";
  4 | import type { OSType } from "../../types";
  5 | import type { BaseComputerConfig, Display, VMProviderType } from "../types";
  6 | 
  7 | const logger = pino({ name: "computer.provider_base" });
  8 | 
  9 | /**
 10 |  * Base Computer class with shared functionality
 11 |  */
 12 | export abstract class BaseComputer {
 13 | 	protected name: string;
 14 | 	protected osType: OSType;
 15 | 	protected vmProvider?: VMProviderType;
 16 | 	protected telemetry: Telemetry;
 17 | 
 18 | 	constructor(config: BaseComputerConfig) {
 19 | 		this.name = config.name;
 20 | 		this.osType = config.osType;
 21 | 		this.telemetry = new Telemetry();
 22 | 		this.telemetry.recordEvent("module_init", {
 23 | 			module: "computer",
 24 | 			version: process.env.npm_package_version,
 25 | 			node_version: process.version,
 26 | 		});
 27 | 
 28 | 		this.telemetry.recordEvent("computer_initialized", {
 29 | 			os: os.platform(),
 30 | 			os_version: os.version(),
 31 | 			node_version: process.version,
 32 | 		});
 33 | 	}
 34 | 
 35 | 	/**
 36 | 	 * Get the name of the computer
 37 | 	 */
 38 | 	getName(): string {
 39 | 		return this.name;
 40 | 	}
 41 | 
 42 | 	/**
 43 | 	 * Get the OS type of the computer
 44 | 	 */
 45 | 	getOSType(): OSType {
 46 | 		return this.osType;
 47 | 	}
 48 | 
 49 | 	/**
 50 | 	 * Get the VM provider type
 51 | 	 */
 52 | 	getVMProviderType(): VMProviderType | undefined {
 53 | 		return this.vmProvider;
 54 | 	}
 55 | 
 56 | 	/**
 57 | 	 * Shared method available to all computer types
 58 | 	 */
 59 | 	async disconnect(): Promise<void> {
 60 | 		logger.info(`Disconnecting from ${this.name}`);
 61 | 		// Implementation would go here
 62 | 	}
 63 | 
 64 | 	/**
 65 | 	 * Parse display string into Display object
 66 | 	 * @param display Display string in format "WIDTHxHEIGHT"
 67 | 	 * @returns Display object
 68 | 	 */
 69 | 	public static parseDisplayString(display: string): Display {
 70 | 		const match = display.match(/^(\d+)x(\d+)$/);
 71 | 		if (!match) {
 72 | 			throw new Error(
 73 | 				`Invalid display format: ${display}. Expected format: WIDTHxHEIGHT`,
 74 | 			);
 75 | 		}
 76 | 
 77 | 		return {
 78 | 			width: Number.parseInt(match[1], 10),
 79 | 			height: Number.parseInt(match[2], 10),
 80 | 		};
 81 | 	}
 82 | 
 83 | 	/**
 84 | 	 * Parse memory string to MB integer.
 85 | 	 *
 86 | 	 * Examples:
 87 | 	 *   "8GB" -> 8192
 88 | 	 *   "1024MB" -> 1024
 89 | 	 *   "512" -> 512
 90 | 	 *
 91 | 	 * @param memoryStr - Memory string to parse
 92 | 	 * @returns Memory value in MB
 93 | 	 */
 94 | 	public static parseMemoryString(memoryStr: string): number {
 95 | 		if (!memoryStr) {
 96 | 			return 0;
 97 | 		}
 98 | 
 99 | 		// Convert to uppercase for case-insensitive matching
100 | 		const upperStr = memoryStr.toUpperCase().trim();
101 | 
102 | 		// Extract numeric value and unit
103 | 		const match = upperStr.match(/^(\d+(?:\.\d+)?)\s*(GB|MB)?$/);
104 | 		if (!match) {
105 | 			throw new Error(`Invalid memory format: ${memoryStr}`);
106 | 		}
107 | 
108 | 		const value = Number.parseFloat(match[1]);
109 | 		const unit = match[2] || "MB"; // Default to MB if no unit specified
110 | 
111 | 		// Convert to MB
112 | 		if (unit === "GB") {
113 | 			return Math.round(value * 1024);
114 | 		}
115 | 		return Math.round(value);
116 | 	}
117 | }
118 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/adapters/models/generic.py:
--------------------------------------------------------------------------------

```python
 1 | from typing import List, Dict, Any, Optional
 2 | 
 3 | # Hugging Face imports are local to avoid hard dependency at module import
 4 | try:
 5 |     import torch  # type: ignore
 6 |     from transformers import AutoModel, AutoProcessor  # type: ignore
 7 |     HF_AVAILABLE = True
 8 | except Exception:
 9 |     HF_AVAILABLE = False
10 | 
11 | 
class GenericHFModel:
    """Generic Hugging Face vision-language model handler.

    Loads a Hugging Face ``AutoModel`` and ``AutoProcessor`` for the given
    model name and generates text from HF-format chat messages.
    """

    def __init__(self, model_name: str, device: str = "auto", trust_remote_code: bool = False) -> None:
        """Load the model and processor.

        Args:
            model_name: Hugging Face model identifier (repo id or local path).
            device: Device map passed to ``from_pretrained`` (default "auto").
            trust_remote_code: Whether to allow model-repo-supplied code to run.

        Raises:
            ImportError: If torch/transformers are not installed.
        """
        if not HF_AVAILABLE:
            raise ImportError(
                "HuggingFace transformers dependencies not found. Install with: pip install \"cua-agent[uitars-hf]\""
            )
        self.model_name = model_name
        self.device = device
        self.model = None
        self.processor = None
        self.trust_remote_code = trust_remote_code
        self._load()

    def _load(self) -> None:
        """Load model and processor weights from the Hub (or local cache)."""
        # Load model in fp16 with SDPA attention to keep memory usage down.
        self.model = AutoModel.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16,
            device_map=self.device,
            attn_implementation="sdpa",
            trust_remote_code=self.trust_remote_code,
        )
        # Load processor; the pixel bounds constrain image resizing done by
        # the processor before the vision tower sees the image.
        self.processor = AutoProcessor.from_pretrained(
            self.model_name,
            min_pixels=3136,
            max_pixels=4096 * 2160,
            device_map=self.device,
            trust_remote_code=self.trust_remote_code,
        )

    def generate(self, messages: List[Dict[str, Any]], max_new_tokens: int = 128) -> str:
        """Generate text for the given HF-format messages.

        Args:
            messages: HF chat messages of the form
                ``[{ role, content: [{type:'text'|'image', text|image}] }]``.
            max_new_tokens: Maximum number of new tokens to generate.

        Returns:
            The decoded completion for the first batch element, or "" if
            decoding produced no output.

        Raises:
            RuntimeError: If the model or processor is not loaded.
        """
        # _load() runs in __init__, so this only triggers if loading failed
        # part-way. An explicit check (rather than `assert`) survives -O.
        if self.model is None or self.processor is None:
            raise RuntimeError("Model and processor are not loaded")
        # Apply chat template and tokenize
        inputs = self.processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        )
        # Move inputs to the same device as the model
        inputs = inputs.to(self.model.device)
        # Generate without tracking gradients (inference only)
        with torch.no_grad():
            generated_ids = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
        # Trim prompt tokens so only newly generated tokens are decoded
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        # Decode
        output_text = self.processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return output_text[0] if output_text else ""
76 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx:
--------------------------------------------------------------------------------

```markdown
 1 | ---
 2 | title: All‑in‑one CUA Models
 3 | description: Models that support full computer-use agent capabilities with ComputerAgent.run()
 4 | ---
 5 | 
 6 | These models support complete computer-use agent functionality through `ComputerAgent.run()`. They can understand natural language instructions and autonomously perform sequences of actions to complete tasks.
 7 | 
 8 | All agent loops are compatible with any LLM provider supported by LiteLLM.
 9 | 
10 | See [Running Models Locally](../local-models) for how to use Hugging Face and MLX models on your own machine.
11 | 
12 | ## Gemini CUA
13 | 
14 | Gemini models with computer-use capabilities:
15 | 
16 | - Gemini 2.5 CUA: `gemini-2.5-computer-use-preview-10-2025`
17 | 
18 | ```python
19 | agent = ComputerAgent("gemini-2.5-computer-use-preview-10-2025", tools=[computer])
20 | async for _ in agent.run("Open Firefox and navigate to github.com"):
21 |     pass
22 | ```
23 | 
24 | ## Anthropic CUAs
25 | 
26 | Claude models with computer-use capabilities:
27 | 
28 | - Claude 4.5: `claude-sonnet-4-5-20250929`
29 | - Claude 4.1: `claude-opus-4-1-20250805`
30 | - Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514`
31 | - Claude 3.7: `claude-3-7-sonnet-20250219`
32 | - Claude 3.5: `claude-3-5-sonnet-20241022`
33 | 
34 | ```python
35 | agent = ComputerAgent("claude-3-5-sonnet-20241022", tools=[computer])
36 | async for _ in agent.run("Open Firefox and navigate to github.com"):
37 |     pass
38 | ```
39 | 
40 | ## OpenAI CUA Preview
41 | 
42 | OpenAI's computer-use preview model:
43 | 
44 | - Computer-use-preview: `computer-use-preview`
45 | 
46 | ```python
47 | agent = ComputerAgent("openai/computer-use-preview", tools=[computer])
48 | async for _ in agent.run("Take a screenshot and describe what you see"):
49 |     pass
50 | ```
51 | 
52 | ## GLM-4.5V
53 | 
54 | Zhipu AI's GLM-4.5V vision-language model with computer-use capabilities:
55 | 
56 | - `openrouter/z-ai/glm-4.5v`
57 | - `huggingface-local/zai-org/GLM-4.5V`
58 | 
59 | ```python
60 | agent = ComputerAgent("openrouter/z-ai/glm-4.5v", tools=[computer])
61 | async for _ in agent.run("Click on the search bar and type 'hello world'"):
62 |     pass
63 | ```
64 | 
65 | ## InternVL 3.5
66 | 
67 | InternVL 3.5 family:
68 | - `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}`
69 | 
70 | ```python
71 | agent = ComputerAgent("huggingface-local/OpenGVLab/InternVL3_5-1B", tools=[computer])
72 | async for _ in agent.run("Open Firefox and navigate to github.com"):
73 |     pass
74 | ```
75 | 
76 | ## UI-TARS 1.5
77 | 
78 | Unified vision-language model for computer-use:
79 | 
80 | - `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B`
81 | - `huggingface/ByteDance-Seed/UI-TARS-1.5-7B` (requires TGI endpoint)
82 | 
83 | ```python
84 | agent = ComputerAgent("huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", tools=[computer])
85 | async for _ in agent.run("Open the settings menu and change the theme to dark mode"):
86 |     pass
87 | ```
88 | 
89 | ---
90 | 
91 | CUAs also support direct click prediction. See [Grounding Models](./grounding-models) for details on `predict_click()`.
92 | 
93 | For details on agent loop behavior and usage, see [Agent Loops](../agent-loops).
94 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/adapters/models/qwen2_5_vl.py:
--------------------------------------------------------------------------------

```python
 1 | from typing import List, Dict, Any, Optional
 2 | 
 3 | # Hugging Face imports are local to avoid hard dependency at module import
 4 | try:
 5 |     import torch  # type: ignore
 6 |     from transformers import AutoModelForImageTextToText, AutoProcessor  # type: ignore
 7 |     HF_AVAILABLE = True
 8 | except Exception:
 9 |     HF_AVAILABLE = False
10 | 
11 | 
class Qwen2_5_VLModel:
    """Handler for Qwen2.5-VL Hugging Face vision-language models.

    Wraps an ``AutoModelForImageTextToText`` plus its ``AutoProcessor`` and
    exposes a single :meth:`generate` entry point for chat-style generation.
    """

    def __init__(self, model_name: str, device: str = "auto", trust_remote_code: bool = False) -> None:
        """Eagerly load the model and processor.

        Args:
            model_name: Hugging Face model identifier or local checkpoint path.
            device: Device map passed through to ``from_pretrained`` ("auto" by default).
            trust_remote_code: Allow custom modeling code shipped with the repo.

        Raises:
            ImportError: If torch/transformers are not installed.
        """
        if not HF_AVAILABLE:
            raise ImportError(
                "HuggingFace transformers dependencies not found. Install with: pip install \"cua-agent[uitars-hf]\""
            )
        self.model_name = model_name
        self.device = device
        self.model = None
        self.processor = None
        self.trust_remote_code = trust_remote_code
        self._load()

    def _load(self) -> None:
        """Instantiate the pretrained model and its matching processor."""
        self.model = AutoModelForImageTextToText.from_pretrained(
            self.model_name,
            torch_dtype=torch.bfloat16,
            device_map=self.device,
            attn_implementation="sdpa",
            trust_remote_code=self.trust_remote_code,
        )
        # min/max pixel bounds constrain how the processor rescales images.
        self.processor = AutoProcessor.from_pretrained(
            self.model_name,
            min_pixels=3136,
            max_pixels=4096 * 2160,
            device_map=self.device,
            trust_remote_code=self.trust_remote_code,
        )

    def generate(self, messages: List[Dict[str, Any]], max_new_tokens: int = 128) -> str:
        """Generate text for the given HF-format messages.

        Args:
            messages: Chat messages shaped like
                ``[{ role, content: [{type:'text'|'image', text|image}] }]``.
            max_new_tokens: Cap on newly generated tokens.

        Returns:
            The decoded completion (prompt tokens stripped), or "" if decoding
            produced nothing.
        """
        assert self.model is not None and self.processor is not None
        # Tokenize via the chat template and move tensors onto the model device.
        batch = self.processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(self.model.device)
        with torch.no_grad():
            outputs = self.model.generate(**batch, max_new_tokens=max_new_tokens)
        # Strip the echoed prompt tokens from each generated sequence.
        completions = [
            full_ids[len(prompt_ids):]
            for prompt_ids, full_ids in zip(batch.input_ids, outputs)
        ]
        decoded = self.processor.batch_decode(
            completions,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return decoded[0] if decoded else ""
76 | 
```

--------------------------------------------------------------------------------
/libs/python/computer/computer/utils.py:
--------------------------------------------------------------------------------

```python
  1 | import base64
  2 | from typing import Tuple, Optional, Dict, Any
  3 | from PIL import Image, ImageDraw
  4 | import io
  5 | 
  6 | def decode_base64_image(base64_str: str) -> bytes:
  7 |     """Decode a base64 string into image bytes."""
  8 |     return base64.b64decode(base64_str)
  9 | 
 10 | def encode_base64_image(image_bytes: bytes) -> str:
 11 |     """Encode image bytes to base64 string."""
 12 |     return base64.b64encode(image_bytes).decode('utf-8')
 13 | 
 14 | def bytes_to_image(image_bytes: bytes) -> Image.Image:
 15 |     """Convert bytes to PIL Image.
 16 |     
 17 |     Args:
 18 |         image_bytes: Raw image bytes
 19 |         
 20 |     Returns:
 21 |         PIL.Image: The converted image
 22 |     """
 23 |     return Image.open(io.BytesIO(image_bytes))
 24 | 
 25 | def image_to_bytes(image: Image.Image, format: str = 'PNG') -> bytes:
 26 |     """Convert PIL Image to bytes."""
 27 |     buf = io.BytesIO()
 28 |     image.save(buf, format=format)
 29 |     return buf.getvalue()
 30 | 
 31 | def resize_image(image_bytes: bytes, scale_factor: float) -> bytes:
 32 |     """Resize an image by a scale factor.
 33 |     
 34 |     Args:
 35 |         image_bytes: The original image as bytes
 36 |         scale_factor: Factor to scale the image by (e.g., 0.5 for half size, 2.0 for double)
 37 |         
 38 |     Returns:
 39 |         bytes: The resized image as bytes
 40 |     """
 41 |     image = bytes_to_image(image_bytes)
 42 |     if scale_factor != 1.0:
 43 |         new_size = (int(image.width * scale_factor), int(image.height * scale_factor))
 44 |         image = image.resize(new_size, Image.Resampling.LANCZOS)
 45 |     return image_to_bytes(image)
 46 | 
 47 | def draw_box(
 48 |     image_bytes: bytes,
 49 |     x: int,
 50 |     y: int,
 51 |     width: int,
 52 |     height: int,
 53 |     color: str = "#FF0000",
 54 |     thickness: int = 2
 55 | ) -> bytes:
 56 |     """Draw a box on an image.
 57 |     
 58 |     Args:
 59 |         image_bytes: The original image as bytes
 60 |         x: X coordinate of top-left corner
 61 |         y: Y coordinate of top-left corner
 62 |         width: Width of the box
 63 |         height: Height of the box
 64 |         color: Color of the box in hex format
 65 |         thickness: Thickness of the box border in pixels
 66 |         
 67 |     Returns:
 68 |         bytes: The modified image as bytes
 69 |     """
 70 |     # Convert bytes to PIL Image
 71 |     image = bytes_to_image(image_bytes)
 72 |     
 73 |     # Create drawing context
 74 |     draw = ImageDraw.Draw(image)
 75 |     
 76 |     # Draw rectangle
 77 |     draw.rectangle(
 78 |         [(x, y), (x + width, y + height)],
 79 |         outline=color,
 80 |         width=thickness
 81 |     )
 82 |     
 83 |     # Convert back to bytes
 84 |     return image_to_bytes(image)
 85 | 
 86 | def get_image_size(image_bytes: bytes) -> Tuple[int, int]:
 87 |     """Get the dimensions of an image.
 88 |     
 89 |     Args:
 90 |         image_bytes: The image as bytes
 91 |         
 92 |     Returns:
 93 |         Tuple[int, int]: Width and height of the image
 94 |     """
 95 |     image = bytes_to_image(image_bytes)
 96 |     return image.size
 97 | 
 98 | def parse_vm_info(vm_info: Dict[str, Any]) -> Optional[Dict[str, Any]]:
 99 |     """Parse VM info from pylume response."""
100 |     if not vm_info:
101 |         return None 
```

--------------------------------------------------------------------------------
/examples/computer-example-ts/src/index.ts:
--------------------------------------------------------------------------------

```typescript
// Example: drive a Cua cloud container with OpenAI's computer-use-preview
// model. The loop alternates between asking the model for the next action
// and executing that action inside the container until no actions remain.
import { Computer, OSType } from "@trycua/computer";
import OpenAI from "openai";
import { executeAction } from "./helpers";

import "dotenv/config";

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

// The natural-language task the model is asked to carry out.
const COMPUTER_USE_PROMPT = "Open firefox and go to trycua.com";

// Initialize the Computer Connection
const computer = new Computer({
	apiKey: process.env.CUA_API_KEY!,
	name: process.env.CUA_CONTAINER_NAME!,
	osType: OSType.LINUX,
});

await computer.run();
// Take the initial screenshot
const screenshot = await computer.interface.screenshot();
const screenshotBase64 = screenshot.toString("base64");

// Setup openai config for computer use
// (reused for every request in the loop below)
const computerUseConfig: OpenAI.Responses.ResponseCreateParamsNonStreaming = {
	model: "computer-use-preview",
	tools: [
		{
			type: "computer_use_preview",
			display_width: 1024,
			display_height: 768,
			environment: "linux", // we're using a linux vm
		},
	],
	truncation: "auto",
};

// Send initial screenshot to the openai computer use model
let res = await openai.responses.create({
	...computerUseConfig,
	input: [
		{
			role: "user",
			content: [
				// what we want the ai to do
				{ type: "input_text", text: COMPUTER_USE_PROMPT },
				// current screenshot of the vm
				{
					type: "input_image",
					image_url: `data:image/png;base64,${screenshotBase64}`,
					detail: "auto",
				},
			],
		},
	],
});

// Loop until there are no more computer use actions.
while (true) {
	const computerCalls = res.output.filter((o) => o.type === "computer_call");
	if (computerCalls.length < 1) {
		console.log("No more computer calls. Loop complete.");
		break;
	}
	// Get the first call
	// NOTE(review): only the first computer_call per response is executed;
	// confirm the Responses API never returns more than one actionable call.
	const call = computerCalls[0];
	const action = call.action;
	console.log("Received action from OpenAI Responses API:", action);
	let ackChecks: OpenAI.Responses.ResponseComputerToolCall.PendingSafetyCheck[] =
		[];
	if (call.pending_safety_checks.length > 0) {
		console.log("Safety checks pending:", call.pending_safety_checks);
		// In a real implementation, you would want to get user confirmation here
		ackChecks = call.pending_safety_checks;
	}

	// Execute the action in the container
	await executeAction(computer, action);
	// Wait for changes to process within the container (1sec)
	await new Promise((resolve) => setTimeout(resolve, 1000));

	// Capture new screenshot
	const newScreenshot = await computer.interface.screenshot();
	const newScreenshotBase64 = newScreenshot.toString("base64");

	// Screenshot back as computer_call_output
	// (linked to the prior turn via previous_response_id and call_id)
	res = await openai.responses.create({
		...computerUseConfig,
		previous_response_id: res.id,
		input: [
			{
				type: "computer_call_output",
				call_id: call.call_id,
				acknowledged_safety_checks: ackChecks,
				output: {
					type: "computer_screenshot",
					image_url: `data:image/png;base64,${newScreenshotBase64}`,
				},
			},
		],
	});
}

process.exit();
105 | 
```

--------------------------------------------------------------------------------
/libs/python/computer/computer/logger.py:
--------------------------------------------------------------------------------

```python
 1 | """Logging utilities for the Computer module."""
 2 | 
 3 | import logging
 4 | from enum import IntEnum
 5 | 
 6 | 
 7 | # Keep LogLevel for backward compatibility, but it will be deprecated
class LogLevel(IntEnum):
    """Log levels for logging. Deprecated - use standard logging levels instead."""

    QUIET = 0  # Only warnings and errors
    NORMAL = 1  # Info level, standard output
    VERBOSE = 2  # More detailed information
    DEBUG = 3  # Full debug information


# Map LogLevel to standard logging levels for backward compatibility.
LOGLEVEL_MAP = {
    LogLevel.QUIET: logging.WARNING,
    LogLevel.NORMAL: logging.INFO,
    # VERBOSE has no stdlib equivalent; it is folded into DEBUG.
    LogLevel.VERBOSE: logging.DEBUG,
    LogLevel.DEBUG: logging.DEBUG,
}
24 | 
25 | 
class Logger:
    """Thin wrapper around :mod:`logging` used by Computer components."""

    def __init__(self, name: str, verbosity: int):
        """Create and configure a named logger.

        Args:
            name: The name of the logger.
            verbosity: The log level (use standard logging levels like logging.INFO).
                       For backward compatibility, LogLevel enum values are also accepted.
        """
        self.logger = logging.getLogger(name)

        # Legacy LogLevel values are translated to stdlib logging levels.
        if isinstance(verbosity, LogLevel):
            self.verbosity = LOGLEVEL_MAP.get(verbosity, logging.INFO)
        else:
            self.verbosity = verbosity

        self._configure()

    def _configure(self):
        """Apply the verbosity to the underlying logger and announce it."""
        self.logger.setLevel(self.verbosity)

        # Announce the effective level; the first threshold at or above the
        # configured verbosity wins (mirrors an if/elif chain).
        level_notices = (
            (logging.DEBUG, self.logger.info, "Logger set to DEBUG level"),
            (logging.INFO, self.logger.info, "Logger set to INFO level"),
            (logging.WARNING, self.logger.warning, "Logger set to WARNING level"),
            (logging.ERROR, self.logger.warning, "Logger set to ERROR level"),
            (logging.CRITICAL, self.logger.warning, "Logger set to CRITICAL level"),
        )
        for threshold, emit, message in level_notices:
            if self.verbosity <= threshold:
                emit(message)
                break

    def debug(self, message: str):
        """Emit a debug-level message."""
        self.logger.debug(message)

    def info(self, message: str):
        """Emit an info-level message."""
        self.logger.info(message)

    def verbose(self, message: str):
        """Emit a verbose message (between INFO and DEBUG).

        The stdlib has no verbose level, so this logs at DEBUG with a
        "[VERBOSE]" prefix for backward compatibility.
        """
        self.logger.debug(f"[VERBOSE] {message}")

    def warning(self, message: str):
        """Emit a warning-level message."""
        self.logger.warning(message)

    def error(self, message: str):
        """Emit an error-level message."""
        self.logger.error(message)
85 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/computer-sdk/sandboxed-python.mdx:
--------------------------------------------------------------------------------

```markdown
 1 | ---
 2 | title: Sandboxed Python
 3 | slug: sandboxed-python
 4 | ---
 5 | 
 6 | <Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/examples/sandboxed_functions_examples.py" target="_blank">Python example</a> is available for this documentation.</Callout>
 7 | 
 8 | You can run Python functions securely inside a sandboxed virtual environment on a remote Cua Computer. This is useful for executing untrusted user code, isolating dependencies, or providing a safe environment for automation tasks.
 9 | 
10 | ## How It Works
11 | 
12 | The `sandboxed` decorator from the Computer SDK wraps a Python function so that it is executed remotely in a specified virtual environment on the target Computer. The function and its arguments are serialized, sent to the remote, and executed in isolation. Results or errors are returned to the caller.
13 | 
14 | ## Example Usage
15 | 
16 | ```python
17 | from computer import Computer
18 | from computer.helpers import sandboxed
19 | 
20 | @sandboxed()
21 | def read_file(location: str) -> str:
22 |     """Read contents of a file"""
23 |     with open(location, 'r') as f:
24 |         return f.read()
25 | 
26 | async def main():
27 |     async with Computer(os_type="linux", provider_type="cloud", name="my-container", api_key="...") as computer:
28 |         # Call the sandboxed function (runs remotely)
29 |         result = await read_file("/etc/hostname")
30 |         print(result)
31 | ```
32 | 
33 | ## Installing Python Packages
34 | 
35 | You can specify the virtual environment name and target computer:
36 | 
37 | ```python
38 | @sandboxed(venv_name="myenv", computer=my_computer, max_retries=5)
39 | def my_function(...):
40 |     ...
41 | ```
42 | 
43 | You can also install packages in the virtual environment using the `venv_install` method:
44 | 
45 | ```python
46 | await my_computer.venv_install("myenv", ["requests"])
47 | ```
48 | 
49 | ## Example: Interacting with macOS Applications
50 | 
51 | You can use sandboxed functions to interact with macOS applications on a local Cua Computer (requires `os_type="darwin"`). This is particularly useful for automation tasks that involve GUI applications.
52 | 
53 | ```python
54 | # Example: Use sandboxed functions to execute code in a Cua Container
55 | from computer.helpers import sandboxed
56 | 
57 | await computer.venv_install("demo_venv", ["macos-pyxa"]) # Install packages in a virtual environment
58 | 
59 | @sandboxed("demo_venv")
60 | def greet_and_print(name):
    """Greet the user and return the HTML of the current Safari tab"""
62 |     import PyXA
63 |     safari = PyXA.Application("Safari")
64 |     html = safari.current_document.source()
65 |     print(f"Hello from inside the container, {name}!")
66 |     return {"greeted": name, "safari_html": html}
67 | 
68 | # When a @sandboxed function is called, it will execute in the container
69 | result = await greet_and_print("Cua")
70 | # Result: {"greeted": "Cua", "safari_html": "<html>...</html>"}
71 | # stdout and stderr are also captured and printed / raised
72 | print("Result from sandboxed function:", result)
73 | ```
74 | 
75 | ## Error Handling
76 | 
77 | If the remote execution fails, the decorator will retry up to `max_retries` times. If all attempts fail, the last exception is raised locally.
78 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/libraries/computer-server/Commands.mdx:
--------------------------------------------------------------------------------

```markdown
 1 | ---
 2 | title: Supported Commands
 3 | description: List of all commands supported by the Computer Server API (WebSocket and REST).
 4 | ---
 5 | 
 6 | # Commands Reference
 7 | 
 8 | This page lists all supported commands for the Computer Server, available via both WebSocket and REST API endpoints.
 9 | 
10 | | Command             | Description                                |
11 | |---------------------|--------------------------------------------|
12 | | version             | Get protocol and package version info       |
13 | | run_command         | Run a shell command                        |
14 | | screenshot          | Capture a screenshot                       |
15 | | get_screen_size     | Get the screen size                        |
16 | | get_cursor_position | Get the current mouse cursor position      |
17 | | mouse_down          | Mouse button down                          |
18 | | mouse_up            | Mouse button up                            |
19 | | left_click          | Left mouse click                           |
20 | | right_click         | Right mouse click                          |
21 | | double_click        | Double mouse click                         |
22 | | move_cursor         | Move mouse cursor to coordinates           |
23 | | drag_to             | Drag mouse to coordinates                  |
24 | | drag                | Drag mouse by offset                       |
25 | | key_down            | Keyboard key down                          |
26 | | key_up              | Keyboard key up                            |
27 | | type_text           | Type text                                  |
28 | | press_key           | Press a single key                         |
29 | | hotkey              | Press a hotkey combination                 |
30 | | scroll              | Scroll the screen                          |
31 | | scroll_down         | Scroll down                                |
32 | | scroll_up           | Scroll up                                  |
33 | | copy_to_clipboard   | Copy text to clipboard                     |
34 | | set_clipboard       | Set clipboard content                      |
35 | | file_exists         | Check if a file exists                     |
36 | | directory_exists    | Check if a directory exists                |
37 | | list_dir            | List files/directories in a directory      |
38 | | read_text           | Read text from a file                      |
39 | | write_text          | Write text to a file                       |
40 | | read_bytes          | Read bytes from a file                     |
41 | | write_bytes         | Write bytes to a file                      |
42 | | get_file_size       | Get file size                              |
43 | | delete_file         | Delete a file                              |
44 | | create_dir          | Create a directory                         |
45 | | delete_dir          | Delete a directory                         |
46 | | get_accessibility_tree | Get accessibility tree (if supported)    |
47 | | find_element        | Find element in accessibility tree         |
48 | | diorama_cmd         | Run a diorama command (if supported)       |
49 | 
```

--------------------------------------------------------------------------------
/libs/lume/tests/VNCServiceTests.swift:
--------------------------------------------------------------------------------

```swift
 1 | import Foundation
 2 | import Testing
 3 | @testable import lume
 4 | 
 5 | @Test("VNCService starts correctly")
 6 | func testVNCServiceStart() async throws {
 7 |     let tempDir = try createTempDirectory()
 8 |     let vmDir = VMDirectory(Path(tempDir.path))
 9 |     let service = await MockVNCService(vmDirectory: vmDir)
10 |     
11 |     // Initial state
12 |     let isRunning = await service.isRunning
13 |     let url = await service.url
14 |     #expect(!isRunning)
15 |     #expect(url == nil)
16 |     
17 |     // Start service
18 |     try await service.start(port: 5900, virtualMachine: nil)
19 |     #expect(await service.isRunning)
20 |     #expect(await service.url?.contains("5900") ?? false)
21 | }
22 | 
@Test("VNCService stops correctly")
func testVNCServiceStop() async throws {
    // Arrange: a mock VNC service that has been started.
    let tempDir = try createTempDirectory()
    let directory = VMDirectory(Path(tempDir.path))
    let service = await MockVNCService(vmDirectory: directory)
    try await service.start(port: 5900, virtualMachine: nil)

    // Act: stop the running service.
    await service.stop()

    // Assert: it is idle again and no longer advertises a URL.
    let running = await service.isRunning
    let advertisedURL = await service.url
    #expect(running == false)
    #expect(advertisedURL == nil)
}
36 | 
// Exercises the client-open lifecycle: openClient must throw
// VMError.vncNotConfigured unless the service is currently started.
@Test("VNCService handles client operations")
func testVNCServiceClient() async throws {
    let tempDir = try createTempDirectory()
    let vmDir = VMDirectory(Path(tempDir.path))
    let service = await MockVNCService(vmDirectory: vmDir)
    
    // Should fail when not started
    do {
        try await service.openClient(url: "vnc://localhost:5900")
        #expect(Bool(false), "Expected openClient to throw when not started")
    } catch VMError.vncNotConfigured {
        // Expected error
    } catch {
        #expect(Bool(false), "Expected vncNotConfigured error but got \(error)")
    }
    
    // Start and try client operations
    try await service.start(port: 5900, virtualMachine: nil)
    try await service.openClient(url: "vnc://localhost:5900")
    #expect(await service.clientOpenCount == 1)
    
    // Stop and verify client operations fail
    // (stopping must return the service to the not-configured state)
    await service.stop()
    do {
        try await service.openClient(url: "vnc://localhost:5900")
        #expect(Bool(false), "Expected openClient to throw after stopping")
    } catch VMError.vncNotConfigured {
        // Expected error
    } catch {
        #expect(Bool(false), "Expected vncNotConfigured error but got \(error)")
    }
}
69 | 
@Test("VNCService handles virtual machine attachment")
func testVNCServiceVMAttachment() async throws {
    // Arrange: a mock service and a placeholder VM handle.
    let tempDir = try createTempDirectory()
    let service = await MockVNCService(vmDirectory: VMDirectory(Path(tempDir.path)))
    let mockVM = "mock_vm"

    // Act: start the service with the VM attached.
    try await service.start(port: 5900, virtualMachine: mockVM)

    // Assert: the service retains the attached VM handle.
    let attachedVM = await service.attachedVM
    #expect(attachedVM == mockVM)
}
81 | 
/// Creates (and returns the URL of) a unique scratch directory under the
/// system temporary root for a single test case.
private func createTempDirectory() throws -> URL {
    let scratchURL = FileManager.default.temporaryDirectory
        .appendingPathComponent(UUID().uuidString)
    try FileManager.default.createDirectory(at: scratchURL, withIntermediateDirectories: true)
    return scratchURL
}
```

--------------------------------------------------------------------------------
/libs/python/computer-server/computer_server/handlers/factory.py:
--------------------------------------------------------------------------------

```python
 1 | import platform
 2 | import subprocess
 3 | from typing import Tuple, Type
 4 | from .base import BaseAccessibilityHandler, BaseAutomationHandler, BaseFileHandler
 5 | from computer_server.diorama.base import BaseDioramaHandler
 6 | 
 7 | # Conditionally import platform-specific handlers
 8 | system = platform.system().lower()
 9 | if system == 'darwin':
10 |     from .macos import MacOSAccessibilityHandler, MacOSAutomationHandler
11 |     from computer_server.diorama.macos import MacOSDioramaHandler
12 | elif system == 'linux':
13 |     from .linux import LinuxAccessibilityHandler, LinuxAutomationHandler
14 | elif system == 'windows':
15 |     from .windows import WindowsAccessibilityHandler, WindowsAutomationHandler
16 | 
17 | from .generic import GenericFileHandler
18 | 
class HandlerFactory:
    """Factory for creating OS-specific handlers."""

    @staticmethod
    def _get_current_os() -> str:
        """Determine the current OS.

        Returns:
            str: The OS type ('darwin' for macOS, 'linux' for Linux, or 'windows' for Windows)

        Raises:
            RuntimeError: If unable to determine the current OS, or if the
                detected OS is not one of the supported values.
        """
        # Primary method: platform.system() covers the supported OSes.
        system = platform.system().lower()
        if system in ['darwin', 'linux', 'windows']:
            return system

        # Fallback to uname for Unix-like systems where platform.system()
        # does not return one of the expected values.
        try:
            result = subprocess.run(['uname', '-s'], capture_output=True, text=True)
            if result.returncode == 0:
                return result.stdout.strip().lower()
        except Exception as e:
            # Chain the original error so the root cause stays visible.
            raise RuntimeError(f"Failed to determine current OS: {str(e)}") from e

        # Bug fix: previously this was raised inside the try block, so the
        # blanket except immediately re-wrapped it into a misleading
        # "Failed to determine current OS" error.
        raise RuntimeError(f"Unsupported OS: {system}")

    @staticmethod
    def create_handlers() -> Tuple[BaseAccessibilityHandler, BaseAutomationHandler, BaseDioramaHandler, BaseFileHandler]:
        """Create and return appropriate handlers for the current OS.

        Returns:
            Tuple[BaseAccessibilityHandler, BaseAutomationHandler, BaseDioramaHandler, BaseFileHandler]: A tuple containing
            the appropriate accessibility, automation, diorama, and file handlers for the current OS.

        Raises:
            NotImplementedError: If the current OS is not supported
            RuntimeError: If unable to determine the current OS
        """
        os_type = HandlerFactory._get_current_os()

        # Diorama is only implemented on macOS; other OSes get the base stub.
        if os_type == 'darwin':
            return MacOSAccessibilityHandler(), MacOSAutomationHandler(), MacOSDioramaHandler(), GenericFileHandler()
        elif os_type == 'linux':
            return LinuxAccessibilityHandler(), LinuxAutomationHandler(), BaseDioramaHandler(), GenericFileHandler()
        elif os_type == 'windows':
            return WindowsAccessibilityHandler(), WindowsAutomationHandler(), BaseDioramaHandler(), GenericFileHandler()
        else:
            raise NotImplementedError(f"OS '{os_type}' is not supported")
69 | 
```

--------------------------------------------------------------------------------
/libs/lume/tests/VM/VMDetailsPrinterTests.swift:
--------------------------------------------------------------------------------

```swift
 1 | import Foundation
 2 | import Testing
 3 | 
 4 | @testable import lume
 5 | 
 6 | struct VMDetailsPrinterTests {
 7 | 
 8 |     @Test func printStatus_whenJSON() throws {
 9 |         // Given
10 |         let vms: [VMDetails] = [
11 |             VMDetails(
12 |                 name: "name",
13 |                 os: "os",
14 |                 cpuCount: 2,
15 |                 memorySize: 1024,
16 |                 diskSize: .init(allocated: 24, total: 30),
17 |                 display: "1024x768",
18 |                 status: "status",
19 |                 vncUrl: "vncUrl",
20 |                 ipAddress: "0.0.0.0",
21 |                 locationName: "mockLocation")
22 |         ]
23 |         let jsonEncoder = JSONEncoder()
24 |         jsonEncoder.outputFormatting = .prettyPrinted
25 |         let expectedOutput = try String(data: jsonEncoder.encode(vms), encoding: .utf8)!
26 | 
27 |         // When
28 |         var printedStatus: String?
29 |         try VMDetailsPrinter.printStatus(vms, format: .json, print: { printedStatus = $0 })
30 | 
31 |         // Then
32 |         // Decode both JSONs and compare the actual data structures
33 |         let jsonDecoder = JSONDecoder()
34 |         let printedVMs = try jsonDecoder.decode(
35 |             [VMDetails].self, from: printedStatus!.data(using: .utf8)!)
36 |         let expectedVMs = try jsonDecoder.decode(
37 |             [VMDetails].self, from: expectedOutput.data(using: .utf8)!)
38 | 
39 |         #expect(printedVMs.count == expectedVMs.count)
40 |         for (printed, expected) in zip(printedVMs, expectedVMs) {
41 |             #expect(printed.name == expected.name)
42 |             #expect(printed.os == expected.os)
43 |             #expect(printed.cpuCount == expected.cpuCount)
44 |             #expect(printed.memorySize == expected.memorySize)
45 |             #expect(printed.diskSize.allocated == expected.diskSize.allocated)
46 |             #expect(printed.diskSize.total == expected.diskSize.total)
47 |             #expect(printed.status == expected.status)
48 |             #expect(printed.vncUrl == expected.vncUrl)
49 |             #expect(printed.ipAddress == expected.ipAddress)
50 |         }
51 |     }
52 | 
    /// Verifies the plain-text table output of `VMDetailsPrinter.printStatus`:
    /// exactly one header row plus one whitespace-separated row per VM.
    @Test func printStatus_whenNotJSON() throws {
        // Given: a single VM with known field values
        let vms: [VMDetails] = [
            VMDetails(
                name: "name",
                os: "os",
                cpuCount: 2,
                memorySize: 1024,
                diskSize: .init(allocated: 24, total: 30),
                display: "1024x768",
                status: "status",
                vncUrl: "vncUrl",
                ipAddress: "0.0.0.0",
                locationName: "mockLocation")
        ]

        // When: printing in text format, capturing each emitted line
        var printedLines: [String] = []
        try VMDetailsPrinter.printStatus(vms, format: .text, print: { printedLines.append($0) })

        // Then: two lines total — the column headers and the single VM row
        #expect(printedLines.count == 2)

        // Header columns, split on any whitespace (column padding)
        let headerParts = printedLines[0].split(whereSeparator: \.isWhitespace)
        #expect(
            headerParts == [
                "name", "os", "cpu", "memory", "disk", "display", "status", "storage", "shared_dirs", "ip", "vnc",
            ])

        // Row values: note 1024 bytes renders as "0.00G" and shared_dirs is "-"
        // because the VM's status is not "running".
        #expect(
            printedLines[1].split(whereSeparator: \.isWhitespace).map(String.init) == [
                "name", "os", "2", "0.00G", "24.0B/30.0B", "1024x768", "status", "mockLocation",
                "-",
                "0.0.0.0",
                "vncUrl",
            ])
    }
90 | }
91 | 
```

--------------------------------------------------------------------------------
/libs/lume/src/Server/HTTP.swift:
--------------------------------------------------------------------------------

```swift
  1 | import Foundation
  2 | import Network
  3 | 
/// Errors surfaced by the HTTP server layer.
enum HTTPError: Error {
    /// An unexpected server-side failure with no more specific cause.
    case internalError
}
  7 | 
  8 | struct HTTPRequest {
  9 |     let method: String
 10 |     let path: String
 11 |     let headers: [String: String]
 12 |     let body: Data?
 13 |     
 14 |     init?(data: Data) {
 15 |         guard let requestString = String(data: data, encoding: .utf8) else { return nil }
 16 |         let components = requestString.components(separatedBy: "\r\n\r\n")
 17 |         guard components.count >= 1 else { return nil }
 18 |         
 19 |         let headerLines = components[0].components(separatedBy: "\r\n")
 20 |         guard !headerLines.isEmpty else { return nil }
 21 |         
 22 |         // Parse request line
 23 |         let requestLine = headerLines[0].components(separatedBy: " ")
 24 |         guard requestLine.count >= 2 else { return nil }
 25 |         
 26 |         self.method = requestLine[0]
 27 |         self.path = requestLine[1]
 28 |         
 29 |         // Parse headers
 30 |         var headers: [String: String] = [:]
 31 |         for line in headerLines.dropFirst() {
 32 |             let headerComponents = line.split(separator: ":", maxSplits: 1).map(String.init)
 33 |             if headerComponents.count == 2 {
 34 |                 headers[headerComponents[0].trimmingCharacters(in: .whitespaces)] = 
 35 |                     headerComponents[1].trimmingCharacters(in: .whitespaces)
 36 |             }
 37 |         }
 38 |         self.headers = headers
 39 |         
 40 |         // Parse body if present
 41 |         if components.count > 1 {
 42 |             self.body = components[1].data(using: .utf8)
 43 |         } else {
 44 |             self.body = nil
 45 |         }
 46 |     }
 47 | }
 48 | 
 49 | struct HTTPResponse {
 50 |     enum StatusCode: Int {
 51 |         case ok = 200
 52 |         case accepted = 202
 53 |         case badRequest = 400
 54 |         case notFound = 404
 55 |         case internalServerError = 500
 56 |         
 57 |         var description: String {
 58 |             switch self {
 59 |             case .ok: return "OK"
 60 |             case .accepted: return "Accepted"
 61 |             case .badRequest: return "Bad Request"
 62 |             case .notFound: return "Not Found"
 63 |             case .internalServerError: return "Internal Server Error"
 64 |             }
 65 |         }
 66 |     }
 67 |     
 68 |     let statusCode: StatusCode
 69 |     let headers: [String: String]
 70 |     let body: Data?
 71 |     
 72 |     init(statusCode: StatusCode, headers: [String: String] = [:], body: Data? = nil) {
 73 |         self.statusCode = statusCode
 74 |         self.headers = headers
 75 |         self.body = body
 76 |     }
 77 |     
 78 |     init(statusCode: StatusCode, body: String) {
 79 |         self.statusCode = statusCode
 80 |         self.headers = ["Content-Type": "text/plain"]
 81 |         self.body = body.data(using: .utf8)
 82 |     }
 83 |     
 84 |     func serialize() -> Data {
 85 |         var response = "HTTP/1.1 \(statusCode.rawValue) \(statusCode.description)\r\n"
 86 |         
 87 |         var headers = self.headers
 88 |         if let body = body {
 89 |             headers["Content-Length"] = "\(body.count)"
 90 |         }
 91 |         
 92 |         for (key, value) in headers {
 93 |             response += "\(key): \(value)\r\n"
 94 |         }
 95 |         
 96 |         response += "\r\n"
 97 |         
 98 |         var responseData = response.data(using: .utf8) ?? Data()
 99 |         if let body = body {
100 |             responseData.append(body)
101 |         }
102 |         
103 |         return responseData
104 |     }
105 | }
106 | 
/// Minimal HTTP server shell. It currently only stores the port to bind to;
/// no listening or connection handling is implemented in this type.
/// NOTE(review): connection handling presumably lives elsewhere — confirm.
final class HTTPServer {
    /// TCP port the server is intended to listen on.
    let port: UInt16

    init(port: UInt16) {
        self.port = port
    }
}
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/pii_anonymization.py:
--------------------------------------------------------------------------------

```python
 1 | """
 2 | PII anonymization callback handler using Microsoft Presidio for text and image redaction.
 3 | """
 4 | 
 5 | from typing import List, Dict, Any, Optional, Tuple
 6 | from .base import AsyncCallbackHandler
 7 | import base64
 8 | import io
 9 | import logging
10 | 
11 | try:
12 |     # TODO: Add Presidio dependencies
13 |     from PIL import Image
14 |     PRESIDIO_AVAILABLE = True
15 | except ImportError:
16 |     PRESIDIO_AVAILABLE = False
17 | 
18 | logger = logging.getLogger(__name__)
19 | 
class PIIAnonymizationCallback(AsyncCallbackHandler):
    """
    Callback handler that anonymizes PII in text and images using Microsoft Presidio.

    Intended behavior:
    1. Anonymize PII in messages before sending to the agent loop
    2. Deanonymize PII in tool calls and message outputs after the agent loop
    3. Redact PII from images in computer_call_output messages

    NOTE(review): this class is currently a skeleton. The anonymization and
    deanonymization helpers (``_anonymize_message`` / ``_deanonymize_item``)
    are no-op stubs, so messages pass through unchanged until the TODOs
    below are implemented.
    """
    
    def __init__(
        self,
        # TODO: Any extra kwargs if needed
    ):
        """
        Initialize the PII anonymization callback.

        Currently takes no configuration; planned options (entity types,
        anonymization operator, image redaction color, ...) are TODO.

        Raises:
            ImportError: If the optional Presidio/Pillow dependencies are not
                installed. Install with ``pip install cua-agent[pii-anonymization]``.
        """
        if not PRESIDIO_AVAILABLE:
            raise ImportError(
                "Presidio is not available. Install with: "
                "pip install cua-agent[pii-anonymization]"
            )
        
        # TODO: Implement __init__ (store anonymizer/deanonymizer configuration)
    
    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Anonymize PII in messages before sending to agent loop.
        
        Args:
            messages: List of message dictionaries
            
        Returns:
            List of messages with PII anonymized (currently unchanged, since
            ``_anonymize_message`` is a stub)
        """
        anonymized_messages = []
        for msg in messages:
            anonymized_msg = await self._anonymize_message(msg)
            anonymized_messages.append(anonymized_msg)
        
        return anonymized_messages
    
    async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Deanonymize PII in tool calls and message outputs after agent loop.
        
        Args:
            output: List of output dictionaries
            
        Returns:
            List of output items; only items whose ``type`` is
            ``computer_call`` or ``computer_call_output`` are passed through
            the deanonymizer, all others are returned untouched.
        """
        deanonymized_output = []
        for item in output:
            # Only deanonymize tool calls and computer_call messages
            if item.get("type") in ["computer_call", "computer_call_output"]:
                deanonymized_item = await self._deanonymize_item(item)
                deanonymized_output.append(deanonymized_item)
            else:
                deanonymized_output.append(item)
        
        return deanonymized_output
    
    async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
        # Stub: returns the message unchanged until Presidio wiring lands.
        # TODO: Implement _anonymize_message
        return message
    
    async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
        # Stub: returns the item unchanged until Presidio wiring lands.
        # TODO: Implement _deanonymize_item
        return item
97 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx:
--------------------------------------------------------------------------------

```markdown
 1 | ---
 2 | title: Grounding Models
 3 | description: Models that support click prediction with ComputerAgent.predict_click()
 4 | ---
 5 | 
 6 | These models specialize in UI element grounding and click prediction. They can identify precise coordinates for UI elements based on natural language descriptions, but cannot perform autonomous task planning.
 7 | 
 8 | Use `ComputerAgent.predict_click()` to get coordinates for specific UI elements.
 9 | 
10 | All models that support `ComputerAgent.run()` also support `ComputerAgent.predict_click()`. See [All‑in‑one CUAs](./computer-use-agents).
11 | 
12 | ### Anthropic CUAs
13 | 
14 | - Claude 4.1: `claude-opus-4-1-20250805`
15 | - Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514`
16 | - Claude 3.7: `claude-3-7-sonnet-20250219`
17 | - Claude 3.5: `claude-3-5-sonnet-20241022`
18 | 
19 | ### OpenAI CUA Preview
20 | - Computer-use-preview: `computer-use-preview`
21 | 
22 | ### UI-TARS 1.5 (Unified VLM with grounding support)
23 | - `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B`
24 | - `huggingface/ByteDance-Seed/UI-TARS-1.5-7B` (requires TGI endpoint)
25 | 
26 | ## Specialized Grounding Models
27 | 
28 | These models are optimized specifically for click prediction and UI element grounding:
29 | 
30 | ### OpenCUA
31 | - `huggingface-local/xlangai/OpenCUA-{7B,32B}`
32 | 
33 | ### GTA1 Family
34 | - `huggingface-local/HelloKKMe/GTA1-{7B,32B,72B}`
35 | 
36 | ### Holo 1.5 Family
37 | - `huggingface-local/Hcompany/Holo1.5-{3B,7B,72B}`
38 | 
39 | ### InternVL 3.5 Family
40 | - `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}`
41 | 
42 | ### OmniParser (OCR)
43 | 
44 | OCR-focused set-of-marks model that requires an LLM for click prediction:
45 | 
46 | - `omniparser` (requires combination with any LiteLLM vision model)
47 | 
48 | ### Moondream3 (Local Grounding)
49 | 
50 | Moondream3 is a powerful small model that can perform UI grounding and click prediction.
51 | 
52 | - `moondream3`
53 | 
54 | ## Usage Examples
55 | 
56 | ```python
57 | # Using any grounding model for click prediction
58 | agent = ComputerAgent("claude-3-5-sonnet-20241022", tools=[computer])
59 | 
60 | # Predict coordinates for specific elements
61 | login_coords = agent.predict_click("find the login button")
62 | search_coords = agent.predict_click("locate the search text field")
63 | menu_coords = agent.predict_click("find the hamburger menu icon")
64 | 
65 | print(f"Login button: {login_coords}")
66 | print(f"Search field: {search_coords}")
67 | print(f"Menu icon: {menu_coords}")
68 | ```
69 | 
70 | ```python
71 | # OmniParser is just for OCR, so it requires an LLM for predict_click
72 | agent = ComputerAgent("omniparser+anthropic/claude-3-5-sonnet-20241022", tools=[computer])
73 | 
74 | # Predict click coordinates using composed agent
75 | coords = agent.predict_click("find the submit button")
76 | print(f"Click coordinates: {coords}")  # (450, 320)
77 | 
78 | # Note: Cannot use omniparser alone for click prediction
79 | # This will raise an error:
80 | # agent = ComputerAgent("omniparser", tools=[computer])
81 | # coords = agent.predict_click("find button")  # Error!
82 | ```
83 | 
84 | ```python
85 | agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B", tools=[computer])
86 | 
87 | # Predict click coordinates for UI elements
88 | coords = agent.predict_click("find the submit button")
89 | print(f"Click coordinates: {coords}")  # (450, 320)
90 | 
91 | # Note: GTA1 cannot perform autonomous task planning
92 | # This will raise an error:
93 | # agent.run("Fill out the form and submit it")
94 | ```
95 | 
96 | ---
97 | 
98 | For information on combining grounding models with planning capabilities, see [Composed Agents](./composed-agents) and [All‑in‑one CUAs](./computer-use-agents).
99 | 
```

--------------------------------------------------------------------------------
/libs/python/computer-server/computer_server/server.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Server interface for Computer API.
  3 | Provides a clean API for starting and stopping the server.
  4 | """
  5 | 
  6 | import asyncio
  7 | import logging
  8 | import uvicorn
  9 | from typing import Optional
 10 | from fastapi import FastAPI
 11 | 
 12 | from .main import app as fastapi_app
 13 | 
 14 | logger = logging.getLogger(__name__)
 15 | 
 16 | 
 17 | class Server:
 18 |     """
 19 |     Server interface for Computer API.
 20 | 
 21 |     Usage:
 22 |         from computer_api import Server
 23 | 
 24 |         # Synchronous usage
 25 |         server = Server()
 26 |         server.start()  # Blocks until server is stopped
 27 | 
 28 |         # Asynchronous usage
 29 |         server = Server()
 30 |         await server.start_async()  # Starts server in background
 31 |         # Do other things
 32 |         await server.stop()  # Stop the server
 33 |     """
 34 | 
 35 |     def __init__(self, host: str = "0.0.0.0", port: int = 8000, log_level: str = "info", 
 36 |                  ssl_keyfile: Optional[str] = None, ssl_certfile: Optional[str] = None):
 37 |         """
 38 |         Initialize the server.
 39 | 
 40 |         Args:
 41 |             host: Host to bind the server to
 42 |             port: Port to bind the server to
 43 |             log_level: Logging level (debug, info, warning, error, critical)
 44 |             ssl_keyfile: Path to SSL private key file (for HTTPS)
 45 |             ssl_certfile: Path to SSL certificate file (for HTTPS)
 46 |         """
 47 |         self.host = host
 48 |         self.port = port
 49 |         self.log_level = log_level
 50 |         self.ssl_keyfile = ssl_keyfile
 51 |         self.ssl_certfile = ssl_certfile
 52 |         self.app = fastapi_app
 53 |         self._server_task: Optional[asyncio.Task] = None
 54 |         self._should_exit = asyncio.Event()
 55 | 
 56 |     def start(self) -> None:
 57 |         """
 58 |         Start the server synchronously. This will block until the server is stopped.
 59 |         """
 60 |         uvicorn.run(
 61 |             self.app, 
 62 |             host=self.host, 
 63 |             port=self.port, 
 64 |             log_level=self.log_level,
 65 |             ssl_keyfile=self.ssl_keyfile,
 66 |             ssl_certfile=self.ssl_certfile
 67 |         )
 68 | 
 69 |     async def start_async(self) -> None:
 70 |         """
 71 |         Start the server asynchronously. This will return immediately and the server
 72 |         will run in the background.
 73 |         """
 74 |         server_config = uvicorn.Config(
 75 |             self.app, 
 76 |             host=self.host, 
 77 |             port=self.port, 
 78 |             log_level=self.log_level,
 79 |             ssl_keyfile=self.ssl_keyfile,
 80 |             ssl_certfile=self.ssl_certfile
 81 |         )
 82 | 
 83 |         self._should_exit.clear()
 84 |         server = uvicorn.Server(server_config)
 85 | 
 86 |         # Create a task to run the server
 87 |         self._server_task = asyncio.create_task(server.serve())
 88 | 
 89 |         # Wait a short time to ensure the server starts
 90 |         await asyncio.sleep(0.5)
 91 | 
 92 |         protocol = "https" if self.ssl_certfile else "http"
 93 |         logger.info(f"Server started at {protocol}://{self.host}:{self.port}")
 94 | 
 95 |     async def stop(self) -> None:
 96 |         """
 97 |         Stop the server if it's running asynchronously.
 98 |         """
 99 |         if self._server_task and not self._server_task.done():
100 |             # Signal the server to exit
101 |             self._should_exit.set()
102 | 
103 |             # Cancel the server task
104 |             self._server_task.cancel()
105 | 
106 |             try:
107 |                 await self._server_task
108 |             except asyncio.CancelledError:
109 |                 logger.info("Server stopped")
110 | 
111 |             self._server_task = None
112 | 
```

--------------------------------------------------------------------------------
/libs/lume/src/VM/VMDetailsPrinter.swift:
--------------------------------------------------------------------------------

```swift
 1 | import Foundation
 2 | 
/// Prints VM status information either as pretty-printed JSON or as a
/// fixed-width text table (one header row plus one row per VM).
enum VMDetailsPrinter {
    /// Represents a single column in the VM status table
    private struct Column: Sendable {
        /// Header text printed in the first row.
        let header: String
        /// Fixed column width; values are padded with spaces — and truncated —
        /// to exactly this width (see `paddedToWidth`).
        let width: Int
        /// Extracts this column's display value from a VM.
        let getValue: @Sendable (VMDetails) -> String
    }

    /// Configuration for all columns in the status table, in display order
    private static let columns: [Column] = [
        Column(header: "name", width: 34, getValue: { $0.name }),
        Column(header: "os", width: 8, getValue: { $0.os }),
        Column(header: "cpu", width: 8, getValue: { String($0.cpuCount) }),
        Column(
            header: "memory", width: 8,
            getValue: {
                // Rendered as gigabytes; assumes memorySize is in bytes — TODO confirm.
                String(format: "%.2fG", Float($0.memorySize) / (1024 * 1024 * 1024))
            }),
        Column(
            header: "disk", width: 16,
            getValue: {
                // "allocated/total", both pre-formatted by VMDetails
                "\($0.diskSize.formattedAllocated)/\($0.diskSize.formattedTotal)"
            }),
        Column(header: "display", width: 12, getValue: { $0.display }),
        Column(
            header: "status", width: 16,
            getValue: {
                $0.status
            }),
        Column(header: "storage", width: 16, getValue: { $0.locationName }),
        Column(
            header: "shared_dirs", width: 54,
            getValue: { vm in
                // Only show shared directories if the VM is running
                if vm.status == "running", let dirs = vm.sharedDirectories, !dirs.isEmpty {
                    return dirs.map { "\($0.hostPath) (\($0.readOnly ? "ro" : "rw"))" }.joined(separator: ", ")
                } else {
                    return "-"
                }
            }),
        Column(
            header: "ip", width: 16,
            getValue: {
                // "-" when the VM has no IP address yet
                $0.ipAddress ?? "-"
            }),
        Column(
            header: "vnc", width: 50,
            getValue: {
                // "-" when no VNC session is exposed
                $0.vncUrl ?? "-"
            }),
    ]

    /// Prints the status of all VMs.
    /// - Parameters:
    ///   - vms: Array of VM status objects to display
    ///   - format: `.json` for pretty-printed JSON, otherwise a text table
    ///   - print: Output sink, injectable for testing (defaults to stdout)
    /// - Throws: An encoding error if JSON serialization fails.
    static func printStatus(
        _ vms: [VMDetails], format: FormatOption, print: (String) -> Void = { print($0) }
    ) throws {
        if format == .json {
            let jsonEncoder = JSONEncoder()
            jsonEncoder.outputFormatting = .prettyPrinted
            let jsonData = try jsonEncoder.encode(vms)
            let jsonString = String(data: jsonData, encoding: .utf8)!
            print(jsonString)
        } else {
            printHeader(print: print)
            vms.forEach({ vm in 
                printVM(vm, print: print)
            })
        }
    }

    /// Prints the single header row built from `columns`.
    private static func printHeader(print: (String) -> Void = { print($0) }) {
        let paddedHeaders = columns.map { $0.header.paddedToWidth($0.width) }
        print(paddedHeaders.joined())
    }

    /// Prints one table row for the given VM, padding each value to its column width.
    private static func printVM(_ vm: VMDetails, print: (String) -> Void = { print($0) }) {
        let paddedColumns = columns.map { column in
            column.getValue(vm).paddedToWidth(column.width)
        }
        print(paddedColumns.joined())
    }
}
87 | 
extension String {
    /// Pads the string to the specified width with trailing spaces.
    /// Note: `padding(toLength:)` also TRUNCATES strings longer than `width`,
    /// so over-long column values are cut off rather than overflowing.
    /// - Parameter width: Target width for padding
    /// - Returns: String of exactly `width` characters
    fileprivate func paddedToWidth(_ width: Int) -> String {
        padding(toLength: width, withPad: " ", startingAt: 0)
    }
}
96 | 
```

--------------------------------------------------------------------------------
/libs/lume/src/VM/DarwinVM.swift:
--------------------------------------------------------------------------------

```swift
 1 | import Foundation
 2 | 
/// macOS-specific virtual machine implementation.
/// Adds macOS installation (from an IPSW restore image) on top of the
/// generic `VM` base class.
@MainActor
final class DarwinVM: VM {
    /// Resolves and inspects macOS restore images (IPSW files).
    private let imageLoader: ImageLoader

    init(
        vmDirContext: VMDirContext,
        virtualizationServiceFactory: @escaping (VMVirtualizationServiceContext) throws -> VMVirtualizationService = { try DarwinVirtualizationService(configuration: $0) },
        vncServiceFactory: @escaping (VMDirectory) -> VNCService = { DefaultVNCService(vmDirectory: $0) },
        imageLoader: ImageLoader
    ) {
        self.imageLoader = imageLoader
        super.init(
            vmDirContext: vmDirContext,
            virtualizationServiceFactory: virtualizationServiceFactory,
            vncServiceFactory: vncServiceFactory
        )
    }

    override func getOSType() -> String {
        return "macOS"
    }

    // MARK: - Installation and Configuration
    
    /// Creates the VM and installs macOS from the given IPSW.
    ///
    /// - Parameters:
    ///   - ipswPath: Path to a local IPSW, or the literal "latest" to download
    ///     the newest supported restore image first.
    ///   - cpuCount: Requested CPU count; raised to the image's minimum if lower.
    ///   - memorySize: Requested memory; raised to the image's minimum if lower.
    ///   - diskSize: Disk size for the VM (bytes — TODO confirm unit).
    ///   - display: Display resolution string (e.g. "1024x768").
    /// - Throws: If the image cannot be loaded, configuration fails, or the
    ///   virtualization service is not a `DarwinVirtualizationService`.
    override func setup(ipswPath: String, cpuCount: Int, memorySize: UInt64, diskSize: UInt64, display: String) async throws {
        let imagePath: Path
        if ipswPath == "latest" {
            Logger.info("Downloading latest supported Image...")
            let downloadedPath = try await self.imageLoader.downloadLatestImage()
            imagePath = Path(downloadedPath.path)
        } else {
            imagePath = Path(ipswPath)
        }

        // Image requirements (minimum CPU/memory, hardware model) drive the
        // final configuration below.
        let requirements = try await imageLoader.loadImageRequirements(from: imagePath.url)
        try setDiskSize(diskSize)

        // Never configure below what the restore image requires.
        let finalCpuCount = max(cpuCount, requirements.minimumSupportedCPUCount)
        try setCpuCount(finalCpuCount)
        if finalCpuCount != cpuCount {
            Logger.info("CPU count overridden due to minimum image requirements", metadata: ["original": "\(cpuCount)", "final": "\(finalCpuCount)"])
        }

        let finalMemorySize = max(memorySize, requirements.minimumSupportedMemorySize)
        try setMemorySize(finalMemorySize)
        if finalMemorySize != memorySize {
            Logger.info("Memory size overridden due to minimum image requirements", metadata: ["original": "\(memorySize)", "final": "\(finalMemorySize)"])
        }

        // Persist the resolved configuration before creating the service.
        try updateVMConfig(
            vmConfig: try VMConfig(
                os: getOSType(),
                cpuCount: finalCpuCount,
                memorySize: finalMemorySize,
                diskSize: diskSize,
                macAddress: DarwinVirtualizationService.generateMacAddress(),
                display: display,
                hardwareModel: requirements.hardwareModel,
                machineIdentifier: DarwinVirtualizationService.generateMachineIdentifier()
            )
        )

        let service: any VMVirtualizationService = try virtualizationServiceFactory(
            try createVMVirtualizationServiceContext(
                cpuCount: finalCpuCount,
                memorySize: finalMemorySize,
                display: display
            )
        )
        // macOS installation needs Darwin-specific APIs (auxiliary storage, installer).
        guard let darwinService = service as? DarwinVirtualizationService else {
            throw VMError.internalError("Installation requires DarwinVirtualizationService")
        }

        // Create auxiliary storage with hardware model
        try darwinService.createAuxiliaryStorage(at: vmDirContext.nvramPath, hardwareModel: requirements.hardwareModel)

        try await darwinService.installMacOS(imagePath: imagePath) { progress in
            Logger.info("Installing macOS", metadata: ["progress": "\(Int(progress * 100))%"])
        }
    }
}
85 | 
```

--------------------------------------------------------------------------------
/scripts/build.sh:
--------------------------------------------------------------------------------

```bash
  1 | #!/bin/bash
  2 | 
  3 | # Exit on error
  4 | set -e
  5 | 
  6 | # Colors for output
  7 | RED='\033[0;31m'
  8 | GREEN='\033[0;32m'
  9 | BLUE='\033[0;34m'
 10 | NC='\033[0m' # No Color
 11 | 
# print_step <message> — blue "==>" progress line on stdout
print_step() {
    echo -e "${BLUE}==> $1${NC}"
}

# print_success <message> — green "==> Success:" line on stdout
print_success() {
    echo -e "${GREEN}==> Success: $1${NC}"
}

# print_error <message> — red "==> Error:" line, sent to stderr
print_error() {
    echo -e "${RED}==> Error: $1${NC}" >&2
}
 26 | 
# Resolve the directory containing this script, then the repository root one
# level above it (robust to being invoked from any working directory)
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_ROOT="$( cd "${SCRIPT_DIR}/.." && pwd )"

# Change to project root so all relative paths below resolve consistently
cd "$PROJECT_ROOT"

# Load environment variables from .env.local; `set -a` auto-exports every
# variable the file defines so child processes (python, pip) can see them.
if [ -f .env.local ]; then
    print_step "Loading environment variables from .env.local..."
    set -a
    source .env.local
    set +a
    print_success "Environment variables loaded"
else
    print_error ".env.local file not found"
    exit 1
fi
 45 | 
 46 | # Clean up existing environments and cache
 47 | print_step "Cleaning up existing environments..."
 48 | find . -type d -name "__pycache__" -exec rm -rf {} +
 49 | find . -type d -name ".pytest_cache" -exec rm -rf {} +
 50 | find . -type d -name "dist" -exec rm -rf {} +
 51 | find . -type d -name ".venv" -exec rm -rf {} +
 52 | find . -type d -name "*.egg-info" -exec rm -rf {} +
 53 | print_success "Environment cleanup complete"
 54 | 
 55 | # Create and activate virtual environment
 56 | print_step "Creating virtual environment..."
 57 | python -m venv .venv
 58 | source .venv/bin/activate
 59 | 
 60 | # Upgrade pip and install build tools
 61 | print_step "Upgrading pip and installing build tools..."
 62 | python -m pip install --upgrade pip setuptools wheel
 63 | 
 64 | # Function to install a package and its dependencies
 65 | install_package() {
 66 |     local package_dir=$1
 67 |     local package_name=$2
 68 |     local extras=$3
 69 |     print_step "Installing ${package_name}..."
 70 |     cd "$package_dir"
 71 |     
 72 |     if [ -f "pyproject.toml" ]; then
 73 |         if [ -n "$extras" ]; then
 74 |             pip install -e ".[${extras}]"
 75 |         else
 76 |             pip install -e .
 77 |         fi
 78 |     else
 79 |         print_error "No pyproject.toml found in ${package_dir}"
 80 |         return 1
 81 |     fi
 82 |     
 83 |     cd "$PROJECT_ROOT"
 84 | }
 85 | 
 86 | # Install packages in order of dependency
 87 | print_step "Installing packages in development mode..."
 88 | 
 89 | # Install core first (base package with telemetry support)
 90 | install_package "libs/python/core" "core"
 91 | 
 92 | # Install pylume (base dependency)
 93 | install_package "libs/python/pylume" "pylume"
 94 | 
 95 | # Install computer with all its dependencies and extras
 96 | install_package "libs/python/computer" "computer" "all"
 97 | 
 98 | # Install omniparser
 99 | install_package "libs/python/som" "som"
100 | 
101 | # Install agent with all its dependencies and extras
102 | install_package "libs/python/agent" "agent" "all"
103 | 
104 | # Install computer-server
105 | install_package "libs/python/computer-server" "computer-server"
106 | 
107 | # Install mcp-server
108 | install_package "libs/python/mcp-server" "mcp-server"
109 | 
110 | # Install development tools from root project
111 | print_step "Installing development dependencies..."
112 | pip install -e ".[dev,test,docs]"
113 | 
114 | # Create a .env file for VS Code to use the virtual environment
115 | print_step "Creating .env file for VS Code..."
116 | echo "PYTHONPATH=${PROJECT_ROOT}/libs/python/core:${PROJECT_ROOT}/libs/python/computer:${PROJECT_ROOT}/libs/python/agent:${PROJECT_ROOT}/libs/python/som:${PROJECT_ROOT}/libs/python/pylume:${PROJECT_ROOT}/libs/python/computer-server:${PROJECT_ROOT}/libs/python/mcp-server" > .env
117 | 
118 | print_success "All packages installed successfully!"
119 | print_step "Your virtual environment is ready. To activate it:"
120 | echo "  source .venv/bin/activate"
121 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/image_retention.py:
--------------------------------------------------------------------------------

```python
 1 | """
 2 | Image retention callback handler that limits the number of recent images in message history.
 3 | """
 4 | 
 5 | from typing import List, Dict, Any, Optional
 6 | from .base import AsyncCallbackHandler
 7 | 
 8 | 
 9 | class ImageRetentionCallback(AsyncCallbackHandler):
10 |     """
11 |     Callback handler that applies image retention policy to limit the number
12 |     of recent images in message history to prevent context window overflow.
13 |     """
14 |     
    def __init__(self, only_n_most_recent_images: Optional[int] = None):
        """
        Initialize the image retention callback.
        
        Args:
            only_n_most_recent_images: If set, only keep the N most recent images in message history
        """
        # None disables trimming; otherwise the maximum number of screenshots kept.
        self.only_n_most_recent_images = only_n_most_recent_images
23 |     
24 |     async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
25 |         """
26 |         Apply image retention policy to messages before sending to agent loop.
27 |         
28 |         Args:
29 |             messages: List of message dictionaries
30 |             
31 |         Returns:
32 |             List of messages with image retention policy applied
33 |         """
34 |         if self.only_n_most_recent_images is None:
35 |             return messages
36 |         
37 |         return self._apply_image_retention(messages)
38 |     
39 |     def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
40 |         """Apply image retention policy to keep only the N most recent images.
41 |         
42 |         Removes computer_call_output items with image_url and their corresponding computer_call items,
43 |         keeping only the most recent N image pairs based on only_n_most_recent_images setting.
44 |         
45 |         Args:
46 |             messages: List of message dictionaries
47 |             
48 |         Returns:
49 |             Filtered list of messages with image retention applied
50 |         """
51 |         if self.only_n_most_recent_images is None:
52 |             return messages
53 | 
54 |         # Gather indices of all computer_call_output messages that contain an image_url
55 |         output_indices: List[int] = []
56 |         for idx, msg in enumerate(messages):
57 |             if msg.get("type") == "computer_call_output":
58 |                 out = msg.get("output")
59 |                 if isinstance(out, dict) and ("image_url" in out):
60 |                     output_indices.append(idx)
61 | 
62 |         # Nothing to trim
63 |         if len(output_indices) <= self.only_n_most_recent_images:
64 |             return messages
65 | 
66 |         # Determine which outputs to keep (most recent N)
67 |         keep_output_indices = set(output_indices[-self.only_n_most_recent_images :])
68 | 
69 |         # Build set of indices to remove in one pass
70 |         to_remove: set[int] = set()
71 | 
72 |         for idx in output_indices:
73 |             if idx in keep_output_indices:
74 |                 continue  # keep this screenshot and its context
75 | 
76 |             to_remove.add(idx)  # remove the computer_call_output itself
77 | 
78 |             # Remove the immediately preceding computer_call with matching call_id (if present)
79 |             call_id = messages[idx].get("call_id")
80 |             prev_idx = idx - 1
81 |             if prev_idx >= 0 and messages[prev_idx].get("type") == "computer_call" and messages[prev_idx].get("call_id") == call_id:
82 |                 to_remove.add(prev_idx)
83 |                 # Check a single reasoning immediately before that computer_call
84 |                 r_idx = prev_idx - 1
85 |                 if r_idx >= 0 and messages[r_idx].get("type") == "reasoning":
86 |                     to_remove.add(r_idx)
87 | 
88 |         # Construct filtered list
89 |         filtered = [m for i, m in enumerate(messages) if i not in to_remove]
90 |         return filtered
```

--------------------------------------------------------------------------------
/libs/python/computer/computer/interface/models.py:
--------------------------------------------------------------------------------

```python
  1 | from enum import Enum
  2 | from typing import Dict, List, Any, TypedDict, Union, Literal
  3 | from dataclasses import dataclass
  4 | 
  5 | @dataclass
  6 | class CommandResult:
  7 |     stdout: str
  8 |     stderr: str  
  9 |     returncode: int
 10 |     
 11 |     def __init__(self, stdout: str, stderr: str, returncode: int):
 12 |         self.stdout = stdout
 13 |         self.stderr = stderr
 14 |         self.returncode = returncode
 15 | 
# Navigation key literals (values match PyAutoGUI key names).
NavigationKey = Literal['pagedown', 'pageup', 'home', 'end', 'left', 'right', 'up', 'down']

# Special key literals (enter/escape/editing keys).
SpecialKey = Literal['enter', 'esc', 'tab', 'space', 'backspace', 'del']

# Modifier key literals (includes macOS 'command'/'option' and Windows 'win').
ModifierKey = Literal['ctrl', 'alt', 'shift', 'win', 'command', 'option']

# Function key literals f1 through f12.
FunctionKey = Literal['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12']
 27 | 
 28 | class Key(Enum):
 29 |     """Keyboard keys that can be used with press_key.
 30 |     
 31 |     These key names map to PyAutoGUI's expected key names.
 32 |     """
 33 |     # Navigation
 34 |     PAGE_DOWN = 'pagedown'
 35 |     PAGE_UP = 'pageup'
 36 |     HOME = 'home'
 37 |     END = 'end'
 38 |     LEFT = 'left'
 39 |     RIGHT = 'right'
 40 |     UP = 'up'
 41 |     DOWN = 'down'
 42 |     
 43 |     # Special keys
 44 |     RETURN = 'enter'
 45 |     ENTER = 'enter'
 46 |     ESCAPE = 'esc'
 47 |     ESC = 'esc'
 48 |     TAB = 'tab'
 49 |     SPACE = 'space'
 50 |     BACKSPACE = 'backspace'
 51 |     DELETE = 'del'
 52 |     
 53 |     # Modifier keys
 54 |     ALT = 'alt'
 55 |     CTRL = 'ctrl'
 56 |     SHIFT = 'shift'
 57 |     WIN = 'win'
 58 |     COMMAND = 'command'
 59 |     OPTION = 'option'
 60 |     
 61 |     # Function keys
 62 |     F1 = 'f1'
 63 |     F2 = 'f2'
 64 |     F3 = 'f3'
 65 |     F4 = 'f4'
 66 |     F5 = 'f5'
 67 |     F6 = 'f6'
 68 |     F7 = 'f7'
 69 |     F8 = 'f8'
 70 |     F9 = 'f9'
 71 |     F10 = 'f10'
 72 |     F11 = 'f11'
 73 |     F12 = 'f12'
 74 | 
 75 |     @classmethod
 76 |     def from_string(cls, key: str) -> 'Key | str':
 77 |         """Convert a string key name to a Key enum value.
 78 |         
 79 |         Args:
 80 |             key: String key name to convert
 81 |             
 82 |         Returns:
 83 |             Key enum value if the string matches a known key,
 84 |             otherwise returns the original string for single character keys
 85 |         """
 86 |         # Map common alternative names to enum values
 87 |         key_mapping = {
 88 |             'page_down': cls.PAGE_DOWN,
 89 |             'page down': cls.PAGE_DOWN,
 90 |             'pagedown': cls.PAGE_DOWN,
 91 |             'page_up': cls.PAGE_UP,
 92 |             'page up': cls.PAGE_UP,
 93 |             'pageup': cls.PAGE_UP,
 94 |             'return': cls.RETURN,
 95 |             'enter': cls.ENTER,
 96 |             'escape': cls.ESCAPE,
 97 |             'esc': cls.ESC,
 98 |             'delete': cls.DELETE,
 99 |             'del': cls.DELETE,
100 |             # Modifier key mappings
101 |             'alt': cls.ALT,
102 |             'ctrl': cls.CTRL,
103 |             'control': cls.CTRL,
104 |             'shift': cls.SHIFT,
105 |             'win': cls.WIN,
106 |             'windows': cls.WIN,
107 |             'super': cls.WIN,
108 |             'command': cls.COMMAND,
109 |             'cmd': cls.COMMAND,
110 |             '⌘': cls.COMMAND,
111 |             'option': cls.OPTION,
112 |             '⌥': cls.OPTION,
113 |         }
114 |         
115 |         normalized = key.lower().strip()
116 |         return key_mapping.get(normalized, key)
117 | 
# Combined key type: a Key enum member, one of the literal key-name types,
# or an arbitrary string (e.g. a single character).
KeyType = Union[Key, NavigationKey, SpecialKey, ModifierKey, FunctionKey, str]

# Button type for mouse actions.
MouseButton = Literal['left', 'right', 'middle']
123 | 
class AccessibilityWindow(TypedDict):
    """Information about a window in the accessibility tree.

    NOTE(review): the fields below (app_name, pid, frontmost, has_windows)
    look application-scoped rather than per-window — confirm against the
    code that produces these dictionaries.
    """
    # Name of the owning application.
    app_name: str
    # Process identifier of the owning application.
    pid: int
    # Whether the application is currently frontmost.
    frontmost: bool
    # Whether the application has any windows.
    has_windows: bool
    # Per-window detail dictionaries (schema not visible here).
    windows: List[Dict[str, Any]]
131 | 
class AccessibilityTree(TypedDict):
    """Complete accessibility tree information."""
    # Whether the accessibility query succeeded.
    success: bool
    # Name of the frontmost application.
    frontmost_application: str
    # One AccessibilityWindow entry per application.
    windows: List[AccessibilityWindow] 
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/migration-guide.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Migration Guide
  3 | ---
  4 | 
  5 | This guide lists **breaking changes** when migrating from the original `ComputerAgent` (v0.3.x) to the rewritten `ComputerAgent` (v0.4.x) and shows old vs new usage for all four agent loops.
  6 | 
  7 | ## Breaking Changes
  8 | 
  9 | - **Initialization:**
 10 |   - `ComputerAgent` (v0.4.x) uses `model` as a string (e.g. "anthropic/claude-3-5-sonnet-20241022") instead of `LLM` and `AgentLoop` objects.
 11 |   - `tools` is a list (can include multiple computers and decorated functions).
 12 |   - `callbacks` are now first-class for extensibility (image retention, budget, trajectory, logging, etc).
 13 | - **No explicit `loop` parameter:**
 14 |   - Loop is inferred from the `model` string (e.g. `anthropic/`, `openai/`, `omniparser+`, `ui-tars`).
 15 | - **No explicit `computer` parameter:**
 16 |   - Computers are added to `tools` list.
 17 | 
 18 | ---
 19 | 
 20 | ## Usage Examples: Old vs New
 21 | 
 22 | ### 1. Anthropic Loop
 23 | **Old:**
 24 | ```python
 25 | async with Computer() as computer:
 26 |     agent = ComputerAgent(
 27 |         computer=computer,
 28 |         loop=AgentLoop.ANTHROPIC,
 29 |         model=LLM(provider=LLMProvider.ANTHROPIC)
 30 |     )
 31 |     async for result in agent.run("Take a screenshot"):
 32 |         print(result)
 33 | ```
 34 | **New:**
 35 | ```python
 36 | async with Computer() as computer:
 37 |     agent = ComputerAgent(
 38 |         model="anthropic/claude-3-5-sonnet-20241022",
 39 |         tools=[computer]
 40 |     )
 41 |     messages = [{"role": "user", "content": "Take a screenshot"}]
 42 |     async for result in agent.run(messages):
 43 |         for item in result["output"]:
 44 |             if item["type"] == "message":
 45 |                 print(item["content"][0]["text"])
 46 | ```
 47 | 
 48 | ### 2. OpenAI Loop
 49 | **Old:**
 50 | ```python
 51 | async with Computer() as computer:
 52 |     agent = ComputerAgent(
 53 |         computer=computer,
 54 |         loop=AgentLoop.OPENAI,
 55 |         model=LLM(provider=LLMProvider.OPENAI)
 56 |     )
 57 |     async for result in agent.run("Take a screenshot"):
 58 |         print(result)
 59 | ```
 60 | **New:**
 61 | ```python
 62 | async with Computer() as computer:
 63 |     agent = ComputerAgent(
 64 |         model="openai/computer-use-preview",
 65 |         tools=[computer]
 66 |     )
 67 |     messages = [{"role": "user", "content": "Take a screenshot"}]
 68 |     async for result in agent.run(messages):
 69 |         for item in result["output"]:
 70 |             if item["type"] == "message":
 71 |                 print(item["content"][0]["text"])
 72 | ```
 73 | 
 74 | ### 3. UI-TARS Loop
 75 | **Old:**
 76 | ```python
 77 | async with Computer() as computer:
 78 |     agent = ComputerAgent(
 79 |         computer=computer,
 80 |         loop=AgentLoop.UITARS,
 81 |         model=LLM(provider=LLMProvider.OAICOMPAT, name="ByteDance-Seed/UI-TARS-1.5-7B", provider_base_url="https://.../v1")
 82 |     )
 83 |     async for result in agent.run("Take a screenshot"):
 84 |         print(result)
 85 | ```
 86 | **New:**
 87 | ```python
 88 | async with Computer() as computer:
 89 |     agent = ComputerAgent(
 90 |         model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
 91 |         tools=[computer]
 92 |     )
 93 |     messages = [{"role": "user", "content": "Take a screenshot"}]
 94 |     async for result in agent.run(messages):
 95 |         for item in result["output"]:
 96 |             if item["type"] == "message":
 97 |                 print(item["content"][0]["text"])
 98 | ```
 99 | 
100 | ### 4. Omni Loop
101 | **Old:**
102 | ```python
103 | async with Computer() as computer:
104 |     agent = ComputerAgent(
105 |         computer=computer,
106 |         loop=AgentLoop.OMNI,
107 |         model=LLM(provider=LLMProvider.OLLAMA, name="gemma3")
108 |     )
109 |     async for result in agent.run("Take a screenshot"):
110 |         print(result)
111 | ```
112 | **New:**
113 | ```python
114 | async with Computer() as computer:
115 |     agent = ComputerAgent(
116 |         model="omniparser+ollama_chat/gemma3",
117 |         tools=[computer]
118 |     )
119 |     messages = [{"role": "user", "content": "Take a screenshot"}]
120 |     async for result in agent.run(messages):
121 |         for item in result["output"]:
122 |             if item["type"] == "message":
123 |                 print(item["content"][0]["text"])
124 | ```
125 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/libraries/lume/faq.md:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: FAQ
  3 | ---
  4 | 
  5 | ### Where are the VMs stored?
  6 | 
  7 | VMs are stored in `~/.lume` by default. You can configure additional storage locations using the `lume config` command.
  8 | 
  9 | ### How are images cached?
 10 | 
 11 | Images are cached in `~/.lume/cache`. When doing `lume pull <image>`, it will check if the image is already cached. If not, it will download the image and cache it, removing any older versions.
 12 | 
 13 | ### Where is the configuration file stored?
 14 | 
 15 | Lume follows the XDG Base Directory specification for the configuration file:
 16 | 
 17 | - Configuration is stored in `$XDG_CONFIG_HOME/lume/config.yaml` (defaults to `~/.config/lume/config.yaml`)
 18 | 
 19 | By default, other data is stored in:
 20 | - VM data: `~/.lume`
 21 | - Cache files: `~/.lume/cache`
 22 | 
 23 | The config file contains settings for:
 24 | - VM storage locations and the default location
 25 | - Cache directory location
 26 | - Whether caching is enabled
 27 | 
 28 | You can view and modify these settings using the `lume config` commands:
 29 | 
 30 | ```bash
 31 | # View current configuration
 32 | lume config get
 33 | 
 34 | # Manage VM storage locations
 35 | lume config storage list                 # List all VM storage locations
 36 | lume config storage add <name> <path>    # Add a new VM storage location
 37 | lume config storage remove <name>        # Remove a VM storage location
 38 | lume config storage default <name>       # Set the default VM storage location
 39 | 
 40 | # Manage cache settings
 41 | lume config cache get                    # Get current cache directory
 42 | lume config cache set <path>             # Set cache directory
 43 | 
 44 | # Manage image caching settings
 45 | lume config caching get                  # Show current caching status
 46 | lume config caching set <boolean>        # Enable or disable image caching
 47 | ```
 48 | 
 49 | ### How do I use multiple VM storage locations?
 50 | 
 51 | Lume supports storing VMs in different locations (e.g., internal drive, external SSD). After configuring storage locations, you can specify which location to use with the `--storage` parameter in various commands:
 52 | 
 53 | ```bash
 54 | # Create a VM in a specific storage location
 55 | lume create my-vm --os macos --ipsw latest --storage ssd
 56 | 
 57 | # Run a VM from a specific storage location
 58 | lume run my-vm --storage ssd
 59 | 
 60 | # Delete a VM from a specific storage location
 61 | lume delete my-vm --storage ssd
 62 | 
 63 | # Pull an image to a specific storage location
 64 | lume pull macos-sequoia-vanilla:latest --name my-vm --storage ssd
 65 | 
 66 | # Clone a VM between storage locations
 67 | lume clone source-vm cloned-vm --source-storage default --dest-storage ssd
 68 | ```
 69 | 
 70 | If you don't specify a storage location, Lume will use the default one or search across all configured locations.
 71 | 
 72 | ### Are VM disks taking up all the disk space?
 73 | 
 74 | No, macOS uses sparse files, which only allocate space as needed. For example, VM disks totaling 50 GB may only use 20 GB on disk.
 75 | 
 76 | ### How do I get the latest macOS restore image URL?
 77 | 
 78 | ```bash
 79 | lume ipsw
 80 | ```
 81 | 
 82 | ### How do I delete a VM?
 83 | 
 84 | ```bash
 85 | lume delete <name>
 86 | ```
 87 | 
 88 | ### How to Install macOS from an IPSW Image
 89 | 
 90 | #### Create a new macOS VM using the latest supported IPSW image:
 91 | Run the following command to create a new macOS virtual machine using the latest available IPSW image:
 92 | 
 93 | ```bash
 94 | lume create <name> --os macos --ipsw latest
 95 | ```
 96 | 
 97 | #### Create a new macOS VM using a specific IPSW image:
 98 | To create a macOS virtual machine from an older or specific IPSW file, first download the desired IPSW (UniversalMac) from a trusted source.
 99 | 
100 | Then, use the downloaded IPSW path:
101 | 
102 | ```bash
103 | lume create <name> --os macos --ipsw <downloaded_ipsw_path>
104 | ```
105 | 
106 | ### How do I install a custom Linux image?
107 | 
108 | The process for creating a custom Linux image differs from that of macOS: IPSW restore files are not used. You need to create a Linux VM first, then mount a setup image file to the VM for its first boot.
109 | 
110 | ```bash
111 | lume create <name> --os linux
112 | 
113 | lume run <name> --mount <path-to-setup-image>
114 | 
115 | lume run <name>
116 | ```
117 | 
```

--------------------------------------------------------------------------------
/scripts/run-docker-dev.sh:
--------------------------------------------------------------------------------

```bash
  1 | #!/bin/bash
  2 | 
  3 | # Colors for output
  4 | GREEN='\033[0;32m'
  5 | BLUE='\033[0;34m'
  6 | RED='\033[0;31m'
  7 | NC='\033[0m' # No Color
  8 | 
  9 | # Print with color
 10 | print_info() {
 11 |     echo -e "${BLUE}==> $1${NC}"
 12 | }
 13 | 
 14 | print_success() {
 15 |     echo -e "${GREEN}==> $1${NC}"
 16 | }
 17 | 
 18 | print_error() {
 19 |     echo -e "${RED}==> $1${NC}"
 20 | }
 21 | 
 22 | # Docker image name
 23 | IMAGE_NAME="cua-dev-image"
 24 | CONTAINER_NAME="cua-dev-container"
 25 | PLATFORM="linux/arm64"
 26 | 
 27 | # Detect platform based on architecture
 28 | arch=$(uname -m)
 29 | 
 30 | if [[ $arch == x86_64* ]]; then
 31 |     PLATFORM="linux/amd64"
 32 |     print_info "X64 Architecture detected, using platform: ${PLATFORM}"
 33 | elif [[ $arch == i*86 ]]; then
 34 |     PLATFORM="linux/386"
 35 |     print_info "X32 Architecture detected, using platform: ${PLATFORM}"
 36 | elif [[ $arch == arm* ]] || [[ $arch == aarch64 ]]; then
 37 |     PLATFORM="linux/arm64"
 38 |     print_info "ARM Architecture detected, using platform: ${PLATFORM}"
 39 | else
 40 |     # Fallback to amd64 for unknown architectures
 41 |     PLATFORM="linux/amd64"
 42 |     print_info "Unknown architecture ($arch), defaulting to platform: ${PLATFORM}"
 43 | fi
 44 | 
 45 | # Environment variables
 46 | PYTHONPATH="/app/libs/python/core:/app/libs/python/computer:/app/libs/python/agent:/app/libs/python/som:/app/libs/python/pylume:/app/libs/python/computer-server:/app/libs/python/mcp-server"
 47 | 
 48 | # Check if Docker is installed
 49 | if ! command -v docker &> /dev/null; then
 50 |     print_error "Docker is not installed. Please install Docker first."
 51 |     exit 1
 52 | fi
 53 | 
# Command options: dispatch on the first CLI argument.
case "$1" in
    build)
        print_info "Building the development Docker image..."
        print_info "This will install all dependencies but won't include source code"
        docker build -f Dockerfile --platform=${PLATFORM} -t ${IMAGE_NAME} .
        print_success "Development Docker image built successfully!"
        ;;
    
    run)
        # Check for interactive flag
        if [ "$2" == "--interactive" ]; then
            print_info "Running the development Docker container with interactive shell..."
            print_info "Mounting source code from host"
            print_info "Connecting to host.docker.internal:7777"
            
            # --rm: discard the container on exit; the source tree is
            # bind-mounted so host edits are visible live inside the container.
            docker run -it --rm \
                --platform=${PLATFORM} \
                --name ${CONTAINER_NAME} \
                -v "$(pwd):/app" \
                -e PYTHONPATH=${PYTHONPATH} \
                -e DISPLAY=${DISPLAY:-:0} \
                -e PYLUME_HOST="host.docker.internal" \
                -p 7860:7860 \
                ${IMAGE_NAME} bash
        else
            # Run the specified example
            if [ -z "$2" ]; then
                print_error "Please specify an example file, e.g., ./run-docker-dev.sh run computer_examples.py"
                exit 1
            fi
            print_info "Running example: $2"
            print_info "Connecting to host.docker.internal:7777"
            
            # Same container setup as above, but executes one example script
            # from the mounted /app/examples directory instead of a shell.
            docker run -it --rm \
                --platform=${PLATFORM} \
                --name ${CONTAINER_NAME} \
                -v "$(pwd):/app" \
                -e PYTHONPATH=${PYTHONPATH} \
                -e DISPLAY=${DISPLAY:-:0} \
                -e PYLUME_HOST="host.docker.internal" \
                -p 7860:7860 \
                ${IMAGE_NAME} python "/app/examples/$2"
        fi
        ;;
    
    stop)
        print_info "Stopping any running containers..."
        # Ignore failures so 'stop' is a no-op when nothing is running.
        docker stop ${CONTAINER_NAME} 2>/dev/null || true
        print_success "Done!"
        ;;
        
    *)
        # Unknown or missing command: print usage and exit non-zero.
        echo "Usage: $0 {build|run [--interactive] [filename]|stop}"
        echo ""
        echo "Commands:"
        echo "  build                      Build the development Docker image with dependencies"
        echo "  run [example_filename]     Run the specified example file in the container"
        echo "  run --interactive          Run the container with mounted code and get an interactive shell"
        echo "  stop                       Stop the container"
        exit 1
esac

exit 0 
```

--------------------------------------------------------------------------------
/libs/lume/src/Commands/Run.swift:
--------------------------------------------------------------------------------

```swift
  1 | import ArgumentParser
  2 | import Foundation
  3 | import Virtualization
  4 | 
/// `lume run` subcommand: starts (and, if necessary, pulls) a virtual machine.
/// Parses CLI flags/options with swift-argument-parser and delegates the
/// actual work to `LumeController.runVM`.
struct Run: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Run a virtual machine"
    )

    /// VM or image name; "name" or "name:tag" form.
    @Argument(
        help: "Name of the virtual machine or image to pull and run (format: name or name:tag)",
        completion: .custom(completeVMName))
    var name: String

    /// When set, the VNC client is not launched after the VM starts.
    @Flag(name: [.short, .long], help: "Do not start the VNC client")
    var noDisplay: Bool = false

    /// Host directories to share into the guest; "path" or "path:ro"/"path:rw".
    @Option(
        name: [.customLong("shared-dir")],
        help:
            "Directory to share with the VM. Can be just a path for read-write access (e.g. ~/src) or path:tag where tag is 'ro' for read-only or 'rw' for read-write (e.g. ~/src:ro)"
    )
    var sharedDirectories: [String] = []

    /// Optional read-only disk image (Linux guests only).
    @Option(
        help:
            "For Linux VMs only, a read-only disk image to attach to the VM (e.g. --mount=\"ubuntu.iso\")",
        completion: .file())
    var mount: String?

    /// Disk images to expose as USB mass-storage devices.
    @Option(
        name: [.customLong("usb-storage")],
        help: "Disk image to attach as a USB mass storage device (e.g. --usb-storage=\"disk.img\")",
        completion: .file())
    var usbStorageDevices: [String] = []

    @Option(help: "Github Container Registry to pull the images from. Defaults to ghcr.io")
    var registry: String = "ghcr.io"

    @Option(help: "Organization to pull the images from. Defaults to trycua")
    var organization: String = "trycua"

    /// VNC port; 0 lets the system auto-assign one.
    @Option(
        name: [.customLong("vnc-port")],
        help: "Port to use for the VNC server. Defaults to 0 (auto-assign)")
    var vncPort: Int = 0

    // NOTE(review): this Bool is declared as @Option (requires an explicit
    // value, e.g. --recovery-mode true) rather than @Flag — confirm this is
    // intentional, as switching to @Flag would change the CLI syntax.
    @Option(help: "For MacOS VMs only, boot into the VM in recovery mode")
    var recoveryMode: Bool = false

    @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
    var storage: String?

    /// Parses each "--shared-dir" value into a `SharedDirectory`.
    /// A bare path defaults to read-write; an explicit ":ro"/":rw" suffix
    /// selects the mode. Any other suffix is rejected with a ValidationError.
    private var parsedSharedDirectories: [SharedDirectory] {
        get throws {
            try sharedDirectories.map { dirString -> SharedDirectory in
                // Split on the first ':' only, so paths themselves may not
                // contain the separator before the tag.
                let components = dirString.split(separator: ":", maxSplits: 1)
                let hostPath = String(components[0])

                // If no tag is provided, default to read-write
                if components.count == 1 {
                    return SharedDirectory(
                        hostPath: hostPath,
                        tag: VZVirtioFileSystemDeviceConfiguration.macOSGuestAutomountTag,
                        readOnly: false
                    )
                }

                // Parse the tag if provided
                let tag = String(components[1])
                let readOnly: Bool
                switch tag.lowercased() {
                case "ro":
                    readOnly = true
                case "rw":
                    readOnly = false
                default:
                    throw ValidationError(
                        "Invalid tag value. Must be either 'ro' for read-only or 'rw' for read-write"
                    )
                }

                return SharedDirectory(
                    hostPath: hostPath,
                    tag: VZVirtioFileSystemDeviceConfiguration.macOSGuestAutomountTag,
                    readOnly: readOnly
                )
            }
        }
    }

    /// Wraps each "--usb-storage" string in a `Path`.
    private var parsedUSBStorageDevices: [Path] {
        usbStorageDevices.map { Path($0) }
    }

    // Explicit empty initializer required by ArgumentParser's decoding.
    init() {
    }

    /// Entry point: forwards all parsed options to the controller.
    /// An empty USB device list is passed as nil rather than [].
    @MainActor
    func run() async throws {
        try await LumeController().runVM(
            name: name,
            noDisplay: noDisplay,
            sharedDirectories: parsedSharedDirectories,
            mount: mount.map { Path($0) },
            registry: registry,
            organization: organization,
            vncPort: vncPort,
            recoveryMode: recoveryMode,
            storage: storage,
            usbMassStoragePaths: parsedUSBStorageDevices.isEmpty ? nil : parsedUSBStorageDevices
        )
    }
}
115 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/adapters/models/opencua.py:
--------------------------------------------------------------------------------

```python
  1 | from typing import List, Dict, Any
  2 | import re
  3 | import base64
  4 | from io import BytesIO
  5 | 
  6 | try:
  7 |     import torch  # type: ignore
  8 |     from transformers import AutoTokenizer, AutoModel, AutoImageProcessor  # type: ignore
  9 |     from PIL import Image  # type: ignore
 10 |     import blobfile as _ # assert blobfile is installed
 11 |     OPENCUA_AVAILABLE = True
 12 | except Exception:
 13 |     OPENCUA_AVAILABLE = False
 14 | 
 15 | 
 16 | class OpenCUAModel:
 17 |     """OpenCUA model handler using AutoTokenizer, AutoModel and AutoImageProcessor."""
 18 | 
 19 |     def __init__(self, model_name: str, device: str = "auto", trust_remote_code: bool = False) -> None:
 20 |         if not OPENCUA_AVAILABLE:
 21 |             raise ImportError(
 22 |                 "OpenCUA requirements not found. Install with: pip install \"cua-agent[opencua-hf]\""
 23 |             )
 24 |         self.model_name = model_name
 25 |         self.device = device
 26 |         self.model = None
 27 |         self.tokenizer = None
 28 |         self.image_processor = None
 29 |         self.trust_remote_code = trust_remote_code
 30 |         self._load()
 31 | 
 32 |     def _load(self) -> None:
 33 |         self.tokenizer = AutoTokenizer.from_pretrained(
 34 |             self.model_name, trust_remote_code=self.trust_remote_code
 35 |         )
 36 |         self.model = AutoModel.from_pretrained(
 37 |             self.model_name,
 38 |             torch_dtype="auto",
 39 |             device_map=self.device,
 40 |             trust_remote_code=self.trust_remote_code,
 41 |             attn_implementation="sdpa",
 42 |         )
 43 |         self.image_processor = AutoImageProcessor.from_pretrained(
 44 |             self.model_name, trust_remote_code=self.trust_remote_code
 45 |         )
 46 | 
 47 |     @staticmethod
 48 |     def _extract_last_image_b64(messages: List[Dict[str, Any]]) -> str:
 49 |         # Expect HF-format messages with content items type: "image" with data URL
 50 |         for msg in reversed(messages):
 51 |             for item in reversed(msg.get("content", [])):
 52 |                 if isinstance(item, dict) and item.get("type") == "image":
 53 |                     url = item.get("image", "")
 54 |                     if isinstance(url, str) and url.startswith("data:image/"):
 55 |                         return url.split(",", 1)[1]
 56 |         return ""
 57 | 
 58 |     def generate(self, messages: List[Dict[str, Any]], max_new_tokens: int = 512) -> str:
 59 |         assert self.model is not None and self.tokenizer is not None and self.image_processor is not None
 60 | 
 61 |         # Tokenize text side using chat template
 62 |         input_ids = self.tokenizer.apply_chat_template(
 63 |             messages, tokenize=True, add_generation_prompt=True
 64 |         )
 65 |         input_ids = torch.tensor([input_ids]).to(self.model.device)
 66 | 
 67 |         # Prepare image inputs from last data URL image
 68 |         image_b64 = self._extract_last_image_b64(messages)
 69 |         pixel_values = None
 70 |         grid_thws = None
 71 |         if image_b64:
 72 |             image = Image.open(BytesIO(base64.b64decode(image_b64))).convert("RGB")
 73 |             image_info = self.image_processor.preprocess(images=[image])
 74 |             pixel_values = torch.tensor(image_info["pixel_values"]).to(
 75 |                 dtype=torch.bfloat16, device=self.model.device
 76 |             )
 77 |             grid_thws = torch.tensor(image_info["image_grid_thw"]) if "image_grid_thw" in image_info else None
 78 | 
 79 |         gen_kwargs: Dict[str, Any] = {
 80 |             "max_new_tokens": max_new_tokens,
 81 |             "temperature": 0,
 82 |         }
 83 |         if pixel_values is not None:
 84 |             gen_kwargs["pixel_values"] = pixel_values
 85 |         if grid_thws is not None:
 86 |             gen_kwargs["grid_thws"] = grid_thws
 87 | 
 88 |         with torch.no_grad():
 89 |             generated_ids = self.model.generate(
 90 |                 input_ids,
 91 |                 **gen_kwargs,
 92 |             )
 93 | 
 94 |         # Remove prompt tokens
 95 |         prompt_len = input_ids.shape[1]
 96 |         generated_ids = generated_ids[:, prompt_len:]
 97 |         output_text = self.tokenizer.batch_decode(
 98 |             generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
 99 |         )[0]
100 |         return output_text
101 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/custom-computer-handlers.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Custom Computers
  3 | slug: custom-computer-handlers
  4 | ---
  5 | 
  6 | The Agent SDK supports defining custom computer handlers using a simple dictionary interface. This enables integration with custom automation backends, testing frameworks, or specialized computer control systems.
  7 | 
  8 | ## Example: Defining a Custom Computer Handler
  9 | 
 10 | ```python
 11 | import asyncio
 12 | from PIL import Image
 13 | 
 14 | # Define your custom computer functions
 15 | async def take_screenshot():
 16 |     """Your custom screenshot implementation"""
 17 |     # Return PIL Image, bytes, or base64 string
 18 |     return Image.new('RGB', (1920, 1080), color='white')
 19 | 
 20 | # Create dict-based computer handler - only 'screenshot' is required
 21 | custom_computer = {
 22 |     'screenshot': take_screenshot, # required
 23 | 
 24 |     # everything below is optional
 25 |     'environment': 'linux', # linux, mac, windows, browser
 26 |     'dimensions': (1920, 1080), # (width, height)
 27 |     'click': lambda x, y, button: print(f"Clicking at ({x}, {y}) with {button} button"),
 28 | }
 29 | ```
 30 | 
 31 | You can then use this as a tool for your agent:
 32 | 
 33 | ```python
 34 | from agent import ComputerAgent
 35 | 
 36 | agent = ComputerAgent(
 37 |     model="anthropic/claude-3-5-sonnet-20241022",
 38 |     tools=[custom_computer],
 39 | )
 40 | 
 41 | # Agent will automatically convert dict to agent.computers.CustomComputerHandler
 42 | await agent.run("Take a screenshot and click at coordinates 100, 200")
 43 | ```
 44 | 
 45 | ## Class-Based Implementation
 46 | 
 47 | For more complex implementations, you can create a custom class by inheriting from `AsyncComputerHandler`:
 48 | 
 49 | ```python
 50 | from agent.computers import AsyncComputerHandler
 51 | from PIL import Image
 52 | from typing import Literal, List, Dict, Union, Optional
 53 | 
 54 | class MyCustomComputer(AsyncComputerHandler):
 55 |     """Custom computer handler implementation."""
 56 |     
 57 |     def __init__(self):
 58 |         # Initialize your custom computer interface here
 59 |         pass
 60 |     
 61 |     # ==== Computer-Use-Preview Action Space ==== 
 62 | 
 63 |     async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
 64 |         """Get the current environment type."""
 65 |         ...
 66 |     
 67 |     async def get_dimensions(self) -> tuple[int, int]:
 68 |         """Get screen dimensions as (width, height)."""
 69 |         ...
 70 |     
 71 |     async def screenshot(self) -> str:
 72 |         """Take a screenshot and return as base64 string."""
 73 |         ...
 74 |     
 75 |     async def click(self, x: int, y: int, button: str = "left") -> None:
 76 |         """Click at coordinates with specified button."""
 77 |         ...
 78 |     
 79 |     async def double_click(self, x: int, y: int) -> None:
 80 |         """Double click at coordinates."""
 81 |         ...
 82 |     
 83 |     async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
 84 |         """Scroll at coordinates with specified scroll amounts."""
 85 |         ...
 86 |     
 87 |     async def type(self, text: str) -> None:
 88 |         """Type text."""
 89 |         ...
 90 |     
 91 |     async def wait(self, ms: int = 1000) -> None:
 92 |         """Wait for specified milliseconds."""
 93 |         ...
 94 |     
 95 |     async def move(self, x: int, y: int) -> None:
 96 |         """Move cursor to coordinates."""
 97 |         ...
 98 |     
 99 |     async def keypress(self, keys: Union[List[str], str]) -> None:
100 |         """Press key combination."""
101 |         ...
102 |     
103 |     async def drag(self, path: List[Dict[str, int]]) -> None:
104 |         """Drag along specified path."""
105 |         ...
106 |     
107 |     async def get_current_url(self) -> str:
108 |         """Get current URL (for browser environments)."""
109 |         ...
110 |     
111 |     # ==== Anthropic Action Space ==== 
112 | 
113 |     async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
114 |         """Left mouse down at coordinates."""
115 |         ...
116 |     
117 |     async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
118 |         """Left mouse up at coordinates."""
119 |         ...
120 | 
121 | # Use with agent
122 | custom_computer = MyCustomComputer()
123 | 
124 | agent = ComputerAgent(
125 |     model="anthropic/claude-3-5-sonnet-20241022",
126 |     tools=[custom_computer],
127 | )
128 | 
129 | await agent.run("Take a screenshot and click at coordinates 100, 200")
130 | ```
```

--------------------------------------------------------------------------------
/libs/python/som/som/models.py:
--------------------------------------------------------------------------------

```python
from typing import List, Tuple, Optional, Literal, Dict, Any, Union
from pydantic import BaseModel, Field, field_validator, validator
  3 | 
  4 | 
  5 | class BoundingBox(BaseModel):
  6 |     """Normalized bounding box coordinates."""
  7 | 
  8 |     x1: float = Field(..., description="Normalized left coordinate")
  9 |     y1: float = Field(..., description="Normalized top coordinate")
 10 |     x2: float = Field(..., description="Normalized right coordinate")
 11 |     y2: float = Field(..., description="Normalized bottom coordinate")
 12 | 
 13 |     @property
 14 |     def coordinates(self) -> List[float]:
 15 |         """Get coordinates as a list [x1, y1, x2, y2]."""
 16 |         return [self.x1, self.y1, self.x2, self.y2]
 17 | 
 18 | 
class UIElement(BaseModel):
    """Base class for UI elements detected in a screenshot.

    Subclasses narrow the ``type`` discriminator and adjust the
    ``interactivity`` default (see IconElement / TextElement).
    """

    # Assigned after detection; None until an id has been given.
    id: Optional[int] = Field(None, description="Unique identifier for the element (1-indexed)")
    # Discriminator field; each subclass pins it to a single literal.
    type: Literal["icon", "text"]
    # Location of the element in normalized coordinates (see BoundingBox).
    bbox: BoundingBox
    interactivity: bool = Field(default=False, description="Whether the element is interactive")
    confidence: float = Field(default=1.0, description="Detection confidence score")
 27 | 
 28 | 
class IconElement(UIElement):
    """An interactive icon element.

    Defaults ``interactivity`` to True, overriding the base class.
    """

    type: Literal["icon"] = "icon"
    interactivity: bool = True
    # Which detection scale produced this element, when known — TODO confirm
    # the exact semantics against the detector implementation.
    scale: Optional[int] = Field(None, description="Detection scale used")
 35 | 
 36 | 
class TextElement(UIElement):
    """A text element.

    Text regions are treated as non-interactive (``interactivity`` False).
    """

    type: Literal["text"] = "text"
    content: str = Field(..., description="The text content")
    interactivity: bool = False
 43 | 
 44 | 
 45 | class ImageData(BaseModel):
 46 |     """Image data with dimensions."""
 47 | 
 48 |     base64: str = Field(..., description="Base64 encoded image data")
 49 |     width: int = Field(..., description="Image width in pixels")
 50 |     height: int = Field(..., description="Image height in pixels")
 51 | 
 52 |     @validator("width", "height")
 53 |     def dimensions_must_be_positive(cls, v):
 54 |         if v <= 0:
 55 |             raise ValueError("Dimensions must be positive")
 56 |         return v
 57 | 
 58 | 
class ParserMetadata(BaseModel):
    """Metadata about the parsing process.

    Captures the input image size, element counts, execution device and
    timing for a single parse run.
    """

    image_size: Tuple[int, int] = Field(
        ..., description="Original image dimensions (width, height)"
    )
    num_icons: int = Field(..., description="Number of icons detected")
    num_text: int = Field(..., description="Number of text elements detected")
    device: str = Field(..., description="Device used for detection (cpu/cuda/mps)")
    ocr_enabled: bool = Field(..., description="Whether OCR was enabled")
    latency: float = Field(..., description="Total processing time in seconds")

    @property
    def width(self) -> int:
        """Image width: first element of ``image_size``."""
        return self.image_size[0]

    @property
    def height(self) -> int:
        """Image height: second element of ``image_size``."""
        return self.image_size[1]
 80 | 
 81 | 
 82 | class ParseResult(BaseModel):
 83 |     """Result of parsing a UI screenshot."""
 84 | 
 85 |     elements: List[UIElement] = Field(..., description="Detected UI elements")
 86 |     annotated_image_base64: str = Field(..., description="Base64 encoded annotated image")
 87 |     metadata: ParserMetadata = Field(..., description="Processing metadata")
 88 |     screen_info: Optional[List[str]] = Field(
 89 |         None, description="Human-readable descriptions of elements"
 90 |     )
 91 |     parsed_content_list: Optional[List[Dict[str, Any]]] = Field(
 92 |         None, description="Parsed elements as dictionaries"
 93 |     )
 94 | 
 95 |     @property
 96 |     def image(self) -> ImageData:
 97 |         """Get image data as a convenience property."""
 98 |         return ImageData(
 99 |             base64=self.annotated_image_base64,
100 |             width=self.metadata.width,
101 |             height=self.metadata.height,
102 |         )
103 | 
104 |     @property
105 |     def width(self) -> int:
106 |         """Get image width from metadata."""
107 |         return self.metadata.width
108 | 
109 |     @property
110 |     def height(self) -> int:
111 |         """Get image height from metadata."""
112 |         return self.metadata.height
113 | 
114 |     def model_dump(self) -> Dict[str, Any]:
115 |         """Convert model to dict for compatibility with older code."""
116 |         result = super().model_dump()
117 |         # Add image data dict for backward compatibility
118 |         result["image"] = self.image.model_dump()
119 |         return result
120 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/customizing-computeragent.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Customizing Your ComputerAgent
  3 | ---
  4 | 
  5 | <Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/customizing_computeragent.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.</Callout>
  6 | 
  7 | The `ComputerAgent` interface provides an easy proxy to any computer-using model configuration, and it is a powerful framework for extending and building your own agentic systems.
  8 | 
  9 | This guide shows four proven ways to increase capabilities and success rate:
 10 | 
 11 | - 1 — Simple: Prompt engineering
 12 | - 2 — Easy: Tools
 13 | - 3 — Intermediate: Callbacks
 14 | - 4 — Expert: Custom `@register_agent`
 15 | 
 16 | ## 1) Simple: Prompt engineering
 17 | 
 18 | Provide guiding instructions to shape behavior. `ComputerAgent` accepts an optional `instructions: str | None` which acts like a system-style preface. Internally, this uses a callback that pre-pends a user message before each LLM call.
 19 | 
 20 | ```python
 21 | from agent.agent import ComputerAgent
 22 | 
 23 | agent = ComputerAgent(
 24 |     model="openai/computer-use-preview",
 25 |     tools=[computer],
 26 |     instructions=(
 27 |         "You are a meticulous software operator. Prefer safe, deterministic actions. "
 28 |         "Always confirm via on-screen text before proceeding."
 29 |     ),
 30 | )
 31 | ```
 32 | 
 33 | ## 2) Easy: Tools
 34 | 
 35 | Expose deterministic capabilities as tools (Python functions or custom computer handlers). The agent will call them when appropriate.
 36 | 
 37 | ```python
 38 | def calculate_percentage(numerator: float, denominator: float) -> str:
 39 |     """Calculate percentage as a string.
 40 | 
 41 |     Args:
 42 |         numerator: Numerator value
 43 |         denominator: Denominator value
 44 |     Returns:
 45 |         A formatted percentage string (e.g., '75.00%').
 46 |     """
 47 |     if denominator == 0:
 48 |         return "0.00%"
 49 |     return f"{(numerator/denominator)*100:.2f}%"
 50 | 
 51 | agent = ComputerAgent(
 52 |     model="openai/computer-use-preview",
 53 |     tools=[computer, calculate_percentage],
 54 | )
 55 | ```
 56 | 
 57 | - See `docs/agent-sdk/custom-tools` for authoring function tools.
 58 | - See `docs/agent-sdk/custom-computer-handlers` for building full computer interfaces.
 59 | 
 60 | ## 3) Intermediate: Callbacks
 61 | 
 62 | Callbacks provide lifecycle hooks to preprocess messages, postprocess outputs, record trajectories, manage costs, and more.
 63 | 
 64 | ```python
 65 | from agent.callbacks import ImageRetentionCallback, TrajectorySaverCallback, BudgetManagerCallback
 66 | 
 67 | agent = ComputerAgent(
 68 |     model="anthropic/claude-3-5-sonnet-20241022",
 69 |     tools=[computer],
 70 |     callbacks=[
 71 |         ImageRetentionCallback(only_n_most_recent_images=3),
 72 |         TrajectorySaverCallback("./trajectories"),
 73 |         BudgetManagerCallback(max_budget=10.0, raise_error=True),
 74 |     ],
 75 | )
 76 | ```
 77 | 
 78 | - Browse callback implementations in `libs/python/agent/agent/callbacks/`.
 79 | 
 80 | ## 4) Expert: Custom `@register_agent`
 81 | 
 82 | Build your own agent configuration class to control prompting, message shaping, and tool handling. This is the most flexible option for specialized domains.
 83 | 
 84 | - Register your own `model=...` loop using `@register_agent`
 85 | - Browse implementations in `libs/python/agent/agent/loops/`.
 86 | - Implement `predict_step()` (and optionally `predict_click()`) and return the standardized output schema.
 87 | 
 88 | ```python
 89 | from agent.decorators import register_agent
 90 | 
 91 | @register_agent(models=r".*my-special-model.*", priority=10)
 92 | class MyCustomAgentConfig:
 93 |     async def predict_step(self, messages, model, tools, **kwargs):
 94 |         # 1) Format messages for your provider
 95 |         # 2) Call provider
 96 |         # 3) Convert responses to the agent output schema
 97 |         return {"output": [], "usage": {}}
 98 | 
 99 |     async def predict_click(self, model, image_b64, instruction):
100 |         # Optional: click-only capability
101 |         return None
102 | 
103 |     def get_capabilities(self):
104 |         return ["step"]
105 | ```
106 | 
107 | ## HUD integration (optional)
108 | 
109 | When using the HUD evaluation integration (`agent/integrations/hud/`), you can pass `instructions`, `tools`, and `callbacks` directly
110 | 
111 | ```python
112 | from agent.integrations.hud import run_single_task
113 | 
114 | await run_single_task(
115 |     dataset="username/dataset-name",
116 |     model="openai/computer-use-preview",
117 |     instructions="Operate carefully. Always verify on-screen text before actions.",
118 |     # tools=[your_custom_function],
119 |     # callbacks=[YourCustomCallback()],
120 | )
121 | ```
```

--------------------------------------------------------------------------------
/libs/python/pylume/pylume/client.py:
--------------------------------------------------------------------------------

```python
  1 | import json
  2 | import asyncio
  3 | import subprocess
  4 | from typing import Optional, Any, Dict
  5 | import shlex
  6 | 
  7 | from .exceptions import (
  8 |     LumeError,
  9 |     LumeServerError,
 10 |     LumeConnectionError,
 11 |     LumeTimeoutError,
 12 |     LumeNotFoundError,
 13 |     LumeConfigError,
 14 | )
 15 | 
 16 | class LumeClient:
 17 |     def __init__(self, base_url: str, timeout: float = 60.0, debug: bool = False):
 18 |         self.base_url = base_url
 19 |         self.timeout = timeout
 20 |         self.debug = debug
 21 | 
 22 |     def _log_debug(self, message: str, **kwargs) -> None:
 23 |         """Log debug information if debug mode is enabled."""
 24 |         if self.debug:
 25 |             print(f"DEBUG: {message}")
 26 |             if kwargs:
 27 |                 print(json.dumps(kwargs, indent=2))
 28 | 
 29 |     async def _run_curl(self, method: str, path: str, data: Optional[Dict[str, Any]] = None, params: Optional[Dict[str, Any]] = None) -> Any:
 30 |         """Execute a curl command and return the response."""
 31 |         url = f"{self.base_url}{path}"
 32 |         if params:
 33 |             param_str = "&".join(f"{k}={v}" for k, v in params.items())
 34 |             url = f"{url}?{param_str}"
 35 | 
 36 |         cmd = ["curl", "-X", method, "-s", "-w", "%{http_code}", "-m", str(self.timeout)]
 37 |         
 38 |         if data is not None:
 39 |             cmd.extend(["-H", "Content-Type: application/json", "-d", json.dumps(data)])
 40 |         
 41 |         cmd.append(url)
 42 |         
 43 |         self._log_debug(f"Running curl command: {' '.join(map(shlex.quote, cmd))}")
 44 |         
 45 |         try:
 46 |             process = await asyncio.create_subprocess_exec(
 47 |                 *cmd,
 48 |                 stdout=subprocess.PIPE,
 49 |                 stderr=subprocess.PIPE
 50 |             )
 51 |             stdout, stderr = await process.communicate()
 52 |             
 53 |             if process.returncode != 0:
 54 |                 raise LumeConnectionError(f"Curl command failed: {stderr.decode()}")
 55 |             
 56 |             # The last 3 characters are the status code
 57 |             response = stdout.decode()
 58 |             status_code = int(response[-3:])
 59 |             response_body = response[:-3]  # Remove status code from response
 60 |             
 61 |             if status_code >= 400:
 62 |                 if status_code == 404:
 63 |                     raise LumeNotFoundError(f"Resource not found: {path}")
 64 |                 elif status_code == 400:
 65 |                     raise LumeConfigError(f"Invalid request: {response_body}")
 66 |                 elif status_code >= 500:
 67 |                     raise LumeServerError(f"Server error: {response_body}")
 68 |                 else:
 69 |                     raise LumeError(f"Request failed with status {status_code}: {response_body}")
 70 |             
 71 |             return json.loads(response_body) if response_body.strip() else None
 72 |             
 73 |         except asyncio.TimeoutError:
 74 |             raise LumeTimeoutError(f"Request timed out after {self.timeout} seconds")
 75 | 
 76 |     async def get(self, path: str, params: Optional[Dict[str, Any]] = None) -> Any:
 77 |         """Make a GET request."""
 78 |         return await self._run_curl("GET", path, params=params)
 79 | 
 80 |     async def post(self, path: str, data: Optional[Dict[str, Any]] = None, timeout: Optional[float] = None) -> Any:
 81 |         """Make a POST request."""
 82 |         old_timeout = self.timeout
 83 |         if timeout is not None:
 84 |             self.timeout = timeout
 85 |         try:
 86 |             return await self._run_curl("POST", path, data=data)
 87 |         finally:
 88 |             self.timeout = old_timeout
 89 | 
 90 |     async def patch(self, path: str, data: Dict[str, Any]) -> None:
 91 |         """Make a PATCH request."""
 92 |         await self._run_curl("PATCH", path, data=data)
 93 | 
 94 |     async def delete(self, path: str) -> None:
 95 |         """Make a DELETE request."""
 96 |         await self._run_curl("DELETE", path)
 97 | 
 98 |     def print_curl(self, method: str, path: str, data: Optional[Dict[str, Any]] = None) -> None:
 99 |         """Print equivalent curl command for debugging."""
100 |         curl_cmd = f"""curl -X {method} \\
101 |   '{self.base_url}{path}'"""
102 |         
103 |         if data:
104 |             curl_cmd += f" \\\n  -H 'Content-Type: application/json' \\\n  -d '{json.dumps(data)}'"
105 |         
106 |         print("\nEquivalent curl command:")
107 |         print(curl_cmd)
108 |         print()
109 | 
110 |     async def close(self) -> None:
111 |         """Close the client resources."""
112 |         pass  # No shared resources to clean up
```

--------------------------------------------------------------------------------
/docs/src/components/iou.tsx:
--------------------------------------------------------------------------------

```typescript
  1 | 'use client';
  2 | import React, { useRef, useEffect, useState, useCallback } from 'react';
  3 | 
/**
 * Represents a rectangle with position, dimensions, styling, and identification.
 * Coordinates and sizes are expressed in canvas pixels.
 */
interface Rectangle {
  /** The x-coordinate of the rectangle's left edge */
  left: number;
  /** The y-coordinate of the rectangle's top edge */
  top: number;
  /** The width of the rectangle */
  width: number;
  /** The height of the rectangle */
  height: number;
  /** The fill color of the rectangle (any canvas fillStyle value) */
  fill: string;
  /** The display name drawn as a label inside the rectangle */
  name: string;
}
 21 | 
/**
 * Props for the IOU component.
 */
interface IOUProps {
  /** The title to display above the visualization */
  title: string;
  /** The description text to display below the IOU value */
  description: string;
  /** The first rectangle for IOU calculation */
  rect1: Rectangle;
  /** The second rectangle for IOU calculation */
  rect2: Rectangle;
}
 35 | 
 36 | /**
 37 |  * A React component that visualizes and calculates the Intersection over Union (IOU) 
 38 |  * of two rectangles on a canvas
 39 |  * @param props - The component props
 40 |  * @returns The rendered IOU visualization component
 41 |  */
 42 | export default function IOU({ title, description, rect1, rect2 }: IOUProps) {
 43 |   const canvasRef = useRef<HTMLCanvasElement>(null);
 44 |   const [actualIOU, setActualIOU] = useState<number>(0);
 45 | 
 46 |   /**
 47 |    * Converts a rectangle to a bounding box with left, right, top, and bottom coordinates
 48 |    * @param rect - The rectangle to convert
 49 |    * @returns An object containing the bounding box coordinates
 50 |    */
 51 |   const getBbox = (rect: Rectangle) => ({
 52 |     left: rect.left,
 53 |     right: rect.left + rect.width,
 54 |     top: rect.top,
 55 |     bottom: rect.top + rect.height,
 56 |   });
 57 | 
 58 |   /**
 59 |    * Calculates the intersection area between two bounding boxes
 60 |    * @param bbox1 - The first bounding box
 61 |    * @param bbox2 - The second bounding box
 62 |    * @returns The area of intersection between the two bounding boxes
 63 |    */
 64 |   const calcIntersection = (bbox1: any, bbox2: any): number => {
 65 |     const x1 = Math.max(bbox1.left, bbox2.left);
 66 |     const x2 = Math.min(bbox1.right, bbox2.right);
 67 |     const y1 = Math.max(bbox1.top, bbox2.top);
 68 |     const y2 = Math.min(bbox1.bottom, bbox2.bottom);
 69 | 
 70 |     // Check if there's actually an overlap
 71 |     if (x2 <= x1 || y2 <= y1) {
 72 |       return 0;
 73 |     }
 74 | 
 75 |     const intersection = (x2 - x1) * (y2 - y1);
 76 |     return intersection;
 77 |   };
 78 | 
 79 |   /**
 80 |    * Calculates the area of a rectangle
 81 |    * @param rect - The rectangle to calculate area for
 82 |    * @returns The area of the rectangle
 83 |    */
 84 |   const calcArea = (rect: Rectangle): number => {
 85 |     return rect.width * rect.height;
 86 |   };
 87 | 
 88 |   /**
 89 |    * Draws the rectangles on the canvas and calculates the IOU value
 90 |    */
 91 |   const drawCanvas = useCallback(() => {
 92 |     const canvas = canvasRef.current;
 93 |     if (!canvas) return;
 94 | 
 95 |     const ctx = canvas.getContext('2d');
 96 |     if (!ctx) return;
 97 | 
 98 |     // Clear canvas
 99 |     ctx.clearRect(0, 0, canvas.width, canvas.height);
100 | 
101 |     // Calculate IOU
102 |     const bbox1 = getBbox(rect1);
103 |     const bbox2 = getBbox(rect2);
104 |     const intersection = calcIntersection(bbox1, bbox2);
105 |     const union = calcArea(rect1) + calcArea(rect2) - intersection;
106 |     const iou = intersection / union;
107 |     setActualIOU(iou);
108 | 
109 |     // Draw rectangles
110 |     [rect1, rect2].forEach((rect) => {
111 |       ctx.fillStyle = rect.fill;
112 |       ctx.fillRect(rect.left, rect.top, rect.width, rect.height);
113 | 
114 |       ctx.strokeStyle = '#000';
115 |       ctx.lineWidth = 2;
116 |       ctx.strokeRect(rect.left, rect.top, rect.width, rect.height);
117 | 
118 |       ctx.fillStyle = '#000';
119 |       ctx.font = '12px';
120 |       ctx.fillText(rect.name, rect.left + 5, rect.top + 15);
121 |     });
122 |   }, [rect1, rect2]);
123 | 
124 |   useEffect(() => {
125 |     drawCanvas();
126 |   }, [drawCanvas]);
127 | 
128 |   return (
129 |     <div className="">
130 |       <h3 className="text-sm font-semibold ">{title}</h3>
131 |       <div className="flex items-start gap-6">
132 |         <div>
133 |           <canvas
134 |             ref={canvasRef}
135 |             width={200}
136 |             height={150}
137 |             className="border bg-white rounded-md"
138 |           />
139 |           <div className="mt-2 text-sm">
140 |             <div className="font-mono mb-2">IOU = {actualIOU.toFixed(3)}</div>
141 |             <span className="">{description}</span>
142 |           </div>
143 |         </div>
144 |       </div>
145 |     </div>
146 |   );
147 | }
148 | 
```

--------------------------------------------------------------------------------
/blog/cua-hackathon.md:
--------------------------------------------------------------------------------

```markdown
 1 | # Computer-Use Agents SOTA Challenge: Hack the North + Global Online
 2 | 
 3 | *Published on August 25, 2025 by Francesco Bonacci*
 4 | 
 5 | We’re bringing something new to [Hack the North](https://hackthenorth.com), Canada’s largest hackathon, this year: a head-to-head competition for **Computer-Use Agents** - on-site at Waterloo and a **Global online challenge**. From September 12–14, 2025, teams build on the **Cua Agent Framework** and are scored in **HUD’s OSWorld-Verified** environment to push past today’s SOTA on [OS-World](https://os-world.github.io).
 6 | 
 7 | <img src="./assets/hack-the-north.png">
 8 | 
 9 | ## Track A: On-site @ Hack the North
10 | 
11 | There’s one global leaderboard: **Cua - Best State-of-the-Art Computer-Use Agent**. Use any model setup you like (cloud or local). After projects are submitted, [HUD](https://www.hud.so) runs the official benchmark; the top team earns a **guaranteed YC partner interview (W26 batch)**. We’ll also feature winners on our blog and socials and kit the team out with swag.
12 | 
13 | ## Track B: Cua Global Online Hackathon
14 | 
15 | **Cua** and [**Ollama**](https://ollama.com) organize a global hackathon to find the **most creative uses of local and hybrid computer-use agents**. There are no geographic restrictions on who can join — this is a worldwide competition focused on **originality, impact, and inventive applications** that showcase what's possible with local and hybrid inference.
16 | 
17 | **Prizes:** 
18 | - 1st **MacBook Air M4 (or equivalent value)** + features in Cua & Ollama channels
19 | - 2nd **$500 CAD + swag**
20 | - 3rd **swag + public feature**
21 | 
22 | ---
23 | 
24 | ## How it works
25 | 
26 | Two different tracks, two different processes:
27 | 
28 | ### On-site (Track A)
29 | Build during the weekend and submit a repo with a one-line start command. **HUD** executes your command in a clean environment and runs **OSWorld-Verified**. Scores come from official benchmark results; ties break by median, then wall-clock time, then earliest submission. Any model setup is allowed (cloud or local).
30 | 
31 | **HUD** runs official evaluations immediately after submission. Winners are announced at the **closing ceremony**.
32 | 
33 | ### Rules
34 | - Fork and star the [Cua repo](https://github.com/trycua/cua).
35 | - Add your agent and instructions in `samples/community/hack-the-north/<YOUR_TEAM_NAME>`.
36 | - Include a README with details on the approach and any required notes.  
37 | - Submit a PR.  
38 | 
39 | **Deadline: Sept 15, 8:00 AM EDT**
40 | 
41 | ### Global Online (Track B)
42 | Open to anyone, anywhere. Build on your own timeline and submit through the **Cua Discord form** by the deadline.
43 | 
44 | **Project Requirements:**
45 | - Your agent must integrate **Cua and Ollama** in some way
46 | - Your agent must be **easily runnable by judges**
47 | 
48 | Judged by **Cua** and **Ollama** teams on:  
49 | - **Creativity (30%)** – originality, usefulness, surprise factor  
50 | - **Technical Depth (30%)** – quality of engineering and agent design  
51 | - **Use of Ollama (30%)** – effective integration of local/hybrid inference  
52 | - **Polish (10%)** – presentation, clarity, demo readiness  
53 | 
54 | ### Submission Process
55 | Submissions will be collected via a **form link provided in the Cua Discord**. Your submission must contain:
56 | 
57 | - **GitHub repo** containing the agent source code and a clear README with instructions on how to use the agent
58 | - **Explanation** of the models and tools used, and what's local or hybrid about your design  
59 | - **Short demo video** (up to two minutes)
60 | 
61 | A **commit freeze** will be used to ensure that no changes are made after the deadline. Winners will be announced after judging is complete.
62 | 
63 | **Deadline: Sept 28, 11:59 PM UTC (extended due to popular demand!)**
64 | 
65 | ---
66 | 
67 | ## Join us
68 | 
69 | Bring a team, pick a model stack, and push what agents can do on real computers. We can’t wait to see what you build at **Hack the North 2025**.
70 | 
71 | **Discord channels**  
72 | - Join the Discord first: https://discord.gg/cua-ai
73 | - **#hack-the-north (on-site):** https://discord.com/channels/1328377437301641247/1409508526774157342  
74 | - **#global-online (Ollama × Cua):** https://discord.com/channels/1328377437301641247/1409518100491145226  
75 | 
76 | **Contact**  
77 | Questions on Hack the North? Email **[email protected]**.
78 | 
79 | *P.S. If you’re planning ahead, start with the Cua Agent Framework and OSWorld-Verified docs at docs.trycua.com; we’ll share office-hour times in both Discord channels.*
```

--------------------------------------------------------------------------------
/libs/python/computer/computer/providers/base.py:
--------------------------------------------------------------------------------

```python
  1 | """Base provider interface for VM backends."""
  2 | 
  3 | import abc
  4 | from enum import StrEnum
  5 | from typing import Dict, Optional, Any, AsyncContextManager
  6 | 
  7 | from .types import ListVMsResponse
  8 | 
  9 | 
class VMProviderType(StrEnum):
    """Enum of supported VM provider types.

    As a StrEnum, members compare equal to their string values, so they can
    be used anywhere a plain provider-name string is expected.
    """
    LUME = "lume"            # native macOS virtualization via Lume
    LUMIER = "lumier"        # Lumier (Docker-packaged Lume) backend
    CLOUD = "cloud"          # hosted cloud VMs
    WINSANDBOX = "winsandbox"  # Windows Sandbox backend
    DOCKER = "docker"        # Docker container backend
    UNKNOWN = "unknown"      # fallback when the provider cannot be determined
 18 | 
 19 | 
 20 | class BaseVMProvider(AsyncContextManager):
 21 |     """Base interface for VM providers.
 22 |     
 23 |     All VM provider implementations must implement this interface.
 24 |     """
 25 |     
 26 |     @property
 27 |     @abc.abstractmethod
 28 |     def provider_type(self) -> VMProviderType:
 29 |         """Get the provider type."""
 30 |         pass
 31 |         
 32 |     @abc.abstractmethod
 33 |     async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
 34 |         """Get VM information by name.
 35 |         
 36 |         Args:
 37 |             name: Name of the VM to get information for
 38 |             storage: Optional storage path override. If provided, this will be used
 39 |                     instead of the provider's default storage path.
 40 |         
 41 |         Returns:
 42 |             Dictionary with VM information including status, IP address, etc.
 43 |         """
 44 |         pass
 45 |         
 46 |     @abc.abstractmethod
 47 |     async def list_vms(self) -> ListVMsResponse:
 48 |         """List all available VMs.
 49 | 
 50 |         Returns:
 51 |             ListVMsResponse: A list of minimal VM objects as defined in
 52 |             `computer.providers.types.MinimalVM`.
 53 |         """
 54 |         pass
 55 |         
 56 |     @abc.abstractmethod
 57 |     async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
 58 |         """Run a VM by name with the given options.
 59 |         
 60 |         Args:
 61 |             image: Name/tag of the image to use
 62 |             name: Name of the VM to run
 63 |             run_opts: Dictionary of run options (memory, cpu, etc.)
 64 |             storage: Optional storage path override. If provided, this will be used
 65 |                     instead of the provider's default storage path.
 66 |         
 67 |         Returns:
 68 |             Dictionary with VM run status and information
 69 |         """
 70 |         pass
 71 |         
 72 |     @abc.abstractmethod
 73 |     async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
 74 |         """Stop a VM by name.
 75 |         
 76 |         Args:
 77 |             name: Name of the VM to stop
 78 |             storage: Optional storage path override. If provided, this will be used
 79 |                     instead of the provider's default storage path.
 80 |         
 81 |         Returns:
 82 |             Dictionary with VM stop status and information
 83 |         """
 84 |         pass
 85 |         
 86 |     @abc.abstractmethod
 87 |     async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
 88 |         """Restart a VM by name.
 89 |         
 90 |         Args:
 91 |             name: Name of the VM to restart
 92 |             storage: Optional storage path override. If provided, this will be used
 93 |                     instead of the provider's default storage path.
 94 |         
 95 |         Returns:
 96 |             Dictionary with VM restart status and information
 97 |         """
 98 |         pass
 99 |         
100 |     @abc.abstractmethod
101 |     async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
102 |         """Update VM configuration.
103 |         
104 |         Args:
105 |             name: Name of the VM to update
106 |             update_opts: Dictionary of update options (memory, cpu, etc.)
107 |             storage: Optional storage path override. If provided, this will be used
108 |                     instead of the provider's default storage path.
109 |         
110 |         Returns:
111 |             Dictionary with VM update status and information
112 |         """
113 |         pass
114 |         
115 |     @abc.abstractmethod
116 |     async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str:
117 |         """Get the IP address of a VM, waiting indefinitely until it's available.
118 |         
119 |         Args:
120 |             name: Name of the VM to get the IP for
121 |             storage: Optional storage path override. If provided, this will be used
122 |                     instead of the provider's default storage path.
123 |             retry_delay: Delay between retries in seconds (default: 2)
124 |             
125 |         Returns:
126 |             IP address of the VM when it becomes available
127 |         """
128 |         pass
129 | 
```

--------------------------------------------------------------------------------
/libs/lume/src/Virtualization/DHCPLeaseParser.swift:
--------------------------------------------------------------------------------

```swift
  1 | import Foundation
  2 | 
  3 | /// Represents a DHCP lease entry from the system's DHCP lease file
  4 | private struct DHCPLease {
  5 |     let macAddress: String
  6 |     let ipAddress: String
  7 |     let expirationDate: Date
  8 |     
  9 |     /// Creates a lease entry from raw DHCP lease file key-value pairs
 10 |     /// - Parameter dict: Dictionary containing the raw lease data
 11 |     /// - Returns: A DHCPLease instance if the data is valid, nil otherwise
 12 |     static func from(_ dict: [String: String]) -> DHCPLease? {
 13 |         guard let hwAddress = dict["hw_address"],
 14 |               let ipAddress = dict["ip_address"],
 15 |               let lease = dict["lease"] else {
 16 |             return nil
 17 |         }
 18 |         
 19 |         // Parse MAC address from hw_address field (format can be "1,xx:xx:xx:xx:xx:xx" or "ff,...")
 20 |         let hwParts = hwAddress.split(separator: ",")
 21 |         guard hwParts.count >= 2 else { return nil }
 22 |         
 23 |         // Get the MAC part after the prefix and normalize it
 24 |         let rawMacAddress = String(hwParts[1]).trimmingCharacters(in: .whitespaces)
 25 |         
 26 |         // Normalize the MAC address by ensuring each component is two digits
 27 |         let normalizedMacAddress = rawMacAddress.split(separator: ":")
 28 |             .map { component in
 29 |                 let hex = String(component)
 30 |                 return hex.count == 1 ? "0\(hex)" : hex
 31 |             }
 32 |             .joined(separator: ":")
 33 |         
 34 |         // Convert hex timestamp to Date
 35 |         let timestampHex = lease.trimmingCharacters(in: CharacterSet(charactersIn: "0x"))
 36 |         guard let timestamp = UInt64(timestampHex, radix: 16) else { return nil }
 37 |         let expirationDate = Date(timeIntervalSince1970: TimeInterval(timestamp))
 38 |         
 39 |         return DHCPLease(
 40 |             macAddress: normalizedMacAddress,
 41 |             ipAddress: ipAddress,
 42 |             expirationDate: expirationDate
 43 |         )
 44 |     }
 45 |     
 46 |     /// Checks if the lease is currently valid
 47 |     var isValid: Bool {
 48 |         expirationDate > Date()
 49 |     }
 50 | }
 51 | 
 52 | /// Parses DHCP lease files to retrieve IP addresses for VMs based on their MAC addresses
 53 | enum DHCPLeaseParser {
 54 |     private static let leasePath = "/var/db/dhcpd_leases"
 55 |     
 56 |     /// Retrieves the IP address for a given MAC address from the DHCP lease file
 57 |     /// - Parameter macAddress: The MAC address to look up
 58 |     /// - Returns: The IP address if found, nil otherwise
 59 |     static func getIPAddress(forMAC macAddress: String) -> String? {
 60 |         guard let leaseContents = try? String(contentsOfFile: leasePath, encoding: .utf8) else {
 61 |             return nil
 62 |         }
 63 | 
 64 |         // Normalize the input MAC address to ensure consistent format
 65 |         let normalizedMacAddress = macAddress.split(separator: ":").map { component in
 66 |             let hex = String(component)
 67 |             return hex.count == 1 ? "0\(hex)" : hex
 68 |         }.joined(separator: ":")
 69 |         
 70 |         let leases = try? parseDHCPLeases(leaseContents)
 71 |         return leases?.first { lease in 
 72 |             lease.macAddress == normalizedMacAddress
 73 |         }?.ipAddress
 74 |     }
 75 |     
 76 |     /// Parses the contents of a DHCP lease file into lease entries
 77 |     /// - Parameter contents: The raw contents of the lease file
 78 |     /// - Returns: Array of parsed lease entries
 79 |     private static func parseDHCPLeases(_ contents: String) throws -> [DHCPLease] {
 80 |         var leases: [DHCPLease] = []
 81 |         var currentLease: [String: String] = [:]
 82 |         var inLeaseBlock = false
 83 |         
 84 |         let lines = contents.components(separatedBy: .newlines)
 85 |         
 86 |         for line in lines {
 87 |             let trimmedLine = line.trimmingCharacters(in: .whitespaces)
 88 |             
 89 |             if trimmedLine == "{" {
 90 |                 inLeaseBlock = true
 91 |                 currentLease = [:]
 92 |             } else if trimmedLine == "}" {
 93 |                 if let lease = DHCPLease.from(currentLease) {
 94 |                     leases.append(lease)
 95 |                 }
 96 |                 inLeaseBlock = false
 97 |             } else if inLeaseBlock {
 98 |                 let parts = trimmedLine.split(separator: "=", maxSplits: 1)
 99 |                 if parts.count == 2 {
100 |                     let key = String(parts[0]).trimmingCharacters(in: .whitespaces)
101 |                     let value = String(parts[1]).trimmingCharacters(in: .whitespaces)
102 |                     currentLease[key] = value
103 |                 }
104 |             }
105 |         }
106 |         
107 |         return leases
108 |     }
109 | } 
```

--------------------------------------------------------------------------------
/examples/computer_examples.py:
--------------------------------------------------------------------------------

```python
  1 | import os
  2 | import asyncio
  3 | from pathlib import Path
  4 | import sys
  5 | import traceback
  6 | 
  7 | # Load environment variables from .env file
  8 | project_root = Path(__file__).parent.parent
  9 | env_file = project_root / ".env"
 10 | print(f"Loading environment from: {env_file}")
 11 | from dotenv import load_dotenv
 12 | 
 13 | load_dotenv(env_file)
 14 | 
 15 | # Add paths to sys.path if needed
 16 | pythonpath = os.environ.get("PYTHONPATH", "")
 17 | for path in pythonpath.split(":"):
 18 |     if path and path not in sys.path:
 19 |         sys.path.insert(0, path)  # Insert at beginning to prioritize
 20 |         print(f"Added to sys.path: {path}")
 21 | 
 22 | from computer.computer import Computer
 23 | from computer.providers.base import VMProviderType
 24 | from computer.logger import LogLevel
 25 | 
 26 | async def main():
 27 |     try:
 28 |         print("\n=== Using direct initialization ===")
 29 | 
 30 |         # Create a local macOS computer
 31 |         computer = Computer(
 32 |             display="1024x768", 
 33 |             memory="8GB", 
 34 |             cpu="4", 
 35 |             os_type="macos",
 36 |             name="macos",
 37 |             verbosity=LogLevel.VERBOSE,
 38 |             provider_type=VMProviderType.LUME,
 39 |             storage="/Users/<USER>/repos/trycua/computer/examples/storage",
 40 |             shared_directories=[
 41 |                 "/Users/<USER>/repos/trycua/computer/examples/shared"
 42 |             ],
 43 |             ephemeral=False,
 44 |         )
 45 | 
 46 |         # Create a remote Linux computer with Cua
 47 |         # computer = Computer(
 48 |         #     os_type="linux",
 49 |         #     api_key=os.getenv("CUA_API_KEY"),
 50 |         #     name=os.getenv("CONTAINER_NAME"),
 51 |         #     provider_type=VMProviderType.CLOUD,
 52 |         # )
 53 |         
 54 |         try:
 55 |             # Run the computer with default parameters
 56 |             await computer.run()
 57 |             
 58 |             screenshot = await computer.interface.screenshot()
 59 |             
 60 |             # Create output directory if it doesn't exist
 61 |             output_dir = Path("./output")
 62 |             output_dir.mkdir(exist_ok=True)
 63 |             
 64 |             screenshot_path = output_dir / "screenshot.png"
 65 |             with open(screenshot_path, "wb") as f:
 66 |                 f.write(screenshot)
 67 |             print(f"Screenshot saved to: {screenshot_path.absolute()}")
 68 |             
 69 |             # await computer.interface.hotkey("command", "space")
 70 | 
 71 |             # res = await computer.interface.run_command("touch ./Downloads/empty_file")
 72 |             # print(f"Run command result: {res}")
 73 | 
 74 |             accessibility_tree = await computer.interface.get_accessibility_tree()
 75 |             print(f"Accessibility tree: {accessibility_tree}")
 76 | 
 77 |             # Screen Actions Examples
 78 |             # print("\n===  Screen Actions ===")
 79 |             # screenshot = await computer.interface.screenshot()
 80 |             # with open("screenshot_direct.png", "wb") as f:
 81 |             #     f.write(screenshot)
 82 | 
 83 |             screen_size = await computer.interface.get_screen_size()
 84 |             print(f"Screen size: {screen_size}")
 85 | 
 86 |             # Demonstrate coordinate conversion
 87 |             center_x, center_y = 733, 736
 88 |             print(f"Center in screen coordinates: ({center_x}, {center_y})")
 89 | 
 90 |             screenshot_center = await computer.to_screenshot_coordinates(center_x, center_y)
 91 |             print(f"Center in screenshot coordinates: {screenshot_center}")
 92 | 
 93 |             screen_center = await computer.to_screen_coordinates(*screenshot_center)
 94 |             print(f"Back to screen coordinates: {screen_center}")
 95 | 
 96 |             # Mouse Actions Examples
 97 |             print("\n=== Mouse Actions ===")
 98 |             await computer.interface.move_cursor(100, 100)
 99 |             await computer.interface.left_click()
100 |             await computer.interface.right_click(300, 300)
101 |             await computer.interface.double_click(400, 400)
102 | 
103 |             # Keyboard Actions Examples
104 |             print("\n=== Keyboard Actions ===")
105 |             await computer.interface.type_text("Hello, World!")
106 |             await computer.interface.press_key("enter")
107 | 
108 |             # Clipboard Actions Examples
109 |             print("\n=== Clipboard Actions ===")
110 |             await computer.interface.set_clipboard("Test clipboard")
111 |             content = await computer.interface.copy_to_clipboard()
112 |             print(f"Clipboard content: {content}")
113 | 
114 |         finally:
115 |             # Important to clean up resources
116 |             await computer.stop()
117 |     except Exception as e:
118 |         print(f"Error in main: {e}")
119 |         traceback.print_exc()
120 | 
121 | 
122 | if __name__ == "__main__":
123 |     asyncio.run(main())
124 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/loops/opencua.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | OpenCUA agent loop implementation for click prediction using litellm.acompletion
  3 | Based on OpenCUA model for GUI grounding tasks.
  4 | """
  5 | 
  6 | import asyncio
  7 | import json
  8 | import re
  9 | import base64
 10 | from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple
 11 | from io import BytesIO
 12 | import uuid
 13 | from PIL import Image
 14 | import litellm
 15 | import math
 16 | 
 17 | from .composed_grounded import ComposedGroundedConfig
 18 | from ..decorators import register_agent
 19 | from ..types import Messages, AgentResponse, Tools, AgentCapability
 20 | from ..loops.base import AsyncAgentConfig
 21 | 
 22 | def extract_coordinates_from_pyautogui(text: str) -> Optional[Tuple[int, int]]:
 23 |     """Extract coordinates from pyautogui.click(x=..., y=...) format."""
 24 |     try:
 25 |         # Look for pyautogui.click(x=1443, y=343) pattern
 26 |         pattern = r"pyautogui\.click\(x=(\d+),\s*y=(\d+)\)"
 27 |         match = re.search(pattern, text)
 28 |         if match:
 29 |             x, y = int(match.group(1)), int(match.group(2))
 30 |             return (x, y)
 31 |         return None
 32 |     except Exception:
 33 |         return None
 34 | 
 35 | @register_agent(models=r"(?i).*OpenCUA.*")
 36 | class OpenCUAConfig(ComposedGroundedConfig):
 37 |     """OpenCUA agent configuration implementing AsyncAgentConfig protocol for click prediction."""
 38 |     
 39 |     def __init__(self):
 40 |         super().__init__()
 41 |         self.current_model = None
 42 |         self.last_screenshot_b64 = None
 43 | 
 44 |     async def predict_step(
 45 |         self,
 46 |         messages: List[Dict[str, Any]],
 47 |         model: str,
 48 |         tools: Optional[List[Dict[str, Any]]] = None,
 49 |         max_retries: Optional[int] = None,
 50 |         stream: bool = False,
 51 |         computer_handler=None,
 52 |         _on_api_start=None,
 53 |         _on_api_end=None,
 54 |         _on_usage=None,
 55 |         _on_screenshot=None,
 56 |         **kwargs
 57 |     ) -> Dict[str, Any]:
 58 |         """Fallback to a self-composed model"""
 59 |         return await super().predict_step(
 60 |             messages=messages,
 61 |             model=f"{model}+{model}",
 62 |             tools=tools,
 63 |             max_retries=max_retries,
 64 |             stream=stream,
 65 |             computer_handler=computer_handler,
 66 |             _on_api_start=_on_api_start,
 67 |             _on_api_end=_on_api_end,
 68 |             _on_usage=_on_usage,
 69 |             _on_screenshot=_on_screenshot,
 70 |             **kwargs
 71 |         )
 72 | 
 73 |     async def predict_click(
 74 |         self,
 75 |         model: str,
 76 |         image_b64: str,
 77 |         instruction: str,
 78 |         **kwargs
 79 |     ) -> Optional[Tuple[int, int]]:
 80 |         """
 81 |         Predict click coordinates using OpenCUA model via litellm.acompletion.
 82 |         
 83 |         Args:
 84 |             model: The OpenCUA model name
 85 |             image_b64: Base64 encoded image
 86 |             instruction: Instruction for where to click
 87 |             
 88 |         Returns:
 89 |             Tuple of (x, y) coordinates or None if prediction fails
 90 |         """
 91 |         # Prepare system message
 92 |         system_prompt = (
 93 |             "You are a GUI agent. You are given a task and a screenshot of the screen. "
 94 |             "You need to perform a series of pyautogui actions to complete the task."
 95 |         )
 96 |         
 97 |         system_message = {
 98 |             "role": "system",
 99 |             "content": system_prompt
100 |         }
101 |         
102 |         # Prepare user message with image and instruction
103 |         user_message = {
104 |             "role": "user",
105 |             "content": [
106 |                 {
107 |                     "type": "image_url",
108 |                     "image_url": {
109 |                         "url": f"data:image/png;base64,{image_b64}"
110 |                     }
111 |                 },
112 |                 {
113 |                     "type": "text",
114 |                     "text": f"Click on {instruction}"
115 |                 }
116 |             ]
117 |         }
118 |         
119 |         # Prepare API call kwargs
120 |         api_kwargs = {
121 |             "model": model,
122 |             "messages": [system_message, user_message],
123 |             "max_new_tokens": 2056,
124 |             "temperature": 0,
125 |             **kwargs
126 |         }
127 |         
128 |         # Use liteLLM acompletion
129 |         response = await litellm.acompletion(**api_kwargs)
130 |         
131 |         # Extract response text
132 |         output_text = response.choices[0].message.content
133 |         # print(output_text)
134 |         
135 |         # Extract coordinates from pyautogui format
136 |         coordinates = extract_coordinates_from_pyautogui(output_text)
137 |         
138 |         return coordinates
139 |     
140 |     def get_capabilities(self) -> List[AgentCapability]:
141 |         """Return the capabilities supported by this agent."""
142 |         return ["click"]
143 | 
```
Page 4/21FirstPrevNextLast