#
tokens: 48936/50000 54/497 files (page 3/16)
lines: off (toggle) GitHub
raw markdown copy
This is page 3 of 16. Use http://codebase.md/trycua/cua?page={x} to view the full context.

# Directory Structure

```
├── .all-contributorsrc
├── .cursorignore
├── .devcontainer
│   ├── devcontainer.json
│   ├── post-install.sh
│   └── README.md
├── .dockerignore
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── scripts
│   │   ├── get_pyproject_version.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── README.md
│   │       └── test_get_pyproject_version.py
│   └── workflows
│       ├── ci-lume.yml
│       ├── docker-publish-kasm.yml
│       ├── docker-publish-xfce.yml
│       ├── docker-reusable-publish.yml
│       ├── npm-publish-computer.yml
│       ├── npm-publish-core.yml
│       ├── publish-lume.yml
│       ├── pypi-publish-agent.yml
│       ├── pypi-publish-computer-server.yml
│       ├── pypi-publish-computer.yml
│       ├── pypi-publish-core.yml
│       ├── pypi-publish-mcp-server.yml
│       ├── pypi-publish-pylume.yml
│       ├── pypi-publish-som.yml
│       ├── pypi-reusable-publish.yml
│       └── test-validation-script.yml
├── .gitignore
├── .vscode
│   ├── docs.code-workspace
│   ├── launch.json
│   ├── libs-ts.code-workspace
│   ├── lume.code-workspace
│   ├── lumier.code-workspace
│   └── py.code-workspace
├── blog
│   ├── app-use.md
│   ├── assets
│   │   ├── composite-agents.png
│   │   ├── docker-ubuntu-support.png
│   │   ├── hack-booth.png
│   │   ├── hack-closing-ceremony.jpg
│   │   ├── hack-cua-ollama-hud.jpeg
│   │   ├── hack-leaderboard.png
│   │   ├── hack-the-north.png
│   │   ├── hack-winners.jpeg
│   │   ├── hack-workshop.jpeg
│   │   ├── hud-agent-evals.png
│   │   └── trajectory-viewer.jpeg
│   ├── bringing-computer-use-to-the-web.md
│   ├── build-your-own-operator-on-macos-1.md
│   ├── build-your-own-operator-on-macos-2.md
│   ├── composite-agents.md
│   ├── cua-hackathon.md
│   ├── hack-the-north.md
│   ├── hud-agent-evals.md
│   ├── human-in-the-loop.md
│   ├── introducing-cua-cloud-containers.md
│   ├── lume-to-containerization.md
│   ├── sandboxed-python-execution.md
│   ├── training-computer-use-models-trajectories-1.md
│   ├── trajectory-viewer.md
│   ├── ubuntu-docker-support.md
│   └── windows-sandbox.md
├── CONTRIBUTING.md
├── Development.md
├── Dockerfile
├── docs
│   ├── .gitignore
│   ├── .prettierrc
│   ├── content
│   │   └── docs
│   │       ├── agent-sdk
│   │       │   ├── agent-loops.mdx
│   │       │   ├── benchmarks
│   │       │   │   ├── index.mdx
│   │       │   │   ├── interactive.mdx
│   │       │   │   ├── introduction.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── osworld-verified.mdx
│   │       │   │   ├── screenspot-pro.mdx
│   │       │   │   └── screenspot-v2.mdx
│   │       │   ├── callbacks
│   │       │   │   ├── agent-lifecycle.mdx
│   │       │   │   ├── cost-saving.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── logging.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── pii-anonymization.mdx
│   │       │   │   └── trajectories.mdx
│   │       │   ├── chat-history.mdx
│   │       │   ├── custom-computer-handlers.mdx
│   │       │   ├── custom-tools.mdx
│   │       │   ├── customizing-computeragent.mdx
│   │       │   ├── integrations
│   │       │   │   ├── hud.mdx
│   │       │   │   └── meta.json
│   │       │   ├── message-format.mdx
│   │       │   ├── meta.json
│   │       │   ├── migration-guide.mdx
│   │       │   ├── prompt-caching.mdx
│   │       │   ├── supported-agents
│   │       │   │   ├── composed-agents.mdx
│   │       │   │   ├── computer-use-agents.mdx
│   │       │   │   ├── grounding-models.mdx
│   │       │   │   ├── human-in-the-loop.mdx
│   │       │   │   └── meta.json
│   │       │   ├── supported-model-providers
│   │       │   │   ├── index.mdx
│   │       │   │   └── local-models.mdx
│   │       │   └── usage-tracking.mdx
│   │       ├── computer-sdk
│   │       │   ├── commands.mdx
│   │       │   ├── computer-ui.mdx
│   │       │   ├── computers.mdx
│   │       │   ├── meta.json
│   │       │   └── sandboxed-python.mdx
│   │       ├── index.mdx
│   │       ├── libraries
│   │       │   ├── agent
│   │       │   │   └── index.mdx
│   │       │   ├── computer
│   │       │   │   └── index.mdx
│   │       │   ├── computer-server
│   │       │   │   ├── Commands.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── REST-API.mdx
│   │       │   │   └── WebSocket-API.mdx
│   │       │   ├── core
│   │       │   │   └── index.mdx
│   │       │   ├── lume
│   │       │   │   ├── cli-reference.mdx
│   │       │   │   ├── faq.md
│   │       │   │   ├── http-api.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── prebuilt-images.mdx
│   │       │   ├── lumier
│   │       │   │   ├── building-lumier.mdx
│   │       │   │   ├── docker-compose.mdx
│   │       │   │   ├── docker.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   └── meta.json
│   │       │   ├── mcp-server
│   │       │   │   ├── client-integrations.mdx
│   │       │   │   ├── configuration.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── llm-integrations.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── tools.mdx
│   │       │   │   └── usage.mdx
│   │       │   └── som
│   │       │       ├── configuration.mdx
│   │       │       └── index.mdx
│   │       ├── meta.json
│   │       ├── quickstart-cli.mdx
│   │       ├── quickstart-devs.mdx
│   │       └── telemetry.mdx
│   ├── next.config.mjs
│   ├── package-lock.json
│   ├── package.json
│   ├── pnpm-lock.yaml
│   ├── postcss.config.mjs
│   ├── public
│   │   └── img
│   │       ├── agent_gradio_ui.png
│   │       ├── agent.png
│   │       ├── cli.png
│   │       ├── computer.png
│   │       ├── som_box_threshold.png
│   │       └── som_iou_threshold.png
│   ├── README.md
│   ├── source.config.ts
│   ├── src
│   │   ├── app
│   │   │   ├── (home)
│   │   │   │   ├── [[...slug]]
│   │   │   │   │   └── page.tsx
│   │   │   │   └── layout.tsx
│   │   │   ├── api
│   │   │   │   └── search
│   │   │   │       └── route.ts
│   │   │   ├── favicon.ico
│   │   │   ├── global.css
│   │   │   ├── layout.config.tsx
│   │   │   ├── layout.tsx
│   │   │   ├── llms.mdx
│   │   │   │   └── [[...slug]]
│   │   │   │       └── route.ts
│   │   │   └── llms.txt
│   │   │       └── route.ts
│   │   ├── assets
│   │   │   ├── discord-black.svg
│   │   │   ├── discord-white.svg
│   │   │   ├── logo-black.svg
│   │   │   └── logo-white.svg
│   │   ├── components
│   │   │   ├── iou.tsx
│   │   │   └── mermaid.tsx
│   │   ├── lib
│   │   │   ├── llms.ts
│   │   │   └── source.ts
│   │   └── mdx-components.tsx
│   └── tsconfig.json
├── examples
│   ├── agent_examples.py
│   ├── agent_ui_examples.py
│   ├── computer_examples_windows.py
│   ├── computer_examples.py
│   ├── computer_ui_examples.py
│   ├── computer-example-ts
│   │   ├── .env.example
│   │   ├── .gitignore
│   │   ├── .prettierrc
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── README.md
│   │   ├── src
│   │   │   ├── helpers.ts
│   │   │   └── index.ts
│   │   └── tsconfig.json
│   ├── docker_examples.py
│   ├── evals
│   │   ├── hud_eval_examples.py
│   │   └── wikipedia_most_linked.txt
│   ├── pylume_examples.py
│   ├── sandboxed_functions_examples.py
│   ├── som_examples.py
│   ├── utils.py
│   └── winsandbox_example.py
├── img
│   ├── agent_gradio_ui.png
│   ├── agent.png
│   ├── cli.png
│   ├── computer.png
│   ├── logo_black.png
│   └── logo_white.png
├── libs
│   ├── kasm
│   │   ├── Dockerfile
│   │   ├── LICENSE
│   │   ├── README.md
│   │   └── src
│   │       └── ubuntu
│   │           └── install
│   │               └── firefox
│   │                   ├── custom_startup.sh
│   │                   ├── firefox.desktop
│   │                   └── install_firefox.sh
│   ├── lume
│   │   ├── .cursorignore
│   │   ├── CONTRIBUTING.md
│   │   ├── Development.md
│   │   ├── img
│   │   │   └── cli.png
│   │   ├── Package.resolved
│   │   ├── Package.swift
│   │   ├── README.md
│   │   ├── resources
│   │   │   └── lume.entitlements
│   │   ├── scripts
│   │   │   ├── build
│   │   │   │   ├── build-debug.sh
│   │   │   │   ├── build-release-notarized.sh
│   │   │   │   └── build-release.sh
│   │   │   └── install.sh
│   │   ├── src
│   │   │   ├── Commands
│   │   │   │   ├── Clone.swift
│   │   │   │   ├── Config.swift
│   │   │   │   ├── Create.swift
│   │   │   │   ├── Delete.swift
│   │   │   │   ├── Get.swift
│   │   │   │   ├── Images.swift
│   │   │   │   ├── IPSW.swift
│   │   │   │   ├── List.swift
│   │   │   │   ├── Logs.swift
│   │   │   │   ├── Options
│   │   │   │   │   └── FormatOption.swift
│   │   │   │   ├── Prune.swift
│   │   │   │   ├── Pull.swift
│   │   │   │   ├── Push.swift
│   │   │   │   ├── Run.swift
│   │   │   │   ├── Serve.swift
│   │   │   │   ├── Set.swift
│   │   │   │   └── Stop.swift
│   │   │   ├── ContainerRegistry
│   │   │   │   ├── ImageContainerRegistry.swift
│   │   │   │   ├── ImageList.swift
│   │   │   │   └── ImagesPrinter.swift
│   │   │   ├── Errors
│   │   │   │   └── Errors.swift
│   │   │   ├── FileSystem
│   │   │   │   ├── Home.swift
│   │   │   │   ├── Settings.swift
│   │   │   │   ├── VMConfig.swift
│   │   │   │   ├── VMDirectory.swift
│   │   │   │   └── VMLocation.swift
│   │   │   ├── LumeController.swift
│   │   │   ├── Main.swift
│   │   │   ├── Server
│   │   │   │   ├── Handlers.swift
│   │   │   │   ├── HTTP.swift
│   │   │   │   ├── Requests.swift
│   │   │   │   ├── Responses.swift
│   │   │   │   └── Server.swift
│   │   │   ├── Utils
│   │   │   │   ├── CommandRegistry.swift
│   │   │   │   ├── CommandUtils.swift
│   │   │   │   ├── Logger.swift
│   │   │   │   ├── NetworkUtils.swift
│   │   │   │   ├── Path.swift
│   │   │   │   ├── ProcessRunner.swift
│   │   │   │   ├── ProgressLogger.swift
│   │   │   │   ├── String.swift
│   │   │   │   └── Utils.swift
│   │   │   ├── Virtualization
│   │   │   │   ├── DarwinImageLoader.swift
│   │   │   │   ├── DHCPLeaseParser.swift
│   │   │   │   ├── ImageLoaderFactory.swift
│   │   │   │   └── VMVirtualizationService.swift
│   │   │   ├── VM
│   │   │   │   ├── DarwinVM.swift
│   │   │   │   ├── LinuxVM.swift
│   │   │   │   ├── VM.swift
│   │   │   │   ├── VMDetails.swift
│   │   │   │   ├── VMDetailsPrinter.swift
│   │   │   │   ├── VMDisplayResolution.swift
│   │   │   │   └── VMFactory.swift
│   │   │   └── VNC
│   │   │       ├── PassphraseGenerator.swift
│   │   │       └── VNCService.swift
│   │   └── tests
│   │       ├── Mocks
│   │       │   ├── MockVM.swift
│   │       │   ├── MockVMVirtualizationService.swift
│   │       │   └── MockVNCService.swift
│   │       ├── VM
│   │       │   └── VMDetailsPrinterTests.swift
│   │       ├── VMTests.swift
│   │       ├── VMVirtualizationServiceTests.swift
│   │       └── VNCServiceTests.swift
│   ├── lumier
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   └── src
│   │       ├── bin
│   │       │   └── entry.sh
│   │       ├── config
│   │       │   └── constants.sh
│   │       ├── hooks
│   │       │   └── on-logon.sh
│   │       └── lib
│   │           ├── utils.sh
│   │           └── vm.sh
│   ├── python
│   │   ├── agent
│   │   │   ├── agent
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── adapters
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── huggingfacelocal_adapter.py
│   │   │   │   │   ├── human_adapter.py
│   │   │   │   │   ├── mlxvlm_adapter.py
│   │   │   │   │   └── models
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── generic.py
│   │   │   │   │       ├── internvl.py
│   │   │   │   │       ├── opencua.py
│   │   │   │   │       └── qwen2_5_vl.py
│   │   │   │   ├── agent.py
│   │   │   │   ├── callbacks
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── budget_manager.py
│   │   │   │   │   ├── image_retention.py
│   │   │   │   │   ├── logging.py
│   │   │   │   │   ├── operator_validator.py
│   │   │   │   │   ├── pii_anonymization.py
│   │   │   │   │   ├── prompt_instructions.py
│   │   │   │   │   ├── telemetry.py
│   │   │   │   │   └── trajectory_saver.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── computers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cua.py
│   │   │   │   │   └── custom.py
│   │   │   │   ├── decorators.py
│   │   │   │   ├── human_tool
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   ├── server.py
│   │   │   │   │   └── ui.py
│   │   │   │   ├── integrations
│   │   │   │   │   └── hud
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── agent.py
│   │   │   │   │       └── proxy.py
│   │   │   │   ├── loops
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── anthropic.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── composed_grounded.py
│   │   │   │   │   ├── glm45v.py
│   │   │   │   │   ├── gta1.py
│   │   │   │   │   ├── holo.py
│   │   │   │   │   ├── internvl.py
│   │   │   │   │   ├── model_types.csv
│   │   │   │   │   ├── moondream3.py
│   │   │   │   │   ├── omniparser.py
│   │   │   │   │   ├── openai.py
│   │   │   │   │   ├── opencua.py
│   │   │   │   │   └── uitars.py
│   │   │   │   ├── proxy
│   │   │   │   │   ├── examples.py
│   │   │   │   │   └── handlers.py
│   │   │   │   ├── responses.py
│   │   │   │   ├── types.py
│   │   │   │   └── ui
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __main__.py
│   │   │   │       └── gradio
│   │   │   │           ├── __init__.py
│   │   │   │           ├── app.py
│   │   │   │           └── ui_components.py
│   │   │   ├── benchmarks
│   │   │   │   ├── .gitignore
│   │   │   │   ├── contrib.md
│   │   │   │   ├── interactive.py
│   │   │   │   ├── models
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   └── gta1.py
│   │   │   │   ├── README.md
│   │   │   │   ├── ss-pro.py
│   │   │   │   ├── ss-v2.py
│   │   │   │   └── utils.py
│   │   │   ├── example.py
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── computer
│   │   │   ├── computer
│   │   │   │   ├── __init__.py
│   │   │   │   ├── computer.py
│   │   │   │   ├── diorama_computer.py
│   │   │   │   ├── helpers.py
│   │   │   │   ├── interface
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── logger.py
│   │   │   │   ├── models.py
│   │   │   │   ├── providers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cloud
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── docker
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── lume
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── lume_api.py
│   │   │   │   │   ├── lumier
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   └── winsandbox
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── provider.py
│   │   │   │   │       └── setup_script.ps1
│   │   │   │   ├── ui
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   └── gradio
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── app.py
│   │   │   │   └── utils.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── computer-server
│   │   │   ├── computer_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── diorama
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── diorama_computer.py
│   │   │   │   │   ├── diorama.py
│   │   │   │   │   ├── draw.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── safezone.py
│   │   │   │   ├── handlers
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── main.py
│   │   │   │   ├── server.py
│   │   │   │   └── watchdog.py
│   │   │   ├── examples
│   │   │   │   ├── __init__.py
│   │   │   │   └── usage_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   ├── run_server.py
│   │   │   └── test_connection.py
│   │   ├── core
│   │   │   ├── core
│   │   │   │   ├── __init__.py
│   │   │   │   └── telemetry
│   │   │   │       ├── __init__.py
│   │   │   │       └── posthog.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── mcp-server
│   │   │   ├── mcp_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   └── server.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── scripts
│   │   │       ├── install_mcp_server.sh
│   │   │       └── start_mcp_server.sh
│   │   ├── pylume
│   │   │   ├── __init__.py
│   │   │   ├── pylume
│   │   │   │   ├── __init__.py
│   │   │   │   ├── client.py
│   │   │   │   ├── exceptions.py
│   │   │   │   ├── lume
│   │   │   │   ├── models.py
│   │   │   │   ├── pylume.py
│   │   │   │   └── server.py
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   └── som
│   │       ├── LICENSE
│   │       ├── poetry.toml
│   │       ├── pyproject.toml
│   │       ├── README.md
│   │       ├── som
│   │       │   ├── __init__.py
│   │       │   ├── detect.py
│   │       │   ├── detection.py
│   │       │   ├── models.py
│   │       │   ├── ocr.py
│   │       │   ├── util
│   │       │   │   └── utils.py
│   │       │   └── visualization.py
│   │       └── tests
│   │           └── test_omniparser.py
│   ├── typescript
│   │   ├── .gitignore
│   │   ├── .nvmrc
│   │   ├── agent
│   │   │   ├── examples
│   │   │   │   ├── playground-example.html
│   │   │   │   └── README.md
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── client.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   └── client.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── biome.json
│   │   ├── computer
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── computer
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── providers
│   │   │   │   │   │   ├── base.ts
│   │   │   │   │   │   ├── cloud.ts
│   │   │   │   │   │   └── index.ts
│   │   │   │   │   └── types.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── base.ts
│   │   │   │   │   ├── factory.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── linux.ts
│   │   │   │   │   ├── macos.ts
│   │   │   │   │   └── windows.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   ├── computer
│   │   │   │   │   └── cloud.test.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── factory.test.ts
│   │   │   │   │   ├── index.test.ts
│   │   │   │   │   ├── linux.test.ts
│   │   │   │   │   ├── macos.test.ts
│   │   │   │   │   └── windows.test.ts
│   │   │   │   └── setup.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── core
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── index.ts
│   │   │   │   └── telemetry
│   │   │   │       ├── clients
│   │   │   │       │   ├── index.ts
│   │   │   │       │   └── posthog.ts
│   │   │   │       └── index.ts
│   │   │   ├── tests
│   │   │   │   └── telemetry.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── pnpm-workspace.yaml
│   │   └── README.md
│   └── xfce
│       ├── .dockerignore
│       ├── .gitignore
│       ├── Dockerfile
│       ├── README.md
│       └── src
│           ├── scripts
│           │   ├── resize-display.sh
│           │   ├── start-computer-server.sh
│           │   ├── start-novnc.sh
│           │   ├── start-vnc.sh
│           │   └── xstartup.sh
│           ├── supervisor
│           │   └── supervisord.conf
│           └── xfce-config
│               ├── helpers.rc
│               ├── xfce4-power-manager.xml
│               └── xfce4-session.xml
├── LICENSE.md
├── notebooks
│   ├── agent_nb.ipynb
│   ├── blog
│   │   ├── build-your-own-operator-on-macos-1.ipynb
│   │   └── build-your-own-operator-on-macos-2.ipynb
│   ├── composite_agents_docker_nb.ipynb
│   ├── computer_nb.ipynb
│   ├── computer_server_nb.ipynb
│   ├── customizing_computeragent.ipynb
│   ├── eval_osworld.ipynb
│   ├── ollama_nb.ipynb
│   ├── pylume_nb.ipynb
│   ├── README.md
│   ├── sota_hackathon_cloud.ipynb
│   └── sota_hackathon.ipynb
├── pdm.lock
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── samples
│   └── community
│       ├── global-online
│       │   └── README.md
│       └── hack-the-north
│           └── README.md
├── scripts
│   ├── build-uv.sh
│   ├── build.ps1
│   ├── build.sh
│   ├── cleanup.sh
│   ├── playground-docker.sh
│   ├── playground.sh
│   └── run-docker-dev.sh
└── tests
    ├── pytest.ini
    ├── shell_cmd.py
    ├── test_files.py
    ├── test_shell_bash.py
    ├── test_telemetry.py
    ├── test_venv.py
    └── test_watchdog.py
```

# Files

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/callbacks/logging.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Logging
description: Agent logging and custom logger implementation
---

# Logging Callback

This page covers the built-in logging callback and how to create a custom logger for monitoring agent activity.

## Callbacks Example

```python
from agent.callbacks import LoggingCallback
import logging

agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[computer],
    callbacks=[
        LoggingCallback(
            logger=logging.getLogger("cua"), 
            level=logging.INFO
        )
    ]
)
```

## Shorthand

```python
agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[computer],
    verbosity=logging.INFO  # Auto-adds LoggingCallback
)
```

## Custom Logger

Create custom loggers by extending AsyncCallbackHandler:

```python
from agent.callbacks.base import AsyncCallbackHandler
import logging

class CustomLogger(AsyncCallbackHandler):
    def __init__(self, logger_name="agent"):
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(logging.INFO)
        
        # Add console handler
        handler = logging.StreamHandler()
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)
    
    async def on_run_start(self, kwargs, old_items):
        self.logger.info(f"Agent run started with model: {kwargs.get('model')}")
    
    async def on_computer_call_start(self, item):
        action = item.get('action', {})
        self.logger.info(f"Computer action: {action.get('type')}")
    
    async def on_usage(self, usage):
        cost = usage.get('response_cost', 0)
        self.logger.info(f"API call cost: ${cost:.4f}")
    
    async def on_run_end(self, kwargs, old_items, new_items):
        self.logger.info("Agent run completed")

# Use custom logger
agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[computer],
    callbacks=[CustomLogger("my_agent")]
)
```

## Available Hooks

Log any agent event using these callback methods:
- `on_run_start/end` - Run lifecycle
- `on_computer_call_start/end` - Computer actions
- `on_api_start/end` - LLM API calls
- `on_usage` - Cost tracking
- `on_screenshot` - Screenshot events

```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/chat-history.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Chat History
description: Managing conversation history and message arrays
---

Managing conversation history is essential for multi-turn agent interactions. The agent maintains a messages array that tracks the entire conversation flow.

## Managing History

### Continuous Conversation

```python
history = []

while True:
    user_input = input("> ")
    history.append({"role": "user", "content": user_input})

    async for result in agent.run(history, stream=False):
        history += result["output"]
```

## Message Array Structure

The messages array contains different types of messages that represent the conversation state:

```python
messages = [
    # user input
    {
        "role": "user",
        "content": "go to trycua on gh"
    },
    # first agent turn adds the model output to the history
    {
        "summary": [
            {
                "text": "Searching Firefox for Trycua GitHub",
                "type": "summary_text"
            }
        ],
        "type": "reasoning"
    },
    {
        "action": {
            "text": "Trycua GitHub",
            "type": "type"
        },
        "call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq",
        "status": "completed",
        "type": "computer_call"
    },
    # second agent turn adds the computer output to the history
    {
        "type": "computer_call_output",
        "call_id": "call_QI6OsYkXxl6Ww1KvyJc4LKKq",
        "output": {
            "type": "input_image",
            "image_url": "data:image/png;base64,..."
        }
    },
    # final agent turn adds the agent output text to the history
    {
        "type": "message",
        "role": "assistant",
        "content": [
          {
            "text": "Success! The Trycua GitHub page has been opened.",
            "type": "output_text"
          }
        ]
    }
]
```

## Message Types

See the complete schema in [Message Format](./message-format).

### Memory Management

For long conversations, consider using the `only_n_most_recent_images` parameter to manage memory:

```python
agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[computer],
    only_n_most_recent_images=3
)
```

This automatically removes old images from the conversation history to prevent context window overflow.

```

--------------------------------------------------------------------------------
/.github/workflows/pypi-publish-computer-server.yml:
--------------------------------------------------------------------------------

```yaml
name: Publish Computer Server Package

on:
  push:
    tags:
      - "computer-server-v*"
  workflow_dispatch:
    inputs:
      version:
        description: "Version to publish (without v prefix)"
        required: true
        default: "0.1.0"
  workflow_call:
    inputs:
      version:
        description: "Version to publish"
        required: true
        type: string
    outputs:
      version:
        description: "The version that was published"
        value: ${{ jobs.prepare.outputs.version }}

# Write access to repo contents is required by the reusable publish workflow
# (tag/release operations).
permissions:
  contents: write

jobs:
  # Resolves the version string from whichever trigger fired (tag push,
  # manual dispatch, or workflow_call) and exposes it as a job output.
  prepare:
    runs-on: macos-latest
    outputs:
      version: ${{ steps.get-version.outputs.version }}
    steps:
      - uses: actions/checkout@v4

      - name: Determine version
        id: get-version
        run: |
          if [ "${{ github.event_name }}" == "push" ]; then
            # Extract version from tag (for package-specific tags)
            if [[ "${{ github.ref }}" =~ ^refs/tags/computer-server-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then
              VERSION=${BASH_REMATCH[1]}
            else
              echo "Invalid tag format for computer-server"
              exit 1
            fi
          elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
            # Quote the expansion: this is free-text user input, and an
            # unquoted value containing spaces or shell metacharacters
            # would break (or inject into) this script.
            VERSION="${{ github.event.inputs.version }}"
          else
            # Use version from workflow_call (quoted for the same reason).
            VERSION="${{ inputs.version }}"
          fi
          echo "VERSION=$VERSION"
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

      # NOTE(review): nothing in this job uses Python after this step; it
      # appears to be leftover scaffolding and is a candidate for removal.
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

  # Delegates the actual build and PyPI upload to the shared reusable workflow.
  publish:
    needs: prepare
    uses: ./.github/workflows/pypi-reusable-publish.yml
    with:
      package_name: "computer-server"
      package_dir: "libs/python/computer-server"
      version: ${{ needs.prepare.outputs.version }}
      is_lume_package: false
      base_package_name: "cua-computer-server"
    secrets:
      PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}

  set-env-variables:
    needs: [prepare, publish]
    runs-on: macos-latest
    steps:
      # NOTE(review): $GITHUB_ENV only persists for later steps of the SAME
      # job — this value is not visible to other jobs or workflows, so this
      # job currently has no observable effect. If other jobs need the
      # version, expose it as a job output instead; confirm intent.
      - name: Set environment variables for use in other jobs
        run: |
          echo "COMPUTER_VERSION=${{ needs.prepare.outputs.version }}" >> $GITHUB_ENV

```

--------------------------------------------------------------------------------
/libs/lume/src/VNC/PassphraseGenerator.swift:
--------------------------------------------------------------------------------

```swift
import Foundation
import CryptoKit

final class PassphraseGenerator {
    private let words: [String]

    /// Creates a generator over the given word list (defaults to the built-in list).
    init(words: [String] = PassphraseGenerator.defaultWords) {
        self.words = words
    }

    /// Returns `count` words drawn uniformly at random from the word list.
    ///
    /// Uses `Int.random(in:)`, which draws from the system's secure random
    /// number generator and is bias-free. The previous implementation built a
    /// `UInt32` from raw bytes and reduced it with `%`, which over-weights
    /// low-index words (modulo bias) and crashes on an empty word list.
    ///
    /// - Parameter count: Number of words to produce; non-positive counts yield `[]`.
    /// - Returns: An array of `count` randomly chosen words, or `[]` if the
    ///   word list is empty.
    func prefix(_ count: Int) -> [String] {
        guard count > 0, !words.isEmpty else { return [] }
        return (0..<count).map { _ in words[Int.random(in: 0..<words.count)] }
    }

    // A much larger set of common, easy-to-type words.
    // Note: a duplicate "orange" entry was removed so every word is equally likely.
    private static let defaultWords = [
        "alpha", "bravo", "charlie", "delta", "echo", "foxtrot", "golf", "hotel",
        "india", "juliet", "kilo", "lima", "mike", "november", "oscar", "papa",
        "quebec", "romeo", "sierra", "tango", "uniform", "victor", "whiskey", "xray",
        "yankee", "zulu", "zero", "one", "two", "three", "four", "five",
        "six", "seven", "eight", "nine", "apple", "banana", "cherry", "date",
        "elder", "fig", "grape", "honey", "iris", "jade", "kiwi", "lemon",
        "mango", "nectarine", "orange", "peach", "quince", "raspberry", "strawberry", "tangerine",
        "red", "blue", "green", "yellow", "purple", "pink", "brown",
        "black", "white", "gray", "silver", "gold", "copper", "bronze", "steel",
        "north", "south", "east", "west", "spring", "summer", "autumn", "winter",
        "river", "ocean", "mountain", "valley", "forest", "desert", "island", "beach",
        "sun", "moon", "star", "cloud", "rain", "snow", "wind", "storm",
        "happy", "brave", "calm", "swift", "wise", "kind", "bold", "free",
        "safe", "strong", "bright", "clear", "light", "soft", "warm", "cool",
        "eagle", "falcon", "hawk", "owl", "robin", "sparrow", "swan", "dove",
        "tiger", "lion", "bear", "wolf", "deer", "horse", "dolphin", "whale",
        "maple", "oak", "pine", "birch", "cedar", "fir", "palm", "willow",
        "rose", "lily", "daisy", "tulip", "lotus", "orchid", "violet", "jasmine"
    ]
}
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/benchmarks/introduction.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Introduction
description: Overview of benchmarking in the c/ua agent framework
---

The c/ua agent framework uses benchmarks to test the performance of supported models and providers at various agentic tasks.

## Benchmark Types

Computer-Agent benchmarks evaluate two key capabilities:
- **Plan Generation**: Breaking down complex tasks into a sequence of actions
- **Coordinate Generation**: Predicting precise click locations on GUI elements

## Using State-of-the-Art Models

Let's see how to use the SOTA vision-language models in the c/ua agent framework.

### Plan Generation + Coordinate Generation

**[OS-World](https://os-world.github.io/)** - Benchmark for complete computer-use agents

This leaderboard tests models that can understand instructions and automatically perform the full sequence of actions needed to complete tasks.

```python
# UI-TARS-1.5 is a SOTA unified plan generation + coordinate generation VLM
# This makes it suitable for agentic loops for computer-use
agent = ComputerAgent("huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", tools=[computer])
agent.run("Open Firefox and go to github.com")
# Success! 🎉
```

### Coordinate Generation Only

**[GUI Agent Grounding Leaderboard](https://gui-agent.github.io/grounding-leaderboard/)** - Benchmark for click prediction accuracy  

This leaderboard tests models that specialize in finding exactly where to click on screen elements, but need to be told what specific action to take.

```python
# GTA1-7B is a SOTA coordinate generation VLM
# It can only generate coordinates, not plan:
agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B", tools=[computer])
agent.predict_click("find the button to open the settings") # (27, 450)
# This will raise an error:
# agent.run("Open Firefox and go to github.com") 
```

### Composed Agent

The c/ua agent framework also supports composed agents, which combine a planning model with a clicking model for the best of both worlds. Any liteLLM model can be used as the plan generation model.

```python
# It can be paired with any LLM to form a composed agent:
# "gemini/gemini-1.5-pro" will be used as the plan generation LLM
agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B+gemini/gemini-1.5-pro", tools=[computer])
agent.run("Open Firefox and go to github.com")
# Success! 🎉
```

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/loops/base.py:
--------------------------------------------------------------------------------

```python
"""
Base protocol for async agent configurations
"""

from typing import Protocol, List, Dict, Any, Optional, Tuple, Union
from abc import abstractmethod
from ..types import AgentCapability

class AsyncAgentConfig(Protocol):
    """Protocol defining the interface for async agent configurations.

    Implementations provide a full agentic step loop (``predict_step``),
    direct click-coordinate prediction (``predict_click``), or both; the
    supported subset is reported by ``get_capabilities``.
    """

    @abstractmethod
    async def predict_step(
        self,
        messages: List[Dict[str, Any]],
        model: str,
        tools: Optional[List[Dict[str, Any]]] = None,
        max_retries: Optional[int] = None,
        stream: bool = False,
        computer_handler: Optional[Any] = None,
        _on_api_start: Optional[Any] = None,
        _on_api_end: Optional[Any] = None,
        _on_usage: Optional[Any] = None,
        _on_screenshot: Optional[Any] = None,
        **kwargs: Any
    ) -> Dict[str, Any]:
        """
        Predict the next step based on input items.

        Args:
            messages: Input items following Responses format (message, function_call, computer_call)
            model: Model name to use
            tools: Optional list of tool schemas
            max_retries: Maximum number of retries for failed API calls
            stream: Whether to stream responses
            computer_handler: Computer handler instance
            _on_api_start: Callback for API start
            _on_api_end: Callback for API end
            _on_usage: Callback for usage tracking
            _on_screenshot: Callback for screenshot events
            **kwargs: Additional arguments

        Returns:
            Dictionary with "output" (output items) and "usage" array
        """
        ...

    @abstractmethod
    async def predict_click(
        self,
        model: str,
        image_b64: str,
        instruction: str
    ) -> Optional[Tuple[int, int]]:
        """
        Predict click coordinates based on image and instruction.

        Args:
            model: Model name to use
            image_b64: Base64 encoded image
            instruction: Instruction for where to click

        Returns:
            None or tuple with (x, y) coordinates
        """
        ...

    @abstractmethod
    def get_capabilities(self) -> List[AgentCapability]:
        """
        Get list of capabilities supported by this agent config.

        Returns:
            List of capability strings (e.g., ["step", "click"])
        """
        ...
```

--------------------------------------------------------------------------------
/libs/typescript/computer/src/computer/providers/cloud.ts:
--------------------------------------------------------------------------------

```typescript
import pino from 'pino';
import {
  type BaseComputerInterface,
  InterfaceFactory,
} from '../../interface/index';
import type { CloudComputerConfig, VMProviderType } from '../types';
import { BaseComputer } from './base';

/**
 * Cloud-specific computer implementation
 */
export class CloudComputer extends BaseComputer {
  protected static vmProviderType: VMProviderType.CLOUD;
  protected apiKey: string;
  private iface?: BaseComputerInterface;
  private initialized = false;

  protected logger = pino({ name: 'computer.provider_cloud' });

  constructor(config: CloudComputerConfig) {
    super(config);
    this.apiKey = config.apiKey;
  }

  get ip() {
    return `${this.name}.containers.cloud.trycua.com`;
  }

  /**
   * Initialize the cloud VM and interface
   */
  async run(): Promise<void> {
    if (this.initialized) {
      this.logger.info('Computer already initialized, skipping initialization');
      return;
    }

    try {
      // For cloud provider, the VM is already running, we just need to connect
      const ipAddress = this.ip;
      this.logger.info(`Connecting to cloud VM at ${ipAddress}`);

      // Create the interface with API key authentication
      this.iface = InterfaceFactory.createInterfaceForOS(
        this.osType,
        ipAddress,
        this.apiKey,
        this.name
      );

      // Wait for the interface to be ready
      this.logger.info('Waiting for interface to be ready...');
      await this.iface.waitForReady();

      this.initialized = true;
      this.logger.info('Cloud computer ready');
    } catch (error) {
      this.logger.error(`Failed to initialize cloud computer: ${error}`);
      throw new Error(`Failed to initialize cloud computer: ${error}`);
    }
  }

  /**
   * Stop the cloud computer (disconnect interface)
   */
  async stop(): Promise<void> {
    this.logger.info('Disconnecting from cloud computer...');

    if (this.iface) {
      this.iface.disconnect();
      this.iface = undefined;
    }

    this.initialized = false;
    this.logger.info('Disconnected from cloud computer');
  }

  /**
   * Get the computer interface
   */
  get interface(): BaseComputerInterface {
    if (!this.iface) {
      throw new Error('Computer not initialized. Call run() first.');
    }
    return this.iface;
  }

  /**
   * Disconnect from the cloud computer
   */
  async disconnect(): Promise<void> {
    await this.stop();
  }
}

```

--------------------------------------------------------------------------------
/libs/lume/src/Commands/Push.swift:
--------------------------------------------------------------------------------

```swift
import ArgumentParser
import Foundation

struct Push: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Push a macOS VM to GitHub Container Registry"
    )

    @Argument(help: "Name of the VM to push")
    var name: String

    @Argument(help: "Image tag to push (format: name:tag)")
    var image: String

    @Option(parsing: .upToNextOption, help: "Additional tags to push the same image to")
    var additionalTags: [String] = []

    @Option(help: "Github Container Registry to push to. Defaults to ghcr.io")
    var registry: String = "ghcr.io"

    @Option(help: "Organization to push to. Defaults to trycua")
    var organization: String = "trycua"

    @Option(name: .customLong("storage"), help: "VM storage location to use")
    var storage: String?

    @Option(help: "Chunk size for large files in MB. Defaults to 512.")
    var chunkSizeMb: Int = 512

    @Flag(name: .long, help: "Enable verbose logging")
    var verbose: Bool = false

    @Flag(name: .long, help: "Prepare files without uploading to registry")
    var dryRun: Bool = false

    // Fixed: a plain @Flag defaulting to `true` could never be switched off.
    // `.prefixedNo` inversion adds --no-reassemble while --reassemble stays valid.
    @Flag(name: .long, inversion: .prefixedNo, help: "In dry-run mode, also reassemble chunks to verify integrity")
    var reassemble: Bool = true

    init() {}

    /// Parses the primary image reference, collects the unique tag set, and
    /// delegates the actual push to `LumeController.pushImage`.
    @MainActor
    func run() async throws {
        let controller = LumeController()

        // Parse primary image name and tag (strict "name:tag" form).
        let components = image.split(separator: ":")
        guard components.count == 2, let primaryTag = components.last else {
            throw ValidationError("Invalid primary image format. Expected format: name:tag")
        }
        let imageName = String(components.first!)

        // Combine primary and additional tags, ensuring uniqueness.
        // The set always contains the primary tag, so it is never empty.
        var allTags: Swift.Set<String> = []
        allTags.insert(String(primaryTag))
        allTags.formUnion(additionalTags)

        try await controller.pushImage(
            name: name,
            imageName: imageName, // Pass base image name
            tags: Array(allTags), // Pass array of all unique tags
            registry: registry,
            organization: organization,
            storage: storage,
            chunkSizeMb: chunkSizeMb,
            verbose: verbose,
            dryRun: dryRun,
            reassemble: reassemble
        )
    }
}
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: All‑in‑one CUA Models
description: Models that support full computer-use agent capabilities with ComputerAgent.run()
---

These models support complete computer-use agent functionality through `ComputerAgent.run()`. They can understand natural language instructions and autonomously perform sequences of actions to complete tasks.

All agent loops are compatible with any LLM provider supported by LiteLLM.

See [Running Models Locally](../local-models) for how to use Hugging Face and MLX models on your own machine.

## Anthropic CUAs

Claude models with computer-use capabilities:

- Claude 4.5: `claude-sonnet-4-5-20250929`
- Claude 4.1: `claude-opus-4-1-20250805`
- Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514`
- Claude 3.7: `claude-3-7-sonnet-20250219`
- Claude 3.5: `claude-3-5-sonnet-20241022`

```python
agent = ComputerAgent("claude-3-5-sonnet-20241022", tools=[computer])
async for _ in agent.run("Open Firefox and navigate to github.com"):
    pass
```

## OpenAI CUA Preview

OpenAI's computer-use preview model:

- Computer-use-preview: `computer-use-preview`

```python
agent = ComputerAgent("openai/computer-use-preview", tools=[computer])
async for _ in agent.run("Take a screenshot and describe what you see"):
    pass
```

## GLM-4.5V

Zhipu AI's GLM-4.5V vision-language model with computer-use capabilities:

- `openrouter/z-ai/glm-4.5v`
- `huggingface-local/zai-org/GLM-4.5V`

```python
agent = ComputerAgent("openrouter/z-ai/glm-4.5v", tools=[computer])
async for _ in agent.run("Click on the search bar and type 'hello world'"):
    pass
```

## InternVL 3.5

InternVL 3.5 family:
- `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}`

```python
agent = ComputerAgent("huggingface-local/OpenGVLab/InternVL3_5-1B", tools=[computer])
async for _ in agent.run("Open Firefox and navigate to github.com"):
    pass
```

## UI-TARS 1.5

Unified vision-language model for computer-use:

- `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B`
- `huggingface/ByteDance-Seed/UI-TARS-1.5-7B` (requires TGI endpoint)

```python
agent = ComputerAgent("huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", tools=[computer])
async for _ in agent.run("Open the settings menu and change the theme to dark mode"):
    pass
```

---

CUAs also support direct click prediction. See [Grounding Models](./grounding-models) for details on `predict_click()`.

For details on agent loop behavior and usage, see [Agent Loops](../agent-loops).

```

--------------------------------------------------------------------------------
/.github/workflows/pypi-publish-pylume.yml:
--------------------------------------------------------------------------------

```yaml
name: Publish Pylume Package

on:
  push:
    tags:
      - "pylume-v*"
  workflow_dispatch:
    inputs:
      version:
        description: "Version to publish (without v prefix)"
        required: true
        default: "0.1.0"
  workflow_call:
    inputs:
      version:
        description: "Version to publish"
        required: true
        type: string
    outputs:
      version:
        description: "The version that was published"
        value: ${{ jobs.determine-version.outputs.version }}

# Adding permissions at workflow level
permissions:
  contents: write

jobs:
  # Resolve the version to publish from the triggering event.
  determine-version:
    runs-on: macos-latest
    outputs:
      version: ${{ steps.get-version.outputs.version }}
    steps:
      - uses: actions/checkout@v4

      - name: Determine version
        id: get-version
        run: |
          if [ "${{ github.event_name }}" == "push" ]; then
            # Extract version from tag (for package-specific tags)
            if [[ "${{ github.ref }}" =~ ^refs/tags/pylume-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then
              VERSION=${BASH_REMATCH[1]}
            else
              echo "Invalid tag format for pylume"
              exit 1
            fi
          elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
            # Use version from workflow dispatch
            VERSION=${{ github.event.inputs.version }}
          else
            # Use version from workflow_call
            VERSION=${{ inputs.version }}
          fi
          echo "VERSION=$VERSION"
          echo "version=$VERSION" >> $GITHUB_OUTPUT

  # Ensure the requested version matches __version__ in the source tree.
  validate-version:
    runs-on: macos-latest
    needs: determine-version
    steps:
      - uses: actions/checkout@v4
      - name: Validate version
        id: validate-version
        run: |
          CODE_VERSION=$(grep '__version__' libs/python/pylume/pylume/__init__.py | cut -d'"' -f2)
          if [ "${{ needs.determine-version.outputs.version }}" != "$CODE_VERSION" ]; then
            echo "Version mismatch: expected $CODE_VERSION, got ${{ needs.determine-version.outputs.version }}"
            exit 1
          fi
          echo "Version validated: $CODE_VERSION"

  publish:
    # Fixed: previously only depended on determine-version, so a failed
    # validate-version job did NOT block publishing. Gate on both jobs.
    needs: [determine-version, validate-version]
    uses: ./.github/workflows/pypi-reusable-publish.yml
    with:
      package_name: "pylume"
      package_dir: "libs/python/pylume"
      version: ${{ needs.determine-version.outputs.version }}
      is_lume_package: true
      base_package_name: "pylume"
    secrets:
      PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
```

--------------------------------------------------------------------------------
/libs/lume/src/FileSystem/VMLocation.swift:
--------------------------------------------------------------------------------

```swift
import Foundation

/// A named filesystem location in which VMs are stored.
struct VMLocation: Codable, Equatable, Sendable {
    let name: String
    let path: String

    /// The storage path with a leading tilde expanded to the user's home.
    var expandedPath: String {
        (path as NSString).expandingTildeInPath
    }

    /// Ensures the location is usable: either an existing writable directory,
    /// or a path at which a directory can be created.
    ///
    /// - Throws: `VMLocationError.notADirectory` if the path exists but is a
    ///   file, `VMLocationError.directoryNotWritable` if it is not writable,
    ///   or `VMLocationError.directoryCreationFailed` if creation fails.
    func validate() throws {
        let resolved = expandedPath
        let fm = FileManager.default
        var isDirectory: ObjCBool = false

        guard fm.fileExists(atPath: resolved, isDirectory: &isDirectory) else {
            // Path does not exist yet: attempt to create it (with intermediates).
            do {
                try fm.createDirectory(
                    atPath: resolved,
                    withIntermediateDirectories: true
                )
            } catch {
                throw VMLocationError.directoryCreationFailed(path: resolved, error: error)
            }
            return
        }

        guard isDirectory.boolValue else {
            throw VMLocationError.notADirectory(path: resolved)
        }
        guard fm.isWritableFile(atPath: resolved) else {
            throw VMLocationError.directoryNotWritable(path: resolved)
        }
    }
}

// MARK: - Errors

/// Errors raised while validating or managing VM storage locations.
enum VMLocationError: Error, LocalizedError {
    /// The path exists but is a regular file, not a directory.
    case notADirectory(path: String)
    /// The directory exists but the current user cannot write to it.
    case directoryNotWritable(path: String)
    /// Creating the directory failed; wraps the underlying error.
    case directoryCreationFailed(path: String, error: Error)
    /// No location is registered under the given name.
    case locationNotFound(name: String)
    /// A location with the given name already exists.
    case duplicateLocationName(name: String)
    /// The name contains characters outside alphanumerics/underscores/dashes.
    case invalidLocationName(name: String)
    /// The default location must be replaced before it can be removed.
    case defaultLocationCannotBeRemoved(name: String)

    // Human-readable message for each case (LocalizedError conformance).
    var errorDescription: String? {
        switch self {
        case .notADirectory(let path):
            return "Path is not a directory: \(path)"
        case .directoryNotWritable(let path):
            return "Directory is not writable: \(path)"
        case .directoryCreationFailed(let path, let error):
            return "Failed to create directory at \(path): \(error.localizedDescription)"
        case .locationNotFound(let name):
            return "VM location not found: \(name)"
        case .duplicateLocationName(let name):
            return "VM location with name '\(name)' already exists"
        case .invalidLocationName(let name):
            return
                "Invalid location name: \(name). Names should be alphanumeric with underscores or dashes."
        case .defaultLocationCannotBeRemoved(let name):
            return "Cannot remove the default location '\(name)'. Set a new default location first."
        }
    }
}

```

--------------------------------------------------------------------------------
/docs/content/docs/computer-sdk/computer-ui.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Computer UI
---

The computer module includes a Gradio UI for creating and sharing demonstration data. We make it easy for people to build community datasets for better computer use models with an upload to Huggingface feature.

```bash
# Install with UI support
pip install "cua-computer[ui]"
```

<Callout title="Note">
For precise control of the computer, we recommend using VNC or Screen Sharing instead of the Computer Gradio UI.
</Callout>

### Building and Sharing Demonstrations with Huggingface

Follow these steps to contribute your own demonstrations:

#### 1. Set up Huggingface Access

Set your HF_TOKEN in a .env file or in your environment variables:

```bash
# In .env file
HF_TOKEN=your_huggingface_token
```

#### 2. Launch the Computer UI

```python
# launch_ui.py
from computer.ui.gradio.app import create_gradio_ui
from dotenv import load_dotenv
load_dotenv('.env')

app = create_gradio_ui()
app.launch(share=False)
```

For examples, see [Computer UI Examples](https://github.com/trycua/cua/tree/main/examples/computer_ui_examples.py)

#### 3. Record Your Tasks

<details open>
<summary>View demonstration video</summary>
<video src="https://github.com/user-attachments/assets/de3c3477-62fe-413c-998d-4063e48de176" controls width="600"></video>
</details>

Record yourself performing various computer tasks using the UI.

#### 4. Save Your Demonstrations

<details open>
<summary>View demonstration video</summary>
<video src="https://github.com/user-attachments/assets/5ad1df37-026a-457f-8b49-922ae805faef" controls width="600"></video>
</details>

Save each task by picking a descriptive name and adding relevant tags (e.g., "office", "web-browsing", "coding").

#### 5. Record Additional Demonstrations

Repeat steps 3 and 4 until you have a good number of demonstrations covering different tasks and scenarios.

#### 6. Upload to Huggingface

<details open>
<summary>View demonstration video</summary>
<video src="https://github.com/user-attachments/assets/c586d460-3877-4b5f-a736-3248886d2134" controls width="600"></video>
</details>

Upload your dataset to Huggingface by:
- Naming it as `{your_username}/{dataset_name}`
- Choosing public or private visibility
- Optionally selecting specific tags to upload only tasks with certain tags

#### Examples and Resources

- Example Dataset: [ddupont/test-dataset](https://huggingface.co/datasets/ddupont/test-dataset)
- Find Community Datasets: 🔍 [Browse CUA Datasets on Huggingface](https://huggingface.co/datasets?other=cua)
```

--------------------------------------------------------------------------------
/libs/xfce/src/xfce-config/xfce4-session.xml:
--------------------------------------------------------------------------------

```
<?xml version="1.0" encoding="UTF-8"?>
<!-- xfce4-session channel: defines the failsafe session used by the container
     (window manager, panel, desktop, settings daemon, notifications) and
     disables shutdown/suspend options and the screensaver. -->
<channel name="xfce4-session" version="1.0">
  <property name="general" type="empty">
    <property name="FailsafeSessionName" type="string" value="Failsafe"/>
    <property name="SessionName" type="string" value="Default"/>
    <!-- Do not persist session state between runs -->
    <property name="SaveOnExit" type="bool" value="false"/>
  </property>
  <property name="sessions" type="empty">
    <!-- Five clients, started in ascending Priority order -->
    <property name="Failsafe" type="empty">
      <property name="IsFailsafe" type="bool" value="true"/>
      <property name="Count" type="int" value="5"/>
      <property name="Client0_Command" type="array">
        <value type="string" value="xfwm4"/>
      </property>
      <property name="Client0_Priority" type="int" value="15"/>
      <property name="Client0_PerScreen" type="bool" value="false"/>
      <property name="Client1_Command" type="array">
        <value type="string" value="xfce4-panel"/>
      </property>
      <property name="Client1_Priority" type="int" value="25"/>
      <property name="Client1_PerScreen" type="bool" value="false"/>
      <property name="Client2_Command" type="array">
        <value type="string" value="xfdesktop"/>
      </property>
      <property name="Client2_Priority" type="int" value="35"/>
      <property name="Client2_PerScreen" type="bool" value="false"/>
      <property name="Client3_Command" type="array">
        <value type="string" value="xfsettingsd"/>
      </property>
      <property name="Client3_Priority" type="int" value="10"/>
      <property name="Client3_PerScreen" type="bool" value="false"/>
      <property name="Client4_Command" type="array">
        <value type="string" value="xfce4-notifyd"/>
      </property>
      <property name="Client4_Priority" type="int" value="20"/>
      <property name="Client4_PerScreen" type="bool" value="false"/>
    </property>
  </property>
  <property name="splash" type="empty">
    <!-- Empty engine disables the startup splash screen -->
    <property name="Engine" type="string" value=""/>
  </property>
  <property name="compat" type="empty">
    <property name="LaunchGNOME" type="bool" value="false"/>
  </property>
  <!-- Hide power-management actions that make no sense in a container -->
  <property name="shutdown" type="empty">
    <property name="ShowSuspend" type="bool" value="false"/>
    <property name="ShowHibernate" type="bool" value="false"/>
    <property name="ShowHybridSleep" type="bool" value="false"/>
    <property name="ShowSwitchUser" type="bool" value="false"/>
  </property>
  <property name="screensaver" type="empty">
    <property name="enabled" type="bool" value="false"/>
    <property name="lock-enabled" type="bool" value="false"/>
  </property>
</channel>

```

--------------------------------------------------------------------------------
/examples/pylume_examples.py:
--------------------------------------------------------------------------------

```python
import asyncio
from pylume import PyLume, ImageRef, VMRunOpts, SharedDirectory, VMConfig, VMUpdateOpts


async def main():
    """Example usage of PyLume.

    Walks through the main operations: querying the latest IPSW URL, creating,
    listing, updating, pulling, running, cloning, stopping, and deleting VMs.

    NOTE(review): the get/update/run/clone/stop steps target a VM named
    "lume-vm", which this script does not create — it is assumed to already
    exist. Confirm against your local lume setup.
    """
    async with PyLume(port=7777, use_existing_server=False, debug=True) as pylume:

        # Get latest IPSW URL (usable when creating a VM with ipsw="latest").
        # Fixed: this section previously appeared twice verbatim; the duplicate
        # call has been removed.
        print("\n=== Getting Latest IPSW URL ===")
        url = await pylume.get_latest_ipsw_url()
        print("Latest IPSW URL:", url)

        # Create a new VM
        print("\n=== Creating a new VM ===")
        vm_config = VMConfig(
            name="lume-vm-new",
            os="macOS",
            cpu=2,
            memory="4GB",
            disk_size="64GB",  # type: ignore
            display="1024x768",
            ipsw="latest",
        )
        await pylume.create_vm(vm_config)

        # List available images
        print("\n=== Listing Available Images ===")
        images = await pylume.get_images()
        print("Available Images:", images)

        # List all VMs to verify creation
        print("\n=== Listing All VMs ===")
        vms = await pylume.list_vms()
        print("VMs:", vms)

        # Get specific VM details
        print("\n=== Getting VM Details ===")
        vm = await pylume.get_vm("lume-vm")
        print("VM Details:", vm)

        # Update VM settings
        print("\n=== Updating VM Settings ===")
        update_opts = VMUpdateOpts(cpu=8, memory="4GB")
        await pylume.update_vm("lume-vm", update_opts)

        # Pull a prebuilt image from the registry into a new VM
        image_ref = ImageRef(
            image="macos-sequoia-vanilla", tag="latest", registry="ghcr.io", organization="trycua"
        )
        await pylume.pull_image(image_ref, name="lume-vm-pulled")

        # Run with shared directory
        run_opts = VMRunOpts(
            no_display=False,  # type: ignore
            shared_directories=[  # type: ignore
                SharedDirectory(host_path="~/shared", read_only=False)  # type: ignore
            ],
        )
        await pylume.run_vm("lume-vm", run_opts)

        # Or simpler (default options, no shared directories):
        await pylume.run_vm("lume-vm")

        # Clone VM
        print("\n=== Cloning VM ===")
        await pylume.clone_vm("lume-vm", "lume-vm-cloned")

        # Stop VM
        print("\n=== Stopping VM ===")
        await pylume.stop_vm("lume-vm")

        # Delete VM
        print("\n=== Deleting VM ===")
        await pylume.delete_vm("lume-vm-cloned")


if __name__ == "__main__":
    asyncio.run(main())

```

--------------------------------------------------------------------------------
/scripts/cleanup.sh:
--------------------------------------------------------------------------------

```bash
#!/bin/bash
# cleanup.sh — remove all caches, virtual environments, build artifacts, and
# temporary files from the project tree. Destructive: everything matched by
# the find patterns below is deleted without confirmation.

# Exit on error
set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Function to print step information
print_step() {
    echo -e "${BLUE}==> $1${NC}"
}

# Function to print success message
print_success() {
    echo -e "${GREEN}==> Success: $1${NC}"
}

# Function to print error message
print_error() {
    echo -e "${RED}==> Error: $1${NC}" >&2
}

# Get the script's directory
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_ROOT="$SCRIPT_DIR/.."

# Change to project root so all find/rm commands operate on the repo tree
cd "$PROJECT_ROOT"

print_step "Starting cleanup of all caches and virtual environments..."

# Remove all virtual environments
print_step "Removing virtual environments..."
find . -type d -name ".venv" -exec rm -rf {} +
print_success "Virtual environments removed"

# Remove all Python cache files and directories
print_step "Removing Python cache files and directories..."
find . -type d -name "__pycache__" -exec rm -rf {} +
find . -type d -name ".pytest_cache" -exec rm -rf {} +
find . -type d -name ".mypy_cache" -exec rm -rf {} +
find . -type d -name ".ruff_cache" -exec rm -rf {} +
find . -name "*.pyc" -delete
find . -name "*.pyo" -delete
find . -name "*.pyd" -delete
print_success "Python cache files removed"

# Remove all build artifacts
print_step "Removing build artifacts..."
find . -type d -name "build" -exec rm -rf {} +
find . -type d -name "dist" -exec rm -rf {} +
find . -type d -name "*.egg-info" -exec rm -rf {} +
find . -type d -name "*.egg" -exec rm -rf {} +
print_success "Build artifacts removed"

# Remove PDM-related files and directories
print_step "Removing PDM-related files and directories..."
find . -name "pdm.lock" -delete
find . -type d -name ".pdm-build" -exec rm -rf {} +
find . -name ".pdm-python" -delete  # .pdm-python is a file, not a directory
print_success "PDM-related files removed"

# Remove MCP-related files
print_step "Removing MCP-related files..."
find . -name "mcp_server.log" -delete
print_success "MCP-related files removed"

# Remove .env file (local secrets/config; -f so a missing file is not an error)
print_step "Removing .env file..."
rm -f .env
print_success ".env file removed"

# Remove typings directory
print_step "Removing typings directory..."
rm -rf .vscode/typings
print_success "Typings directory removed"

# Clean up any temporary files
print_step "Removing temporary files..."
find . -name "*.tmp" -delete
find . -name "*.bak" -delete
find . -name "*.swp" -delete
print_success "Temporary files removed"

print_success "Cleanup complete! All caches and virtual environments have been removed."
print_step "To rebuild the project, run: bash scripts/build.sh"

```

--------------------------------------------------------------------------------
/tests/shell_cmd.py:
--------------------------------------------------------------------------------

```python
"""
Shell Command Tests (CMD)
Tests for the run_command method of the Computer interface using cmd.exe commands.
Required environment variables:
- CUA_API_KEY: API key for Cua cloud provider
- CUA_CONTAINER_NAME: Name of the container to use
"""

import os
import asyncio
import pytest
from pathlib import Path
import sys
import traceback

# Load environment variables from .env file at the repo root (one level up).
project_root = Path(__file__).parent.parent
env_file = project_root / ".env"
print(f"Loading environment from: {env_file}")
from dotenv import load_dotenv

load_dotenv(env_file)

# Add paths to sys.path if needed
# NOTE(review): splitting on ":" is POSIX-specific; os.pathsep would be the
# portable separator on Windows — confirm intended platforms.
pythonpath = os.environ.get("PYTHONPATH", "")
for path in pythonpath.split(":"):
    if path and path not in sys.path:
        sys.path.insert(0, path)  # Insert at beginning to prioritize
        print(f"Added to sys.path: {path}")

from computer import Computer, VMProviderType

@pytest.fixture(scope="session")
async def computer():
    """Shared Computer instance for all test cases.

    Yields a connected cloud Windows computer; disconnects when the session ends.

    NOTE(review): an async generator fixture normally needs
    `@pytest_asyncio.fixture` (or `asyncio_mode = "auto"` in pytest config) to
    resolve to the yielded value — presumably the project config enables this;
    confirm.
    """
    # Create a remote Windows computer with Cua
    computer = Computer(
        os_type="windows",
        api_key=os.getenv("CUA_API_KEY"),
        name=str(os.getenv("CUA_CONTAINER_NAME")),
        provider_type=VMProviderType.CLOUD,
    )

    try:
        await computer.run()
        yield computer
    finally:
        # Always release the cloud container, even if setup or a test fails.
        await computer.disconnect()


# Sample test cases
@pytest.mark.asyncio(loop_scope="session")
async def test_cmd_echo_command(computer):
    """cmd.exe echo should round-trip the text with a clean exit."""
    outcome = await computer.interface.run_command("echo Hello World")

    assert outcome.stdout.strip() == "Hello World"
    assert outcome.stderr == ""
    assert outcome.returncode == 0


@pytest.mark.asyncio(loop_scope="session")
async def test_cmd_dir_command(computer):
    """Listing C:\\ via dir should succeed and include the directory banner."""
    outcome = await computer.interface.run_command("dir C:\\")

    assert outcome.returncode == 0
    assert outcome.stderr == ""
    assert "Directory of C:\\" in outcome.stdout
    assert "bytes" in outcome.stdout.lower()  # dir reports sizes in bytes


@pytest.mark.asyncio(loop_scope="session")
async def test_cmd_command_with_error(computer):
    """dir on a missing path should fail with an error message on stderr."""
    outcome = await computer.interface.run_command("dir C:\\nonexistent_directory_12345")

    assert outcome.returncode != 0
    assert outcome.stdout == ""
    # cmd.exe wording varies between Windows versions; accept any known form.
    known_errors = ("File Not Found", "cannot find the path", "The system cannot find")
    assert any(fragment in outcome.stderr for fragment in known_errors)


if __name__ == "__main__":
    # Allow running this file directly without invoking pytest on the CLI.
    pytest.main([__file__, "-v"])

```

--------------------------------------------------------------------------------
/tests/test_shell_bash.py:
--------------------------------------------------------------------------------

```python
"""
Shell Command Tests (Bash)
Tests for the run_command method of the Computer interface using bash commands.
Required environment variables:
- CUA_API_KEY: API key for Cua cloud provider
- CUA_CONTAINER_NAME: Name of the container to use
"""

import os
import asyncio
import pytest
from pathlib import Path
import sys
import traceback

# Load environment variables from .env file at the repo root (one level up).
project_root = Path(__file__).parent.parent
env_file = project_root / ".env"
print(f"Loading environment from: {env_file}")
from dotenv import load_dotenv

load_dotenv(env_file)

# Add paths to sys.path if needed
# NOTE(review): splitting on ":" is POSIX-specific; os.pathsep would be the
# portable separator on Windows — confirm intended platforms.
pythonpath = os.environ.get("PYTHONPATH", "")
for path in pythonpath.split(":"):
    if path and path not in sys.path:
        sys.path.insert(0, path)  # Insert at beginning to prioritize
        print(f"Added to sys.path: {path}")

from computer import Computer, VMProviderType

@pytest.fixture(scope="session")
async def computer():
    """Shared Computer instance for all test cases.

    Yields a connected cloud Linux computer; disconnects when the session ends.

    NOTE(review): an async generator fixture normally needs
    `@pytest_asyncio.fixture` (or `asyncio_mode = "auto"` in pytest config) to
    resolve to the yielded value — presumably the project config enables this;
    confirm.
    """
    # Create a remote Linux computer with Cua
    computer = Computer(
        os_type="linux",
        api_key=os.getenv("CUA_API_KEY"),
        name=str(os.getenv("CUA_CONTAINER_NAME")),
        provider_type=VMProviderType.CLOUD,
    )

    try:
        await computer.run()
        yield computer
    finally:
        # Always release the cloud container, even if setup or a test fails.
        await computer.disconnect()


# Sample test cases
@pytest.mark.asyncio(loop_scope="session")
async def test_bash_echo_command(computer):
    """bash echo should round-trip the text with a clean exit."""
    outcome = await computer.interface.run_command("echo 'Hello World'")

    assert outcome.stdout.strip() == "Hello World"
    assert outcome.stderr == ""
    assert outcome.returncode == 0


@pytest.mark.asyncio(loop_scope="session")
async def test_bash_ls_command(computer):
    """ls -la on /tmp should succeed and show the standard long-format entries."""
    outcome = await computer.interface.run_command("ls -la /tmp")

    assert outcome.returncode == 0
    assert outcome.stderr == ""
    assert "total" in outcome.stdout   # ls -la prefixes output with "total"
    assert "." in outcome.stdout       # current directory entry
    assert ".." in outcome.stdout      # parent directory entry


@pytest.mark.asyncio(loop_scope="session")
async def test_bash_command_with_error(computer):
    """ls on a missing path should fail with an error message on stderr."""
    outcome = await computer.interface.run_command("ls /nonexistent_directory_12345")

    assert outcome.returncode != 0
    assert outcome.stdout == ""
    # Wording differs across coreutils versions; accept either known form.
    assert any(fragment in outcome.stderr
               for fragment in ("No such file or directory", "cannot access"))


if __name__ == "__main__":
    # Allow running this file directly without invoking pytest on the CLI.
    pytest.main([__file__, "-v"])

```

--------------------------------------------------------------------------------
/examples/evals/hud_eval_examples.py:
--------------------------------------------------------------------------------

```python
"""
hud_eval_examples.py — minimal HUD evaluation runner

- Auto-discovers .env anywhere up the directory tree (via find_dotenv)
- Requires HUD_API_KEY in the resolved environment
- No Docker/local computer usage
"""

#imports
import asyncio
import logging
import os
import uuid
from pathlib import Path
from pprint import pprint

from dotenv import load_dotenv, find_dotenv
from agent import ComputerAgent
from agent.integrations.hud import run_full_dataset

"""
Loading env
"""
def load_env_or_fail() -> None:
    """Load the nearest .env (walking up from this file's directory) and
    verify HUD_API_KEY is present afterwards.

    Raises:
        FileNotFoundError: when no .env file can be discovered.
        EnvironmentError: when HUD_API_KEY is absent or empty after loading.
    """
    dotenv_path = find_dotenv(usecwd=False)
    if not dotenv_path:
        raise FileNotFoundError(
            "❌ .env not found. Place a .env at your repo root (or export HUD_API_KEY)."
        )
    load_dotenv(dotenv_path, override=True)
    if not os.getenv("HUD_API_KEY"):
        raise EnvironmentError("❌ HUD_API_KEY is missing in the loaded environment")

"""
Build Agent Config
- customize agent behavior, tool integration, callbacks, resource management, and more
- https://docs.trycua.com/docs/agent-sdk/agent-loops#parameters
- https://docs.trycua.com/docs/agent-sdk/supported-model-providers
"""
def build_agent_config() -> dict:
    """Assemble the keyword arguments shared by ComputerAgent and the HUD runner.

    Returns:
        dict: model id, trajectory output dir, image-history window, log
        verbosity, and the system instruction for the agent.
    """
    return dict(
        model="openai/computer-use-preview",
        trajectory_dir=str(Path("trajectories")),
        only_n_most_recent_images=3,
        verbosity=logging.INFO,
        instruction="You are a computer-using agent graded by deterministic checkers.",
    )

"""
Hud Eval
"""
async def run_hud_eval() -> None:
    """Load the environment, validate the agent config, and run one HUD job."""
    #load env and agent config
    load_env_or_fail()
    agent_config = build_agent_config()

    # Initialize to ensure config is valid (tools, verbosity, etc.)
    # NOTE(review): the instance is deliberately discarded — construction is
    # the validation step.
    _ = ComputerAgent(**agent_config)

    job_name = f"osworld-test-{str(uuid.uuid4())[:4]}" #job name (each run of your task is a job on hud)
    print(f"🚀 Running HUD eval: {job_name}")


    """
    Customize your hud eval below, check the doc for additional params
    - https://docs.trycua.com/docs/agent-sdk/integrations/hud#parameters-1
    - recommend low max steps (5-10) for testing, then max 100 for benchmarking
    - also select specific tasks to run by using splitting the dataset
    """
    # NOTE(review): **agent_config forwards every key (including "instruction")
    # to run_full_dataset — confirm the runner accepts all of them.
    results = await run_full_dataset(
        dataset="ddupont/OSWorld-Tiny-Public",
        job_name=job_name,
        **agent_config,
        max_concurrent=20,
        max_steps=50,
        # split="train[0:1]"
    )

    # Summarize: result count plus a peek at the first few entries.
    print(f"\n📊 Job: {job_name}")
    print(f"Total results: {len(results)}")
    pprint(results[:3])


def main() -> None:
    """CLI entry point: configure root logging, then run the async eval."""
    logging.basicConfig(level=logging.INFO)
    asyncio.run(run_hud_eval())


if __name__ == "__main__":
    main()

```

--------------------------------------------------------------------------------
/libs/typescript/computer/src/computer/providers/base.ts:
--------------------------------------------------------------------------------

```typescript
import os from "node:os";
import { Telemetry } from "@trycua/core";
import pino from "pino";
import type { OSType } from "../../types";
import type { BaseComputerConfig, Display, VMProviderType } from "../types";

const logger = pino({ name: "computer.provider_base" });

/**
 * Base Computer class with shared functionality
 */
export abstract class BaseComputer {
	protected name: string;
	protected osType: OSType;
	protected vmProvider?: VMProviderType;
	protected telemetry: Telemetry;

	constructor(config: BaseComputerConfig) {
		this.name = config.name;
		this.osType = config.osType;
		this.telemetry = new Telemetry();
		// One event for module load, one for this particular instance.
		this.telemetry.recordEvent("module_init", {
			module: "computer",
			version: process.env.npm_package_version,
			node_version: process.version,
		});

		this.telemetry.recordEvent("computer_initialized", {
			os: os.platform(),
			os_version: os.version(),
			node_version: process.version,
		});
	}

	/** Name of this computer instance. */
	getName(): string {
		return this.name;
	}

	/** Operating system type of this computer. */
	getOSType(): OSType {
		return this.osType;
	}

	/** VM provider backing this computer, if one was set. */
	getVMProviderType(): VMProviderType | undefined {
		return this.vmProvider;
	}

	/** Disconnect from the computer; shared by all computer types. */
	async disconnect(): Promise<void> {
		logger.info(`Disconnecting from ${this.name}`);
		// Implementation would go here
	}

	/**
	 * Parse a "WIDTHxHEIGHT" display string into a Display object.
	 * @param display Display string, e.g. "1024x768"
	 * @returns Display with numeric width and height
	 * @throws Error when the string does not match WIDTHxHEIGHT
	 */
	public static parseDisplayString(display: string): Display {
		const parsed = /^(\d+)x(\d+)$/.exec(display);
		if (parsed === null) {
			throw new Error(
				`Invalid display format: ${display}. Expected format: WIDTHxHEIGHT`,
			);
		}

		const [, rawWidth, rawHeight] = parsed;
		return {
			width: Number.parseInt(rawWidth, 10),
			height: Number.parseInt(rawHeight, 10),
		};
	}

	/**
	 * Parse a memory string into an integer number of MB.
	 *
	 * Examples:
	 *   "8GB" -> 8192
	 *   "1024MB" -> 1024
	 *   "512" -> 512 (MB is assumed when no unit is given)
	 *
	 * @param memoryStr - Memory string to parse
	 * @returns Memory value in MB (0 for an empty string)
	 * @throws Error when the string cannot be parsed
	 */
	public static parseMemoryString(memoryStr: string): number {
		if (!memoryStr) {
			return 0;
		}

		// Case-insensitive match of "<number>[GB|MB]".
		const normalized = memoryStr.toUpperCase().trim();
		const parsed = normalized.match(/^(\d+(?:\.\d+)?)\s*(GB|MB)?$/);
		if (parsed === null) {
			throw new Error(`Invalid memory format: ${memoryStr}`);
		}

		const amount = Number.parseFloat(parsed[1]);
		const unit = parsed[2] ?? "MB";

		return unit === "GB" ? Math.round(amount * 1024) : Math.round(amount);
	}
}

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/adapters/models/generic.py:
--------------------------------------------------------------------------------

```python
from typing import List, Dict, Any, Optional

# Hugging Face imports are local to avoid hard dependency at module import
try:
    import torch  # type: ignore
    from transformers import AutoModel, AutoProcessor  # type: ignore
    HF_AVAILABLE = True
except Exception:
    HF_AVAILABLE = False


class GenericHFModel:
    """Generic Hugging Face vision-language model handler.

    Loads an AutoModel and its AutoProcessor and generates text from
    HF-format chat messages.

    NOTE(review): unlike the Qwen2.5-VL handler, this uses AutoModel (not
    AutoModelForImageTextToText) and torch.float16 (not bfloat16) — confirm
    both choices are intentional; AutoModel instances do not always expose
    `.generate`.
    """

    def __init__(self, model_name: str, device: str = "auto", trust_remote_code: bool = False) -> None:
        """Eagerly load model and processor.

        Args:
            model_name: Hugging Face hub id or local path.
            device: Forwarded as `device_map` (e.g. "auto", "cuda:0").
            trust_remote_code: Forwarded to `from_pretrained`.

        Raises:
            ImportError: if torch/transformers are not installed.
        """
        if not HF_AVAILABLE:
            raise ImportError(
                "HuggingFace transformers dependencies not found. Install with: pip install \"cua-agent[uitars-hf]\""
            )
        self.model_name = model_name
        self.device = device
        self.model = None
        self.processor = None
        self.trust_remote_code = trust_remote_code
        self._load()

    def _load(self) -> None:
        """Instantiate the model and processor from the hub."""
        # Load model
        self.model = AutoModel.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16,
            device_map=self.device,
            attn_implementation="sdpa",
            trust_remote_code=self.trust_remote_code,
        )
        # Load processor; pixel bounds cap the vision token budget.
        self.processor = AutoProcessor.from_pretrained(
            self.model_name,
            min_pixels=3136,
            max_pixels=4096 * 2160,
            device_map=self.device,
            trust_remote_code=self.trust_remote_code,
        )

    def generate(self, messages: List[Dict[str, Any]], max_new_tokens: int = 128) -> str:
        """Generate text for the given HF-format messages.

        messages: [{ role, content: [{type:'text'|'image', text|image}] }]

        Returns the decoded completion for the first (only) batch item, or
        "" when decoding yields nothing.
        """
        assert self.model is not None and self.processor is not None
        # Apply chat template and tokenize
        inputs = self.processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        )
        # Move inputs to the same device as model
        inputs = inputs.to(self.model.device)
        # Generate
        with torch.no_grad():
            generated_ids = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
        # Trim prompt tokens from output so only new tokens are decoded
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        # Decode
        output_text = self.processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return output_text[0] if output_text else ""

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/adapters/models/qwen2_5_vl.py:
--------------------------------------------------------------------------------

```python
from typing import List, Dict, Any, Optional

# Hugging Face imports are local to avoid hard dependency at module import
try:
    import torch  # type: ignore
    from transformers import AutoModelForImageTextToText, AutoProcessor  # type: ignore
    HF_AVAILABLE = True
except Exception:
    HF_AVAILABLE = False


class Qwen2_5_VLModel:
    """Qwen2.5-VL vision-language model wrapper backed by Hugging Face.

    Loads an AutoModelForImageTextToText plus its AutoProcessor once at
    construction and exposes a single `generate` call.
    """

    def __init__(self, model_name: str, device: str = "auto", trust_remote_code: bool = False) -> None:
        """Eagerly load model and processor.

        Raises:
            ImportError: if torch/transformers are not installed.
        """
        if not HF_AVAILABLE:
            raise ImportError(
                "HuggingFace transformers dependencies not found. Install with: pip install \"cua-agent[uitars-hf]\""
            )
        self.model_name = model_name
        self.device = device
        self.trust_remote_code = trust_remote_code
        self.model = None
        self.processor = None
        self._load()

    def _load(self) -> None:
        """Instantiate the model and processor from the hub."""
        shared_kwargs = dict(
            device_map=self.device,
            trust_remote_code=self.trust_remote_code,
        )
        self.model = AutoModelForImageTextToText.from_pretrained(
            self.model_name,
            torch_dtype=torch.bfloat16,
            attn_implementation="sdpa",
            **shared_kwargs,
        )
        # Pixel bounds cap the vision token budget per image.
        self.processor = AutoProcessor.from_pretrained(
            self.model_name,
            min_pixels=3136,
            max_pixels=4096 * 2160,
            **shared_kwargs,
        )

    def generate(self, messages: List[Dict[str, Any]], max_new_tokens: int = 128) -> str:
        """Generate text for the given HF-format messages.

        messages: [{ role, content: [{type:'text'|'image', text|image}] }]
        """
        assert self.model is not None and self.processor is not None
        # Tokenize via the chat template and move tensors to the model device.
        model_inputs = self.processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(self.model.device)
        with torch.no_grad():
            output_ids = self.model.generate(**model_inputs, max_new_tokens=max_new_tokens)
        # Drop the prompt portion so only newly generated tokens are decoded.
        trimmed = [
            full[len(prompt):]
            for prompt, full in zip(model_inputs.input_ids, output_ids)
        ]
        decoded = self.processor.batch_decode(
            trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return decoded[0] if decoded else ""

```

--------------------------------------------------------------------------------
/libs/python/computer/computer/utils.py:
--------------------------------------------------------------------------------

```python
import base64
from typing import Tuple, Optional, Dict, Any
from PIL import Image, ImageDraw
import io

def decode_base64_image(base64_str: str) -> bytes:
    """Turn a base64-encoded string back into raw image bytes."""
    raw = base64.b64decode(base64_str)
    return raw

def encode_base64_image(image_bytes: bytes) -> str:
    """Encode raw image bytes as a UTF-8 base64 string."""
    encoded = base64.b64encode(image_bytes)
    return encoded.decode('utf-8')

def bytes_to_image(image_bytes: bytes) -> Image.Image:
    """Deserialize raw bytes into a PIL Image.

    Args:
        image_bytes: Raw image bytes

    Returns:
        PIL.Image: The decoded image
    """
    buffer = io.BytesIO(image_bytes)
    return Image.open(buffer)

def image_to_bytes(image: Image.Image, format: str = 'PNG') -> bytes:
    """Serialize a PIL Image to raw bytes in the given format (PNG default)."""
    buffer = io.BytesIO()
    image.save(buffer, format=format)
    return buffer.getvalue()

def resize_image(image_bytes: bytes, scale_factor: float) -> bytes:
    """Resize an image by a uniform scale factor.

    Args:
        image_bytes: The original image as bytes
        scale_factor: Factor to scale by (e.g. 0.5 halves, 2.0 doubles);
            1.0 re-encodes without resizing.

    Returns:
        bytes: The resized image as bytes
    """
    image = bytes_to_image(image_bytes)
    if scale_factor == 1.0:
        return image_to_bytes(image)
    target = (int(image.width * scale_factor), int(image.height * scale_factor))
    return image_to_bytes(image.resize(target, Image.Resampling.LANCZOS))

def draw_box(
    image_bytes: bytes,
    x: int,
    y: int,
    width: int,
    height: int,
    color: str = "#FF0000",
    thickness: int = 2
) -> bytes:
    """Draw a rectangular outline onto an image.

    Args:
        image_bytes: The original image as bytes
        x: X coordinate of top-left corner
        y: Y coordinate of top-left corner
        width: Width of the box
        height: Height of the box
        color: Outline color in hex format
        thickness: Outline thickness in pixels

    Returns:
        bytes: The modified image as bytes
    """
    image = bytes_to_image(image_bytes)
    canvas = ImageDraw.Draw(image)

    top_left = (x, y)
    bottom_right = (x + width, y + height)
    canvas.rectangle([top_left, bottom_right], outline=color, width=thickness)

    return image_to_bytes(image)

def get_image_size(image_bytes: bytes) -> Tuple[int, int]:
    """Return (width, height) of the image encoded in `image_bytes`."""
    return bytes_to_image(image_bytes).size

def parse_vm_info(vm_info: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Parse VM info from pylume response."""
    # NOTE(review): only the falsy guard is present; non-empty input also
    # falls through and implicitly returns None. This looks truncated —
    # confirm against the original module whether parsing logic is missing.
    if not vm_info:
        return None
```

--------------------------------------------------------------------------------
/examples/computer-example-ts/src/index.ts:
--------------------------------------------------------------------------------

```typescript
import { Computer, OSType } from "@trycua/computer";
import OpenAI from "openai";
import { executeAction } from "./helpers";

import "dotenv/config";

// OpenAI client; reads OPENAI_API_KEY loaded via dotenv/config above.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

const COMPUTER_USE_PROMPT = "Open firefox and go to trycua.com";

// Initialize the Computer Connection (cloud Linux container).
const computer = new Computer({
	apiKey: process.env.CUA_API_KEY!,
	name: process.env.CUA_CONTAINER_NAME!,
	osType: OSType.LINUX,
});

await computer.run();
// Take the initial screenshot
const screenshot = await computer.interface.screenshot();
const screenshotBase64 = screenshot.toString("base64");

// Setup openai config for computer use
const computerUseConfig: OpenAI.Responses.ResponseCreateParamsNonStreaming = {
	model: "computer-use-preview",
	tools: [
		{
			type: "computer_use_preview",
			display_width: 1024,
			display_height: 768,
			environment: "linux", // we're using a linux vm
		},
	],
	truncation: "auto",
};

// Send initial screenshot to the openai computer use model
let res = await openai.responses.create({
	...computerUseConfig,
	input: [
		{
			role: "user",
			content: [
				// what we want the ai to do
				{ type: "input_text", text: COMPUTER_USE_PROMPT },
				// current screenshot of the vm
				{
					type: "input_image",
					image_url: `data:image/png;base64,${screenshotBase64}`,
					detail: "auto",
				},
			],
		},
	],
});

// Agent loop: execute returned actions until there are no more computer calls.
while (true) {
	const computerCalls = res.output.filter((o) => o.type === "computer_call");
	if (computerCalls.length < 1) {
		console.log("No more computer calls. Loop complete.");
		break;
	}
	// Get the first call
	// NOTE(review): only the first computer_call per response is executed;
	// any additional calls in the same response are dropped — confirm intended.
	const call = computerCalls[0];
	const action = call.action;
	console.log("Received action from OpenAI Responses API:", action);
	let ackChecks: OpenAI.Responses.ResponseComputerToolCall.PendingSafetyCheck[] =
		[];
	if (call.pending_safety_checks.length > 0) {
		console.log("Safety checks pending:", call.pending_safety_checks);
		// In a real implementation, you would want to get user confirmation here
		// before acknowledging; this demo auto-acknowledges every check.
		ackChecks = call.pending_safety_checks;
	}

	// Execute the action in the container
	await executeAction(computer, action);
	// Wait for changes to process within the container (1sec)
	await new Promise((resolve) => setTimeout(resolve, 1000));

	// Capture new screenshot
	const newScreenshot = await computer.interface.screenshot();
	const newScreenshotBase64 = newScreenshot.toString("base64");

	// Screenshot back as computer_call_output; previous_response_id links the turn.

	res = await openai.responses.create({
		...computerUseConfig,
		previous_response_id: res.id,
		input: [
			{
				type: "computer_call_output",
				call_id: call.call_id,
				acknowledged_safety_checks: ackChecks,
				output: {
					type: "computer_screenshot",
					image_url: `data:image/png;base64,${newScreenshotBase64}`,
				},
			},
		],
	});
}

process.exit();

```

--------------------------------------------------------------------------------
/libs/python/computer/computer/logger.py:
--------------------------------------------------------------------------------

```python
"""Logging utilities for the Computer module."""

import logging
from enum import IntEnum


# Keep LogLevel for backward compatibility, but it will be deprecated
class LogLevel(IntEnum):
    """Legacy verbosity levels. Deprecated - use standard logging levels instead."""

    QUIET = 0  # warnings and errors only
    NORMAL = 1  # standard info output
    VERBOSE = 2  # extra detail
    DEBUG = 3  # everything


# Translation table from the legacy enum to stdlib logging levels.
LOGLEVEL_MAP = {
    LogLevel.QUIET: logging.WARNING,
    LogLevel.NORMAL: logging.INFO,
    LogLevel.VERBOSE: logging.DEBUG,
    LogLevel.DEBUG: logging.DEBUG,
}


class Logger:
    """Thin wrapper around `logging.Logger` used by Computer."""

    def __init__(self, name: str, verbosity: int):
        """Create and configure the named logger.

        Args:
            name: The name of the logger.
            verbosity: Standard logging level (e.g. logging.INFO). Legacy
                LogLevel enum values are translated for backward compatibility.
        """
        self.logger = logging.getLogger(name)

        if isinstance(verbosity, LogLevel):
            # Legacy enum input: translate, defaulting to INFO.
            self.verbosity = LOGLEVEL_MAP.get(verbosity, logging.INFO)
        else:
            self.verbosity = verbosity

        self._configure()

    def _configure(self):
        """Apply the stored verbosity to the logger and announce the level."""
        self.logger.setLevel(self.verbosity)

        # The first threshold at or above the verbosity wins — this mirrors
        # the original if/elif ladder exactly, message for message.
        announcements = (
            (logging.DEBUG, self.logger.info, "Logger set to DEBUG level"),
            (logging.INFO, self.logger.info, "Logger set to INFO level"),
            (logging.WARNING, self.logger.warning, "Logger set to WARNING level"),
            (logging.ERROR, self.logger.warning, "Logger set to ERROR level"),
            (logging.CRITICAL, self.logger.warning, "Logger set to CRITICAL level"),
        )
        for threshold, emit, text in announcements:
            if self.verbosity <= threshold:
                emit(text)
                break

    def debug(self, message: str):
        """Forward a debug-level message."""
        self.logger.debug(message)

    def info(self, message: str):
        """Forward an info-level message."""
        self.logger.info(message)

    def verbose(self, message: str):
        """Log at debug level with a [VERBOSE] prefix (no stdlib verbose level)."""
        self.logger.debug(f"[VERBOSE] {message}")

    def warning(self, message: str):
        """Forward a warning-level message."""
        self.logger.warning(message)

    def error(self, message: str):
        """Forward an error-level message."""
        self.logger.error(message)

```

--------------------------------------------------------------------------------
/docs/content/docs/computer-sdk/sandboxed-python.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Sandboxed Python
slug: sandboxed-python
---

<Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/examples/sandboxed_functions_examples.py" target="_blank">Python example</a> is available for this documentation.</Callout>

You can run Python functions securely inside a sandboxed virtual environment on a remote Cua Computer. This is useful for executing untrusted user code, isolating dependencies, or providing a safe environment for automation tasks.

## How It Works

The `sandboxed` decorator from the Computer SDK wraps a Python function so that it is executed remotely in a specified virtual environment on the target Computer. The function and its arguments are serialized, sent to the remote, and executed in isolation. Results or errors are returned to the caller.

## Example Usage

```python
from computer import Computer
from computer.helpers import sandboxed

@sandboxed()
def read_file(location: str) -> str:
    """Read contents of a file"""
    with open(location, 'r') as f:
        return f.read()

async def main():
    async with Computer(os_type="linux", provider_type="cloud", name="my-container", api_key="...") as computer:
        # Call the sandboxed function (runs remotely)
        result = await read_file("/etc/hostname")
        print(result)
```

## Customizing the Sandbox and Installing Packages

You can specify the virtual environment name and target computer:

```python
@sandboxed(venv_name="myenv", computer=my_computer, max_retries=5)
def my_function(...):
    ...
```

You can also install packages in the virtual environment using the `venv_install` method:

```python
await my_computer.venv_install("myenv", ["requests"])
```

## Example: Interacting with macOS Applications

You can use sandboxed functions to interact with macOS applications on a local Cua Computer (requires `os_type="darwin"`). This is particularly useful for automation tasks that involve GUI applications.

```python
# Example: Use sandboxed functions to execute code in a Cua Container
from computer.helpers import sandboxed

await computer.venv_install("demo_venv", ["macos-pyxa"]) # Install packages in a virtual environment

@sandboxed("demo_venv")
def greet_and_print(name):
    """Get the HTML of the current Safari tab"""
    import PyXA
    safari = PyXA.Application("Safari")
    html = safari.current_document.source()
    print(f"Hello from inside the container, {name}!")
    return {"greeted": name, "safari_html": html}

# When a @sandboxed function is called, it will execute in the container
result = await greet_and_print("Cua")
# Result: {"greeted": "Cua", "safari_html": "<html>...</html>"}
# stdout and stderr are also captured and printed / raised
print("Result from sandboxed function:", result)
```

## Error Handling

If the remote execution fails, the decorator will retry up to `max_retries` times. If all attempts fail, the last exception is raised locally.

```

--------------------------------------------------------------------------------
/libs/python/computer/computer/providers/cloud/provider.py:
--------------------------------------------------------------------------------

```python
"""Cloud VM provider implementation.

This module contains a stub implementation for a future cloud VM provider.
"""

import logging
from typing import Dict, List, Optional, Any

from ..base import BaseVMProvider, VMProviderType

# Setup logging
logger = logging.getLogger(__name__)

import asyncio
import aiohttp
from urllib.parse import urlparse

class CloudProvider(BaseVMProvider):
    """Cloud VM Provider implementation.

    Resolves VM hostnames of the form '{name}.containers.cloud.trycua.com'.
    Lifecycle operations (run/stop/update/list) are currently stubs that log
    a warning and/or return a placeholder payload.
    """
    def __init__(
        self,
        api_key: str,
        verbose: bool = False,
        **kwargs,
    ):
        """
        Args:
            api_key: API key for authentication (must be non-empty)
            verbose: Enable verbose logging
            **kwargs: Extra provider options accepted for interface
                compatibility; currently ignored
        """
        assert api_key, "api_key required for CloudProvider"
        self.api_key = api_key
        self.verbose = verbose

    @property
    def provider_type(self) -> VMProviderType:
        # Identifies this provider as the CLOUD provider type.
        return VMProviderType.CLOUD

    async def __aenter__(self):
        # Stateless provider: no connection setup is required.
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # Nothing to tear down here; see Computer.disconnect() for cleanup.
        pass

    async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
        """Return the VM name and its derived cloud hostname."""
        return {"name": name, "hostname": f"{name}.containers.cloud.trycua.com"}

    async def list_vms(self) -> List[Dict[str, Any]]:
        # Not implemented: always returns an empty list.
        logger.warning("CloudProvider.list_vms is not implemented")
        return []

    async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
        # Not implemented: returns a placeholder status payload.
        # logger.warning("CloudProvider.run_vm is not implemented")
        return {"name": name, "status": "unavailable", "message": "CloudProvider.run_vm is not implemented"}

    async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
        # Not implemented: logs a warning and reports "stopped" without acting.
        logger.warning("CloudProvider.stop_vm is not implemented. To clean up resources, please use Computer.disconnect()")
        return {"name": name, "status": "stopped", "message": "CloudProvider is not implemented"}

    async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
        # Not implemented: logs a warning and reports "unchanged".
        logger.warning("CloudProvider.update_vm is not implemented")
        return {"name": name, "status": "unchanged", "message": "CloudProvider is not implemented"}

    async def get_ip(self, name: Optional[str] = None, storage: Optional[str] = None, retry_delay: int = 2) -> str:
        """
        Return the VM's hostname as '{name}.containers.cloud.trycua.com'.

        Args:
            name: Name of the VM; required by this provider.
            storage: Accepted for interface compatibility; unused.
            retry_delay: Accepted for interface compatibility; unused —
                this implementation performs no retries.

        Raises:
            ValueError: If name is None.
        """
        if name is None:
            raise ValueError("VM name is required for CloudProvider.get_ip")
        return f"{name}.containers.cloud.trycua.com"

```

--------------------------------------------------------------------------------
/docs/content/docs/libraries/computer-server/Commands.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Supported Commands
description: List of all commands supported by the Computer Server API (WebSocket and REST).
---

# Commands Reference

This page lists all supported commands for the Computer Server, available via both WebSocket and REST API endpoints.

| Command             | Description                                |
|---------------------|--------------------------------------------|
| version             | Get protocol and package version info       |
| run_command         | Run a shell command                        |
| screenshot          | Capture a screenshot                       |
| get_screen_size     | Get the screen size                        |
| get_cursor_position | Get the current mouse cursor position      |
| mouse_down          | Mouse button down                          |
| mouse_up            | Mouse button up                            |
| left_click          | Left mouse click                           |
| right_click         | Right mouse click                          |
| double_click        | Double mouse click                         |
| move_cursor         | Move mouse cursor to coordinates           |
| drag_to             | Drag mouse to coordinates                  |
| drag                | Drag mouse by offset                       |
| key_down            | Keyboard key down                          |
| key_up              | Keyboard key up                            |
| type_text           | Type text                                  |
| press_key           | Press a single key                         |
| hotkey              | Press a hotkey combination                 |
| scroll              | Scroll the screen                          |
| scroll_down         | Scroll down                                |
| scroll_up           | Scroll up                                  |
| copy_to_clipboard   | Copy text to clipboard                     |
| set_clipboard       | Set clipboard content                      |
| file_exists         | Check if a file exists                     |
| directory_exists    | Check if a directory exists                |
| list_dir            | List files/directories in a directory      |
| read_text           | Read text from a file                      |
| write_text          | Write text to a file                       |
| read_bytes          | Read bytes from a file                     |
| write_bytes         | Write bytes to a file                      |
| get_file_size       | Get file size                              |
| delete_file         | Delete a file                              |
| create_dir          | Create a directory                         |
| delete_dir          | Delete a directory                         |
| get_accessibility_tree | Get accessibility tree (if supported)    |
| find_element        | Find element in accessibility tree         |
| diorama_cmd         | Run a diorama command (if supported)       |

```

--------------------------------------------------------------------------------
/libs/lume/tests/VNCServiceTests.swift:
--------------------------------------------------------------------------------

```swift
import Foundation
import Testing
@testable import lume

@Test("VNCService starts correctly")
func testVNCServiceStart() async throws {
    let tempDir = try createTempDirectory()
    let vmDir = VMDirectory(Path(tempDir.path))
    let service = await MockVNCService(vmDirectory: vmDir)
    
    // Initial state
    let isRunning = await service.isRunning
    let url = await service.url
    #expect(!isRunning)
    #expect(url == nil)
    
    // Start service
    try await service.start(port: 5900, virtualMachine: nil)
    #expect(await service.isRunning)
    #expect(await service.url?.contains("5900") ?? false)
}

@Test("VNCService stops correctly")
func testVNCServiceStop() async throws {
    let tempDir = try createTempDirectory()
    let vmDir = VMDirectory(Path(tempDir.path))
    let service = await MockVNCService(vmDirectory: vmDir)
    try await service.start(port: 5900, virtualMachine: nil)
    
    await service.stop()
    let isRunning = await service.isRunning
    let url = await service.url
    #expect(!isRunning)
    #expect(url == nil)
}

@Test("VNCService handles client operations")
func testVNCServiceClient() async throws {
    let tempDir = try createTempDirectory()
    let vmDir = VMDirectory(Path(tempDir.path))
    let service = await MockVNCService(vmDirectory: vmDir)
    
    // Should fail when not started
    do {
        try await service.openClient(url: "vnc://localhost:5900")
        #expect(Bool(false), "Expected openClient to throw when not started")
    } catch VMError.vncNotConfigured {
        // Expected error
    } catch {
        #expect(Bool(false), "Expected vncNotConfigured error but got \(error)")
    }
    
    // Start and try client operations
    try await service.start(port: 5900, virtualMachine: nil)
    try await service.openClient(url: "vnc://localhost:5900")
    #expect(await service.clientOpenCount == 1)
    
    // Stop and verify client operations fail
    await service.stop()
    do {
        try await service.openClient(url: "vnc://localhost:5900")
        #expect(Bool(false), "Expected openClient to throw after stopping")
    } catch VMError.vncNotConfigured {
        // Expected error
    } catch {
        #expect(Bool(false), "Expected vncNotConfigured error but got \(error)")
    }
}

@Test("VNCService handles virtual machine attachment")
func testVNCServiceVMAttachment() async throws {
    let tempDir = try createTempDirectory()
    let vmDir = VMDirectory(Path(tempDir.path))
    let service = await MockVNCService(vmDirectory: vmDir)
    let mockVM = "mock_vm"
    
    try await service.start(port: 5900, virtualMachine: mockVM)
    let attachedVM = await service.attachedVM
    #expect(attachedVM == mockVM)
}

/// Creates (and returns the URL of) a uniquely-named directory under the
/// system temporary directory for use as a scratch VM directory in tests.
private func createTempDirectory() throws -> URL {
    let uniqueName = UUID().uuidString
    let url = FileManager.default.temporaryDirectory.appendingPathComponent(uniqueName)
    try FileManager.default.createDirectory(at: url, withIntermediateDirectories: true)
    return url
}
```

--------------------------------------------------------------------------------
/libs/python/computer-server/computer_server/handlers/factory.py:
--------------------------------------------------------------------------------

```python
import platform
import subprocess
from typing import Tuple, Type
from .base import BaseAccessibilityHandler, BaseAutomationHandler, BaseFileHandler
from computer_server.diorama.base import BaseDioramaHandler

# Conditionally import platform-specific handlers
system = platform.system().lower()
if system == 'darwin':
    from .macos import MacOSAccessibilityHandler, MacOSAutomationHandler
    from computer_server.diorama.macos import MacOSDioramaHandler
elif system == 'linux':
    from .linux import LinuxAccessibilityHandler, LinuxAutomationHandler
elif system == 'windows':
    from .windows import WindowsAccessibilityHandler, WindowsAutomationHandler

from .generic import GenericFileHandler

class HandlerFactory:
    """Factory for creating OS-specific handlers."""

    @staticmethod
    def _get_current_os() -> str:
        """Determine the current OS.

        Returns:
            str: 'darwin', 'linux', or 'windows' when detected via
            platform.system(); otherwise the lowercased output of
            ``uname -s`` (Unix-like fallback), which may be any value.

        Raises:
            RuntimeError: If unable to determine the current OS.
        """
        try:
            # Use platform.system() as primary method
            system = platform.system().lower()
            if system in ['darwin', 'linux', 'windows']:
                return system

            # Fallback to uname if platform.system() doesn't return expected
            # values (Unix-like systems only; raises on Windows and is wrapped
            # into a RuntimeError below).
            result = subprocess.run(['uname', '-s'], capture_output=True, text=True)
            if result.returncode == 0:
                return result.stdout.strip().lower()

            raise RuntimeError(f"Unsupported OS: {system}")
        except Exception as e:
            # Chain the original exception so the root cause stays visible
            # in tracebacks instead of being swallowed by the wrapper.
            raise RuntimeError(f"Failed to determine current OS: {str(e)}") from e

    @staticmethod
    def create_handlers() -> Tuple[BaseAccessibilityHandler, BaseAutomationHandler, BaseDioramaHandler, BaseFileHandler]:
        """Create and return appropriate handlers for the current OS.

        Returns:
            Tuple[BaseAccessibilityHandler, BaseAutomationHandler, BaseDioramaHandler, BaseFileHandler]:
            The accessibility, automation, diorama, and file handlers for the
            current OS. Diorama is only specialized on macOS; Linux and
            Windows fall back to the no-op BaseDioramaHandler.

        Raises:
            NotImplementedError: If the current OS is not supported.
            RuntimeError: If unable to determine the current OS.
        """
        os_type = HandlerFactory._get_current_os()

        if os_type == 'darwin':
            return MacOSAccessibilityHandler(), MacOSAutomationHandler(), MacOSDioramaHandler(), GenericFileHandler()
        elif os_type == 'linux':
            return LinuxAccessibilityHandler(), LinuxAutomationHandler(), BaseDioramaHandler(), GenericFileHandler()
        elif os_type == 'windows':
            return WindowsAccessibilityHandler(), WindowsAutomationHandler(), BaseDioramaHandler(), GenericFileHandler()
        else:
            raise NotImplementedError(f"OS '{os_type}' is not supported")

```

--------------------------------------------------------------------------------
/libs/lume/tests/VM/VMDetailsPrinterTests.swift:
--------------------------------------------------------------------------------

```swift
import Foundation
import Testing

@testable import lume

/// Tests for VMDetailsPrinter covering both output formats: JSON round-trip
/// fidelity and the fixed-width text table layout.
struct VMDetailsPrinterTests {

    /// JSON format: printed output must decode back to the same VM details.
    @Test func printStatus_whenJSON() throws {
        // Given
        let vms: [VMDetails] = [
            VMDetails(
                name: "name",
                os: "os",
                cpuCount: 2,
                memorySize: 1024,
                diskSize: .init(allocated: 24, total: 30),
                display: "1024x768",
                status: "status",
                vncUrl: "vncUrl",
                ipAddress: "0.0.0.0",
                locationName: "mockLocation")
        ]
        let jsonEncoder = JSONEncoder()
        jsonEncoder.outputFormatting = .prettyPrinted
        let expectedOutput = try String(data: jsonEncoder.encode(vms), encoding: .utf8)!

        // When
        var printedStatus: String?
        try VMDetailsPrinter.printStatus(vms, format: .json, print: { printedStatus = $0 })

        // Then
        // Decode both JSONs and compare the actual data structures
        // (string comparison would be brittle against key-ordering changes).
        let jsonDecoder = JSONDecoder()
        let printedVMs = try jsonDecoder.decode(
            [VMDetails].self, from: printedStatus!.data(using: .utf8)!)
        let expectedVMs = try jsonDecoder.decode(
            [VMDetails].self, from: expectedOutput.data(using: .utf8)!)

        #expect(printedVMs.count == expectedVMs.count)
        for (printed, expected) in zip(printedVMs, expectedVMs) {
            #expect(printed.name == expected.name)
            #expect(printed.os == expected.os)
            #expect(printed.cpuCount == expected.cpuCount)
            #expect(printed.memorySize == expected.memorySize)
            #expect(printed.diskSize.allocated == expected.diskSize.allocated)
            #expect(printed.diskSize.total == expected.diskSize.total)
            #expect(printed.status == expected.status)
            #expect(printed.vncUrl == expected.vncUrl)
            #expect(printed.ipAddress == expected.ipAddress)
        }
    }

    /// Text format: one header line plus one padded row per VM, with the
    /// expected column order and formatted values.
    @Test func printStatus_whenNotJSON() throws {
        // Given
        let vms: [VMDetails] = [
            VMDetails(
                name: "name",
                os: "os",
                cpuCount: 2,
                memorySize: 1024,
                diskSize: .init(allocated: 24, total: 30),
                display: "1024x768",
                status: "status",
                vncUrl: "vncUrl",
                ipAddress: "0.0.0.0",
                locationName: "mockLocation")
        ]

        // When
        var printedLines: [String] = []
        try VMDetailsPrinter.printStatus(vms, format: .text, print: { printedLines.append($0) })

        // Then
        #expect(printedLines.count == 2)

        let headerParts = printedLines[0].split(whereSeparator: \.isWhitespace)
        #expect(
            headerParts == [
                "name", "os", "cpu", "memory", "disk", "display", "status", "storage", "shared_dirs", "ip", "vnc",
            ])

        // "0.00G" because memorySize is given in bytes (1024 B rounds to 0.00 GB);
        // "-" is the shared_dirs placeholder for a non-running VM.
        #expect(
            printedLines[1].split(whereSeparator: \.isWhitespace).map(String.init) == [
                "name", "os", "2", "0.00G", "24.0B/30.0B", "1024x768", "status", "mockLocation",
                "-",
                "0.0.0.0",
                "vncUrl",
            ])
    }
}

```

--------------------------------------------------------------------------------
/libs/lume/src/Server/HTTP.swift:
--------------------------------------------------------------------------------

```swift
import Foundation
import Network

/// Errors surfaced by the HTTP server layer.
enum HTTPError: Error {
    case internalError
}

/// A minimal parsed HTTP/1.1 request (method, path, headers, optional body).
struct HTTPRequest {
    let method: String
    let path: String
    let headers: [String: String]
    let body: Data?
    
    /// Parses a raw request from UTF-8 bytes.
    ///
    /// Returns nil when the data is not valid UTF-8 or the request line does
    /// not contain at least a method and a path. Headers without a ":" are
    /// silently skipped. Note: the body is round-tripped through UTF-8, so
    /// binary bodies are not supported by this parser.
    init?(data: Data) {
        guard let requestString = String(data: data, encoding: .utf8) else { return nil }
        let components = requestString.components(separatedBy: "\r\n\r\n")
        guard components.count >= 1 else { return nil }
        
        let headerLines = components[0].components(separatedBy: "\r\n")
        guard !headerLines.isEmpty else { return nil }
        
        // Parse request line: "METHOD PATH [HTTP/x.y]"
        let requestLine = headerLines[0].components(separatedBy: " ")
        guard requestLine.count >= 2 else { return nil }
        
        self.method = requestLine[0]
        self.path = requestLine[1]
        
        // Parse headers ("Name: Value"), trimming whitespace on both sides.
        var headers: [String: String] = [:]
        for line in headerLines.dropFirst() {
            let headerComponents = line.split(separator: ":", maxSplits: 1).map(String.init)
            if headerComponents.count == 2 {
                headers[headerComponents[0].trimmingCharacters(in: .whitespaces)] = 
                    headerComponents[1].trimmingCharacters(in: .whitespaces)
            }
        }
        self.headers = headers
        
        // Parse body if present. Rejoin the remaining components so a body
        // that itself contains "\r\n\r\n" is not truncated at its first
        // occurrence (the previous code kept only components[1]).
        if components.count > 1 {
            let bodyString = components.dropFirst().joined(separator: "\r\n\r\n")
            self.body = bodyString.data(using: .utf8)
        } else {
            self.body = nil
        }
    }
}

/// A minimal HTTP/1.1 response that can serialize itself to wire format.
struct HTTPResponse {
    enum StatusCode: Int {
        case ok = 200
        case accepted = 202
        case badRequest = 400
        case notFound = 404
        case internalServerError = 500
        
        /// Human-readable reason phrase for the status line.
        var description: String {
            switch self {
            case .ok: return "OK"
            case .accepted: return "Accepted"
            case .badRequest: return "Bad Request"
            case .notFound: return "Not Found"
            case .internalServerError: return "Internal Server Error"
            }
        }
    }
    
    let statusCode: StatusCode
    let headers: [String: String]
    let body: Data?
    
    /// Creates a response with explicit headers and an optional binary body.
    init(statusCode: StatusCode, headers: [String: String] = [:], body: Data? = nil) {
        self.statusCode = statusCode
        self.headers = headers
        self.body = body
    }
    
    /// Creates a plain-text response from a string body.
    init(statusCode: StatusCode, body: String) {
        self.statusCode = statusCode
        self.headers = ["Content-Type": "text/plain"]
        self.body = body.data(using: .utf8)
    }
    
    /// Serializes the response into raw HTTP/1.1 bytes:
    /// status line, headers (Content-Length added when a body exists),
    /// a blank line, then the body.
    func serialize() -> Data {
        var allHeaders = self.headers
        if let body = body {
            allHeaders["Content-Length"] = "\(body.count)"
        }
        
        let statusLine = "HTTP/1.1 \(statusCode.rawValue) \(statusCode.description)"
        let headerLines = allHeaders.map { "\($0.key): \($0.value)" }
        let head = ([statusLine] + headerLines).joined(separator: "\r\n") + "\r\n\r\n"
        
        var wireData = head.data(using: .utf8) ?? Data()
        if let body = body {
            wireData.append(body)
        }
        
        return wireData
    }
}

/// Minimal HTTP server shell that records the port to listen on.
/// NOTE(review): no listener or connection handling is created here — that
/// logic presumably lives elsewhere; confirm against the rest of the file.
final class HTTPServer {
    // TCP port the server is intended to bind to.
    let port: UInt16
    
    init(port: UInt16) {
        self.port = port
    }
} 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/pii_anonymization.py:
--------------------------------------------------------------------------------

```python
"""
PII anonymization callback handler using Microsoft Presidio for text and image redaction.
"""

from typing import List, Dict, Any, Optional, Tuple
from .base import AsyncCallbackHandler
import base64
import io
import logging

try:
    # TODO: Add Presidio dependencies
    from PIL import Image
    PRESIDIO_AVAILABLE = True
except ImportError:
    PRESIDIO_AVAILABLE = False

logger = logging.getLogger(__name__)

class PIIAnonymizationCallback(AsyncCallbackHandler):
    """
    Callback handler that anonymizes PII in text and images using Microsoft Presidio.
    
    This handler:
    1. Anonymizes PII in messages before sending to the agent loop
    2. Deanonymizes PII in tool calls and message outputs after the agent loop
    3. Redacts PII from images in computer_call_output messages

    NOTE(review): the anonymization internals are still stubs (see the TODOs
    below); currently messages and outputs pass through unchanged.
    """
    
    def __init__(
        self,
        # TODO: Any extra kwargs if needed
    ):
        """
        Initialize the PII anonymization callback.

        Raises:
            ImportError: If the optional anonymization dependencies are not
                installed. Note: PRESIDIO_AVAILABLE currently only reflects
                whether PIL imports; the Presidio imports are still TODO.

        NOTE(review): configuration options (entities to anonymize, operator,
        image redaction color, ...) are planned but not implemented yet.
        """
        if not PRESIDIO_AVAILABLE:
            raise ImportError(
                "Presidio is not available. Install with: "
                "pip install cua-agent[pii-anonymization]"
            )
        
        # TODO: Implement __init__
    
    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Anonymize PII in messages before sending to agent loop.
        
        Args:
            messages: List of message dictionaries
            
        Returns:
            List of messages with PII anonymized (currently unchanged, since
            _anonymize_message is a pass-through stub)
        """
        anonymized_messages = []
        for msg in messages:
            anonymized_msg = await self._anonymize_message(msg)
            anonymized_messages.append(anonymized_msg)
        
        return anonymized_messages
    
    async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Deanonymize PII in tool calls and message outputs after agent loop.
        
        Args:
            output: List of output dictionaries
            
        Returns:
            List of output with PII deanonymized for tool calls; items of
            other types are passed through untouched
        """
        deanonymized_output = []
        for item in output:
            # Only deanonymize tool calls and computer_call messages
            if item.get("type") in ["computer_call", "computer_call_output"]:
                deanonymized_item = await self._deanonymize_item(item)
                deanonymized_output.append(deanonymized_item)
            else:
                deanonymized_output.append(item)
        
        return deanonymized_output
    
    async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
        """Anonymize a single message. Stub: returns the message unchanged."""
        # TODO: Implement _anonymize_message
        return message
    
    async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
        """Deanonymize a single output item. Stub: returns the item unchanged."""
        # TODO: Implement _deanonymize_item
        return item

```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Grounding Models
description: Models that support click prediction with ComputerAgent.predict_click()
---

These models specialize in UI element grounding and click prediction. They can identify precise coordinates for UI elements based on natural language descriptions, but cannot perform autonomous task planning.

Use `ComputerAgent.predict_click()` to get coordinates for specific UI elements.

All models that support `ComputerAgent.run()` also support `ComputerAgent.predict_click()`. See [All‑in‑one CUAs](./computer-use-agents).

### Anthropic CUAs

- Claude 4.1: `claude-opus-4-1-20250805`
- Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514`
- Claude 3.7: `claude-3-7-sonnet-20250219`
- Claude 3.5: `claude-3-5-sonnet-20241022`

### OpenAI CUA Preview
- Computer-use-preview: `computer-use-preview`

### UI-TARS 1.5 (Unified VLM with grounding support)
- `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B`
- `huggingface/ByteDance-Seed/UI-TARS-1.5-7B` (requires TGI endpoint)

## Specialized Grounding Models

These models are optimized specifically for click prediction and UI element grounding:

### OpenCUA
- `huggingface-local/xlangai/OpenCUA-{7B,32B}`

### GTA1 Family
- `huggingface-local/HelloKKMe/GTA1-{7B,32B,72B}`

### Holo 1.5 Family
- `huggingface-local/Hcompany/Holo1.5-{3B,7B,72B}`

### InternVL 3.5 Family
- `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}`

### OmniParser (OCR)

OCR-focused set-of-marks model that requires an LLM for click prediction:

- `omniparser` (requires combination with any LiteLLM vision model)

### Moondream3 (Local Grounding)

Moondream3 is a powerful small model that can perform UI grounding and click prediction.

- `moondream3`

## Usage Examples

```python
# Using any grounding model for click prediction
agent = ComputerAgent("claude-3-5-sonnet-20241022", tools=[computer])

# Predict coordinates for specific elements
login_coords = agent.predict_click("find the login button")
search_coords = agent.predict_click("locate the search text field")
menu_coords = agent.predict_click("find the hamburger menu icon")

print(f"Login button: {login_coords}")
print(f"Search field: {search_coords}")
print(f"Menu icon: {menu_coords}")
```

```python
# OmniParser is just for OCR, so it requires an LLM for predict_click
agent = ComputerAgent("omniparser+anthropic/claude-3-5-sonnet-20241022", tools=[computer])

# Predict click coordinates using composed agent
coords = agent.predict_click("find the submit button")
print(f"Click coordinates: {coords}")  # (450, 320)

# Note: Cannot use omniparser alone for click prediction
# This will raise an error:
# agent = ComputerAgent("omniparser", tools=[computer])
# coords = agent.predict_click("find button")  # Error!
```

```python
agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B", tools=[computer])

# Predict click coordinates for UI elements
coords = agent.predict_click("find the submit button")
print(f"Click coordinates: {coords}")  # (450, 320)

# Note: GTA1 cannot perform autonomous task planning
# This will raise an error:
# agent.run("Fill out the form and submit it")
```

---

For information on combining grounding models with planning capabilities, see [Composed Agents](./composed-agents) and [All‑in‑one CUAs](./computer-use-agents).

```

--------------------------------------------------------------------------------
/libs/python/computer-server/computer_server/server.py:
--------------------------------------------------------------------------------

```python
"""
Server interface for Computer API.
Provides a clean API for starting and stopping the server.
"""

import asyncio
import logging
import uvicorn
from typing import Optional
from fastapi import FastAPI

from .main import app as fastapi_app

logger = logging.getLogger(__name__)


class Server:
    """
    Server interface for Computer API.

    Usage:
        from computer_api import Server

        # Synchronous usage
        server = Server()
        server.start()  # Blocks until server is stopped

        # Asynchronous usage
        server = Server()
        await server.start_async()  # Starts server in background
        # Do other things
        await server.stop()  # Stop the server

    NOTE(review): the import path in the example ("computer_api") looks stale —
    this module lives under computer_server; verify against the package layout.
    """

    def __init__(self, host: str = "0.0.0.0", port: int = 8000, log_level: str = "info", 
                 ssl_keyfile: Optional[str] = None, ssl_certfile: Optional[str] = None):
        """
        Initialize the server.

        Args:
            host: Host to bind the server to
            port: Port to bind the server to
            log_level: Logging level (debug, info, warning, error, critical)
            ssl_keyfile: Path to SSL private key file (for HTTPS)
            ssl_certfile: Path to SSL certificate file (for HTTPS)
        """
        self.host = host
        self.port = port
        self.log_level = log_level
        self.ssl_keyfile = ssl_keyfile
        self.ssl_certfile = ssl_certfile
        self.app = fastapi_app
        # Background task wrapping uvicorn's serve() when started via start_async().
        self._server_task: Optional[asyncio.Task] = None
        # Set in stop() as a shutdown signal; NOTE(review): nothing visible
        # here waits on this event — task cancellation below is what actually
        # stops the server.
        self._should_exit = asyncio.Event()

    def start(self) -> None:
        """
        Start the server synchronously. This will block until the server is stopped.
        """
        uvicorn.run(
            self.app, 
            host=self.host, 
            port=self.port, 
            log_level=self.log_level,
            ssl_keyfile=self.ssl_keyfile,
            ssl_certfile=self.ssl_certfile
        )

    async def start_async(self) -> None:
        """
        Start the server asynchronously. This will return immediately and the server
        will run in the background.
        """
        server_config = uvicorn.Config(
            self.app, 
            host=self.host, 
            port=self.port, 
            log_level=self.log_level,
            ssl_keyfile=self.ssl_keyfile,
            ssl_certfile=self.ssl_certfile
        )

        self._should_exit.clear()
        server = uvicorn.Server(server_config)

        # Create a task to run the server
        self._server_task = asyncio.create_task(server.serve())

        # Wait a short time to ensure the server starts.
        # NOTE(review): this is a fixed heuristic delay, not a readiness
        # check — the server may not yet be accepting connections.
        await asyncio.sleep(0.5)

        protocol = "https" if self.ssl_certfile else "http"
        logger.info(f"Server started at {protocol}://{self.host}:{self.port}")

    async def stop(self) -> None:
        """
        Stop the server if it's running asynchronously.
        """
        if self._server_task and not self._server_task.done():
            # Signal the server to exit (advisory; see NOTE in __init__)
            self._should_exit.set()

            # Cancel the server task — this is what actually shuts it down
            self._server_task.cancel()

            try:
                await self._server_task
            except asyncio.CancelledError:
                # Expected: cancellation surfaces here once the task unwinds
                logger.info("Server stopped")

            self._server_task = None
```

--------------------------------------------------------------------------------
/libs/lume/src/VM/VMDetailsPrinter.swift:
--------------------------------------------------------------------------------

```swift
import Foundation

/// Prints VM status information in a formatted table
/// Prints VM status information in a formatted table
enum VMDetailsPrinter {
    /// Represents a column in the VM status table
    private struct Column: Sendable {
        // Column title printed in the header row
        let header: String
        // Fixed character width the value is padded/truncated to
        let width: Int
        // Extracts this column's display string from a VM's details
        let getValue: @Sendable (VMDetails) -> String
    }

    /// Configuration for all columns in the status table
    private static let columns: [Column] = [
        Column(header: "name", width: 34, getValue: { $0.name }),
        Column(header: "os", width: 8, getValue: { $0.os }),
        Column(header: "cpu", width: 8, getValue: { String($0.cpuCount) }),
        Column(
            header: "memory", width: 8,
            getValue: {
                // memorySize is divided by 1024^3, i.e. treated as bytes → GB
                String(format: "%.2fG", Float($0.memorySize) / (1024 * 1024 * 1024))
            }),
        Column(
            header: "disk", width: 16,
            getValue: {
                "\($0.diskSize.formattedAllocated)/\($0.diskSize.formattedTotal)"
            }),
        Column(header: "display", width: 12, getValue: { $0.display }),
        Column(
            header: "status", width: 16,
            getValue: {
                $0.status
            }),
        Column(header: "storage", width: 16, getValue: { $0.locationName }),
        Column(
            header: "shared_dirs", width: 54,
            getValue: { vm in
                // Only show shared directories if the VM is running
                if vm.status == "running", let dirs = vm.sharedDirectories, !dirs.isEmpty {
                    return dirs.map { "\($0.hostPath) (\($0.readOnly ? "ro" : "rw"))" }.joined(separator: ", ")
                } else {
                    return "-"
                }
            }),
        Column(
            header: "ip", width: 16,
            getValue: {
                $0.ipAddress ?? "-"
            }),
        Column(
            header: "vnc", width: 50,
            getValue: {
                $0.vncUrl ?? "-"
            }),
    ]

    /// Prints the status of all VMs either as pretty-printed JSON or as a
    /// fixed-width text table (header row followed by one row per VM).
    /// - Parameters:
    ///   - vms: Array of VM status objects to display
    ///   - format: Output format (.json or text)
    ///   - print: Sink for each output line; defaults to stdout
    /// - Throws: Encoding errors from JSONEncoder in JSON mode
    static func printStatus(
        _ vms: [VMDetails], format: FormatOption, print: (String) -> Void = { print($0) }
    ) throws {
        if format == .json {
            let jsonEncoder = JSONEncoder()
            jsonEncoder.outputFormatting = .prettyPrinted
            let jsonData = try jsonEncoder.encode(vms)
            let jsonString = String(data: jsonData, encoding: .utf8)!
            print(jsonString)
        } else {
            printHeader(print: print)
            vms.forEach({ vm in 
                printVM(vm, print: print)
            })
        }
    }

    /// Emits the single header row with each title padded to its column width.
    private static func printHeader(print: (String) -> Void = { print($0) }) {
        let paddedHeaders = columns.map { $0.header.paddedToWidth($0.width) }
        print(paddedHeaders.joined())
    }

    /// Emits one table row for a VM, padding each value to its column width.
    private static func printVM(_ vm: VMDetails, print: (String) -> Void = { print($0) }) {
        let paddedColumns = columns.map { column in
            column.getValue(vm).paddedToWidth(column.width)
        }
        print(paddedColumns.joined())
    }
}

extension String {
    /// Pads the string with trailing spaces to the specified width.
    /// Values longer than `width` are truncated to exactly `width`
    /// (the documented behavior of `padding(toLength:withPad:startingAt:)`).
    /// - Parameter width: Target width for padding
    /// - Returns: String of exactly `width` characters
    fileprivate func paddedToWidth(_ width: Int) -> String {
        padding(toLength: width, withPad: " ", startingAt: 0)
    }
}

```

--------------------------------------------------------------------------------
/libs/lume/src/VM/DarwinVM.swift:
--------------------------------------------------------------------------------

```swift
import Foundation

/// macOS-specific virtual machine implementation.
///
/// Extends the generic `VM` base class with the macOS install flow:
/// `setup(...)` resolves an IPSW restore image, raises CPU/memory to the
/// image's minimums, persists the VM configuration, and drives the installer.
@MainActor
final class DarwinVM: VM {
    // Resolves IPSW images: downloads "latest" and reads install requirements.
    private let imageLoader: ImageLoader

    /// Creates a macOS VM wrapper.
    /// - Parameters:
    ///   - vmDirContext: On-disk directory context for this VM.
    ///   - virtualizationServiceFactory: Builds the virtualization backend;
    ///     defaults to `DarwinVirtualizationService`.
    ///   - vncServiceFactory: Builds the VNC service for the VM directory.
    ///   - imageLoader: Source of IPSW images and their requirements.
    init(
        vmDirContext: VMDirContext,
        virtualizationServiceFactory: @escaping (VMVirtualizationServiceContext) throws -> VMVirtualizationService = { try DarwinVirtualizationService(configuration: $0) },
        vncServiceFactory: @escaping (VMDirectory) -> VNCService = { DefaultVNCService(vmDirectory: $0) },
        imageLoader: ImageLoader
    ) {
        self.imageLoader = imageLoader
        super.init(
            vmDirContext: vmDirContext,
            virtualizationServiceFactory: virtualizationServiceFactory,
            vncServiceFactory: vncServiceFactory
        )
    }

    override func getOSType() -> String {
        return "macOS"
    }

    // MARK: - Installation and Configuration
    
    /// Installs macOS into this VM from an IPSW image.
    /// - Parameters:
    ///   - ipswPath: Path to an IPSW file, or the sentinel "latest" to
    ///     download the newest supported image.
    ///   - cpuCount: Requested CPU count; raised to the image minimum if lower.
    ///   - memorySize: Requested memory in bytes; raised to the image minimum if lower.
    ///   - diskSize: Disk size in bytes (applied as requested; no minimum check here).
    ///   - display: Display resolution string for the VM configuration.
    override func setup(ipswPath: String, cpuCount: Int, memorySize: UInt64, diskSize: UInt64, display: String) async throws {
        let imagePath: Path
        if ipswPath == "latest" {
            Logger.info("Downloading latest supported Image...")
            let downloadedPath = try await self.imageLoader.downloadLatestImage()
            imagePath = Path(downloadedPath.path)
        } else {
            imagePath = Path(ipswPath)
        }

        // The image dictates hardware minimums (CPU, memory, hardware model).
        let requirements = try await imageLoader.loadImageRequirements(from: imagePath.url)
        try setDiskSize(diskSize)

        // Never go below the image's minimum supported CPU count.
        let finalCpuCount = max(cpuCount, requirements.minimumSupportedCPUCount)
        try setCpuCount(finalCpuCount)
        if finalCpuCount != cpuCount {
            Logger.info("CPU count overridden due to minimum image requirements", metadata: ["original": "\(cpuCount)", "final": "\(finalCpuCount)"])
        }

        // Never go below the image's minimum supported memory size.
        let finalMemorySize = max(memorySize, requirements.minimumSupportedMemorySize)
        try setMemorySize(finalMemorySize)
        if finalMemorySize != memorySize {
            Logger.info("Memory size overridden due to minimum image requirements", metadata: ["original": "\(memorySize)", "final": "\(finalMemorySize)"])
        }

        // Persist the final configuration before creating the service, so the
        // virtualization context is built from consistent values.
        try updateVMConfig(
            vmConfig: try VMConfig(
                os: getOSType(),
                cpuCount: finalCpuCount,
                memorySize: finalMemorySize,
                diskSize: diskSize,
                macAddress: DarwinVirtualizationService.generateMacAddress(),
                display: display,
                hardwareModel: requirements.hardwareModel,
                machineIdentifier: DarwinVirtualizationService.generateMachineIdentifier()
            )
        )

        let service: any VMVirtualizationService = try virtualizationServiceFactory(
            try createVMVirtualizationServiceContext(
                cpuCount: finalCpuCount,
                memorySize: finalMemorySize,
                display: display
            )
        )
        // macOS installation needs Darwin-specific capabilities (aux storage,
        // installer), so a generic service is rejected here.
        guard let darwinService = service as? DarwinVirtualizationService else {
            throw VMError.internalError("Installation requires DarwinVirtualizationService")
        }

        // Create auxiliary storage with hardware model
        try darwinService.createAuxiliaryStorage(at: vmDirContext.nvramPath, hardwareModel: requirements.hardwareModel)

        try await darwinService.installMacOS(imagePath: imagePath) { progress in
            Logger.info("Installing macOS", metadata: ["progress": "\(Int(progress * 100))%"])
        }
    }
}

```

--------------------------------------------------------------------------------
/scripts/build.sh:
--------------------------------------------------------------------------------

```bash
#!/bin/bash

# Development bootstrap: wipes any previous build artifacts, recreates the
# project virtualenv, and (below) installs every workspace package in
# editable mode.

# Exit on error
set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Function to print step information
print_step() {
    echo -e "${BLUE}==> $1${NC}"
}

# Function to print success message
print_success() {
    echo -e "${GREEN}==> Success: $1${NC}"
}

# Function to print error message
print_error() {
    echo -e "${RED}==> Error: $1${NC}" >&2
}

# Get the script's directory
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_ROOT="$( cd "${SCRIPT_DIR}/.." && pwd )"

# Change to project root
cd "$PROJECT_ROOT"

# Load environment variables from .env.local
if [ -f .env.local ]; then
    print_step "Loading environment variables from .env.local..."
    # set -a exports every variable assigned by the sourced file.
    set -a
    source .env.local
    set +a
    print_success "Environment variables loaded"
else
    print_error ".env.local file not found"
    exit 1
fi

# Clean up existing environments and cache
# NOTE(review): deleting directories while find is still traversing them can
# print harmless "No such file or directory" warnings.
print_step "Cleaning up existing environments..."
find . -type d -name "__pycache__" -exec rm -rf {} +
find . -type d -name ".pytest_cache" -exec rm -rf {} +
find . -type d -name "dist" -exec rm -rf {} +
find . -type d -name ".venv" -exec rm -rf {} +
find . -type d -name "*.egg-info" -exec rm -rf {} +
print_success "Environment cleanup complete"

# Create and activate virtual environment
print_step "Creating virtual environment..."
python -m venv .venv
source .venv/bin/activate

# Upgrade pip and install build tools
print_step "Upgrading pip and installing build tools..."
python -m pip install --upgrade pip setuptools wheel
# Installs one workspace package in editable mode.
#
# Arguments:
#   $1 - package directory (relative to $PROJECT_ROOT)
#   $2 - human-readable package name (used for log output only)
#   $3 - optional pip extras, e.g. "all" -> pip install -e ".[all]"
#
# The pyproject.toml check happens *before* changing directory, so on the
# failure path the working directory is left untouched. (The original
# returned from inside $package_dir, leaving the cwd wrong for any caller
# that tolerates the non-zero status.)
install_package() {
    local package_dir=$1
    local package_name=$2
    local extras=$3

    print_step "Installing ${package_name}..."

    if [ ! -f "${package_dir}/pyproject.toml" ]; then
        print_error "No pyproject.toml found in ${package_dir}"
        return 1
    fi

    cd "$package_dir"
    if [ -n "$extras" ]; then
        pip install -e ".[${extras}]"
    else
        pip install -e .
    fi
    cd "$PROJECT_ROOT"
}

# Install packages in order of dependency
# (core and pylume first; everything else depends on them directly or
# transitively, and editable installs resolve siblings from the local tree.)
print_step "Installing packages in development mode..."

# Install core first (base package with telemetry support)
install_package "libs/python/core" "core"

# Install pylume (base dependency)
install_package "libs/python/pylume" "pylume"

# Install computer with all its dependencies and extras
install_package "libs/python/computer" "computer" "all"

# Install omniparser
install_package "libs/python/som" "som"

# Install agent with all its dependencies and extras
install_package "libs/python/agent" "agent" "all"

# Install computer-server
install_package "libs/python/computer-server" "computer-server"

# Install mcp-server
install_package "libs/python/mcp-server" "mcp-server"

# Install development tools from root project
print_step "Installing development dependencies..."
pip install -e ".[dev,test,docs]"

# Create a .env file for VS Code to use the virtual environment
# (PYTHONPATH lists every workspace package so the editor resolves imports.)
print_step "Creating .env file for VS Code..."
echo "PYTHONPATH=${PROJECT_ROOT}/libs/python/core:${PROJECT_ROOT}/libs/python/computer:${PROJECT_ROOT}/libs/python/agent:${PROJECT_ROOT}/libs/python/som:${PROJECT_ROOT}/libs/python/pylume:${PROJECT_ROOT}/libs/python/computer-server:${PROJECT_ROOT}/libs/python/mcp-server" > .env

print_success "All packages installed successfully!"
print_step "Your virtual environment is ready. To activate it:"
echo "  source .venv/bin/activate"

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/image_retention.py:
--------------------------------------------------------------------------------

```python
"""
Image retention callback handler that limits the number of recent images in message history.
"""

from typing import List, Dict, Any, Optional
from .base import AsyncCallbackHandler


class ImageRetentionCallback(AsyncCallbackHandler):
    """
    Callback handler that applies image retention policy to limit the number
    of recent images in message history to prevent context window overflow.
    """

    def __init__(self, only_n_most_recent_images: Optional[int] = None):
        """
        Initialize the image retention callback.

        Args:
            only_n_most_recent_images: If set, only keep the N most recent images in message history
        """
        self.only_n_most_recent_images = only_n_most_recent_images

    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Apply image retention policy to messages before sending to agent loop.

        Args:
            messages: List of message dictionaries

        Returns:
            List of messages with image retention policy applied
        """
        if self.only_n_most_recent_images is None:
            return messages
        return self._apply_image_retention(messages)

    def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Keep only the N most recent screenshots in the message history.

        Drops older computer_call_output items that carry an image_url,
        together with the computer_call that produced each one (matched by
        call_id on the immediately preceding item) and a single reasoning
        item directly before that call, if present.

        Args:
            messages: List of message dictionaries

        Returns:
            Filtered list of messages with image retention applied
        """
        limit = self.only_n_most_recent_images
        if limit is None:
            return messages

        # Positions of every screenshot-bearing computer_call_output.
        screenshot_positions = [
            pos
            for pos, item in enumerate(messages)
            if item.get("type") == "computer_call_output"
            and isinstance(item.get("output"), dict)
            and "image_url" in item["output"]
        ]

        # Already within budget: nothing to trim.
        if len(screenshot_positions) <= limit:
            return messages

        # The most recent `limit` screenshots survive untouched.
        survivors = set(screenshot_positions[-limit:])

        doomed: set = set()
        for pos in screenshot_positions:
            if pos in survivors:
                continue

            # Drop the screenshot output itself.
            doomed.add(pos)

            # Drop the adjacent computer_call with the same call_id, if any.
            call_pos = pos - 1
            if (
                call_pos >= 0
                and messages[call_pos].get("type") == "computer_call"
                and messages[call_pos].get("call_id") == messages[pos].get("call_id")
            ):
                doomed.add(call_pos)
                # And one reasoning item immediately before that call.
                reason_pos = call_pos - 1
                if reason_pos >= 0 and messages[reason_pos].get("type") == "reasoning":
                    doomed.add(reason_pos)

        return [item for pos, item in enumerate(messages) if pos not in doomed]
```

--------------------------------------------------------------------------------
/libs/python/computer/computer/interface/models.py:
--------------------------------------------------------------------------------

```python
from enum import Enum
from typing import Dict, List, Any, TypedDict, Union, Literal
from dataclasses import dataclass

@dataclass
class CommandResult:
    """Result of a shell command execution, mirroring subprocess conventions.

    The hand-written __init__ previously defined here was redundant:
    @dataclass generates an identical __init__(stdout, stderr, returncode)
    from the field declarations (plus __repr__ and __eq__).
    """
    # Captured standard output of the command.
    stdout: str
    # Captured standard error of the command.
    stderr: str
    # Process exit status; 0 conventionally means success.
    returncode: int

# Navigation key literals (cursor/page movement names accepted by press_key)
NavigationKey = Literal['pagedown', 'pageup', 'home', 'end', 'left', 'right', 'up', 'down']

# Special key literals (editing/control keys)
SpecialKey = Literal['enter', 'esc', 'tab', 'space', 'backspace', 'del']

# Modifier key literals (held in combination with other keys)
ModifierKey = Literal['ctrl', 'alt', 'shift', 'win', 'command', 'option']

# Function key literals (f1 through f12)
FunctionKey = Literal['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12']

class Key(Enum):
    """Keyboard keys that can be used with press_key.

    These key names map to PyAutoGUI's expected key names.

    Note: several members share a value and are therefore Enum *aliases* of
    the first member with that value (ENTER aliases RETURN, ESC aliases
    ESCAPE) — member order must not change, or the canonical names flip.
    """
    # Navigation
    PAGE_DOWN = 'pagedown'
    PAGE_UP = 'pageup'
    HOME = 'home'
    END = 'end'
    LEFT = 'left'
    RIGHT = 'right'
    UP = 'up'
    DOWN = 'down'
    
    # Special keys
    RETURN = 'enter'
    ENTER = 'enter'
    ESCAPE = 'esc'
    ESC = 'esc'
    TAB = 'tab'
    SPACE = 'space'
    BACKSPACE = 'backspace'
    DELETE = 'del'
    
    # Modifier keys
    ALT = 'alt'
    CTRL = 'ctrl'
    SHIFT = 'shift'
    WIN = 'win'
    COMMAND = 'command'
    OPTION = 'option'
    
    # Function keys
    F1 = 'f1'
    F2 = 'f2'
    F3 = 'f3'
    F4 = 'f4'
    F5 = 'f5'
    F6 = 'f6'
    F7 = 'f7'
    F8 = 'f8'
    F9 = 'f9'
    F10 = 'f10'
    F11 = 'f11'
    F12 = 'f12'

    @classmethod
    def from_string(cls, key: str) -> 'Key | str':
        """Convert a string key name to a Key enum value.

        The lookup is case-insensitive and ignores surrounding whitespace,
        but the *original* (un-normalized) string is returned when no
        mapping matches — e.g. single characters like 'a' pass through.

        Args:
            key: String key name to convert
            
        Returns:
            Key enum value if the string matches a known alternative name,
            otherwise the original string unchanged
        """
        # Map common alternative names to enum values
        key_mapping = {
            'page_down': cls.PAGE_DOWN,
            'page down': cls.PAGE_DOWN,
            'pagedown': cls.PAGE_DOWN,
            'page_up': cls.PAGE_UP,
            'page up': cls.PAGE_UP,
            'pageup': cls.PAGE_UP,
            'return': cls.RETURN,
            'enter': cls.ENTER,
            'escape': cls.ESCAPE,
            'esc': cls.ESC,
            'delete': cls.DELETE,
            'del': cls.DELETE,
            # Modifier key mappings
            'alt': cls.ALT,
            'ctrl': cls.CTRL,
            'control': cls.CTRL,
            'shift': cls.SHIFT,
            'win': cls.WIN,
            'windows': cls.WIN,
            'super': cls.WIN,
            'command': cls.COMMAND,
            'cmd': cls.COMMAND,
            '⌘': cls.COMMAND,
            'option': cls.OPTION,
            '⌥': cls.OPTION,
        }
        
        # Normalize only for the lookup; fall back to the original string.
        normalized = key.lower().strip()
        return key_mapping.get(normalized, key)

# Combined key type: any Key enum member, any of the literal name groups
# above, or a free-form string (e.g. a single character) passed through.
KeyType = Union[Key, NavigationKey, SpecialKey, ModifierKey, FunctionKey, str]

# Mouse button names accepted by mouse actions
MouseButton = Literal['left', 'right', 'middle']

class AccessibilityWindow(TypedDict):
    """Information about a window in the accessibility tree."""
    app_name: str              # Owning application's name
    pid: int                   # Owning application's process id
    frontmost: bool            # True if this app is frontmost
    has_windows: bool          # True if the app reports any windows
    windows: List[Dict[str, Any]]  # Per-window detail dicts (schema provider-defined)

class AccessibilityTree(TypedDict):
    """Complete accessibility tree information."""
    success: bool                  # Whether the tree was retrieved successfully
    frontmost_application: str     # Name of the frontmost application
    windows: List[AccessibilityWindow] 
```

--------------------------------------------------------------------------------
/libs/python/computer/computer/providers/base.py:
--------------------------------------------------------------------------------

```python
"""Base provider interface for VM backends."""

import abc
from enum import StrEnum
from typing import Dict, List, Optional, Any, AsyncContextManager


class VMProviderType(StrEnum):
    """Enum of supported VM provider types.

    NOTE(review): StrEnum requires Python 3.11+; members compare equal to
    their string values (e.g. ``VMProviderType.LUME == "lume"``).
    """
    LUME = "lume"
    LUMIER = "lumier"
    CLOUD = "cloud"
    WINSANDBOX = "winsandbox"
    DOCKER = "docker"
    UNKNOWN = "unknown"  # fallback for unrecognized provider strings


class BaseVMProvider(AsyncContextManager):
    """Base interface for VM providers.
    
    All VM provider implementations must implement this interface.

    Inherits AsyncContextManager so providers are used as
    ``async with provider: ...``; the base is an ABC, so the
    @abc.abstractmethod decorators below are enforced on instantiation.
    """
    
    @property
    @abc.abstractmethod
    def provider_type(self) -> VMProviderType:
        """Get the provider type."""
        pass
        
    @abc.abstractmethod
    async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
        """Get VM information by name.
        
        Args:
            name: Name of the VM to get information for
            storage: Optional storage path override. If provided, this will be used
                    instead of the provider's default storage path.
        
        Returns:
            Dictionary with VM information including status, IP address, etc.
        """
        pass
        
    @abc.abstractmethod
    async def list_vms(self) -> List[Dict[str, Any]]:
        """List all available VMs.

        Returns:
            List of dictionaries, one per VM, in provider-defined shape.
        """
        pass
        
    @abc.abstractmethod
    async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
        """Run a VM by name with the given options.
        
        Args:
            image: Name/tag of the image to use
            name: Name of the VM to run
            run_opts: Dictionary of run options (memory, cpu, etc.)
            storage: Optional storage path override. If provided, this will be used
                    instead of the provider's default storage path.
        
        Returns:
            Dictionary with VM run status and information
        """
        pass
        
    @abc.abstractmethod
    async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
        """Stop a VM by name.
        
        Args:
            name: Name of the VM to stop
            storage: Optional storage path override. If provided, this will be used
                    instead of the provider's default storage path.
        
        Returns:
            Dictionary with VM stop status and information
        """
        pass
        
    @abc.abstractmethod
    async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
        """Update VM configuration.
        
        Args:
            name: Name of the VM to update
            update_opts: Dictionary of update options (memory, cpu, etc.)
            storage: Optional storage path override. If provided, this will be used
                    instead of the provider's default storage path.
        
        Returns:
            Dictionary with VM update status and information
        """
        pass
        
    @abc.abstractmethod
    async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str:
        """Get the IP address of a VM, waiting indefinitely until it's available.
        
        Args:
            name: Name of the VM to get the IP for
            storage: Optional storage path override. If provided, this will be used
                    instead of the provider's default storage path.
            retry_delay: Delay between retries in seconds (default: 2)
            
        Returns:
            IP address of the VM when it becomes available
        """
        pass

```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/migration-guide.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Migration Guide
---

This guide lists **breaking changes** when migrating from the original `ComputerAgent` (v0.3.x) to the rewritten `ComputerAgent` (v0.4.x) and shows old vs new usage for all four agent loops.

## Breaking Changes

- **Initialization:**
  - `ComputerAgent` (v0.4.x) uses `model` as a string (e.g. "anthropic/claude-3-5-sonnet-20241022") instead of `LLM` and `AgentLoop` objects.
  - `tools` is a list (can include multiple computers and decorated functions).
  - `callbacks` are now first-class for extensibility (image retention, budget, trajectory, logging, etc).
- **No explicit `loop` parameter:**
  - Loop is inferred from the `model` string (e.g. `anthropic/`, `openai/`, `omniparser+`, `ui-tars`).
- **No explicit `computer` parameter:**
  - Computers are added to `tools` list.

---

## Usage Examples: Old vs New

### 1. Anthropic Loop
**Old:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        computer=computer,
        loop=AgentLoop.ANTHROPIC,
        model=LLM(provider=LLMProvider.ANTHROPIC)
    )
    async for result in agent.run("Take a screenshot"):
        print(result)
```
**New:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        model="anthropic/claude-3-5-sonnet-20241022",
        tools=[computer]
    )
    messages = [{"role": "user", "content": "Take a screenshot"}]
    async for result in agent.run(messages):
        for item in result["output"]:
            if item["type"] == "message":
                print(item["content"][0]["text"])
```

### 2. OpenAI Loop
**Old:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        computer=computer,
        loop=AgentLoop.OPENAI,
        model=LLM(provider=LLMProvider.OPENAI)
    )
    async for result in agent.run("Take a screenshot"):
        print(result)
```
**New:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        model="openai/computer-use-preview",
        tools=[computer]
    )
    messages = [{"role": "user", "content": "Take a screenshot"}]
    async for result in agent.run(messages):
        for item in result["output"]:
            if item["type"] == "message":
                print(item["content"][0]["text"])
```

### 3. UI-TARS Loop
**Old:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        computer=computer,
        loop=AgentLoop.UITARS,
        model=LLM(provider=LLMProvider.OAICOMPAT, name="ByteDance-Seed/UI-TARS-1.5-7B", provider_base_url="https://.../v1")
    )
    async for result in agent.run("Take a screenshot"):
        print(result)
```
**New:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
        tools=[computer]
    )
    messages = [{"role": "user", "content": "Take a screenshot"}]
    async for result in agent.run(messages):
        for item in result["output"]:
            if item["type"] == "message":
                print(item["content"][0]["text"])
```

### 4. Omni Loop
**Old:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        computer=computer,
        loop=AgentLoop.OMNI,
        model=LLM(provider=LLMProvider.OLLAMA, name="gemma3")
    )
    async for result in agent.run("Take a screenshot"):
        print(result)
```
**New:**
```python
async with Computer() as computer:
    agent = ComputerAgent(
        model="omniparser+ollama_chat/gemma3",
        tools=[computer]
    )
    messages = [{"role": "user", "content": "Take a screenshot"}]
    async for result in agent.run(messages):
        for item in result["output"]:
            if item["type"] == "message":
                print(item["content"][0]["text"])
```

```

--------------------------------------------------------------------------------
/docs/content/docs/libraries/lume/faq.md:
--------------------------------------------------------------------------------

```markdown
---
title: FAQ
---

### Where are the VMs stored?

VMs are stored in `~/.lume` by default. You can configure additional storage locations using the `lume config` command.

### How are images cached?

Images are cached in `~/.lume/cache`. When doing `lume pull <image>`, it will check if the image is already cached. If not, it will download the image and cache it, removing any older versions.

### Where is the configuration file stored?

Lume follows the XDG Base Directory specification for the configuration file:

- Configuration is stored in `$XDG_CONFIG_HOME/lume/config.yaml` (defaults to `~/.config/lume/config.yaml`)

By default, other data is stored in:
- VM data: `~/.lume`
- Cache files: `~/.lume/cache`

The config file contains settings for:
- VM storage locations and the default location
- Cache directory location
- Whether caching is enabled

You can view and modify these settings using the `lume config` commands:

```bash
# View current configuration
lume config get

# Manage VM storage locations
lume config storage list                 # List all VM storage locations
lume config storage add <name> <path>    # Add a new VM storage location
lume config storage remove <name>        # Remove a VM storage location
lume config storage default <name>       # Set the default VM storage location

# Manage cache settings
lume config cache get                    # Get current cache directory
lume config cache set <path>             # Set cache directory

# Manage image caching settings
lume config caching get                  # Show current caching status
lume config caching set <boolean>        # Enable or disable image caching
```

### How do I use multiple VM storage locations?

Lume supports storing VMs in different locations (e.g., internal drive, external SSD). After configuring storage locations, you can specify which location to use with the `--storage` parameter in various commands:

```bash
# Create a VM in a specific storage location
lume create my-vm --os macos --ipsw latest --storage ssd

# Run a VM from a specific storage location
lume run my-vm --storage ssd

# Delete a VM from a specific storage location
lume delete my-vm --storage ssd

# Pull an image to a specific storage location
lume pull macos-sequoia-vanilla:latest --name my-vm --storage ssd

# Clone a VM between storage locations
lume clone source-vm cloned-vm --source-storage default --dest-storage ssd
```

If you don't specify a storage location, Lume will use the default one or search across all configured locations.

### Are VM disks taking up all the disk space?

No, macOS uses sparse files, which only allocate space as needed. For example, VM disks totaling 50 GB may only use 20 GB on disk.

### How do I get the latest macOS restore image URL?

```bash
lume ipsw
```

### How do I delete a VM?

```bash
lume delete <name>
```

### How to Install macOS from an IPSW Image

#### Create a new macOS VM using the latest supported IPSW image:
Run the following command to create a new macOS virtual machine using the latest available IPSW image:

```bash
lume create <name> --os macos --ipsw latest
```

#### Create a new macOS VM using a specific IPSW image:
To create a macOS virtual machine from an older or specific IPSW file, first download the desired IPSW (UniversalMac) from a trusted source.

Then, use the downloaded IPSW path:

```bash
lume create <name> --os macos --ipsw <downloaded_ipsw_path>
```

### How do I install a custom Linux image?

The process for creating a custom Linux image differs from that of macOS, as IPSW restore files are not used. You need to create a Linux VM first, then mount a setup image file to the VM for the first boot.

```bash
lume create <name> --os linux

lume run <name> --mount <path-to-setup-image>

lume run <name>
```

```

--------------------------------------------------------------------------------
/scripts/run-docker-dev.sh:
--------------------------------------------------------------------------------

```bash
#!/bin/bash

# Colors for output
GREEN='\033[0;32m'
BLUE='\033[0;34m'
RED='\033[0;31m'
NC='\033[0m' # No Color

# Colored log helpers: each prints "==> <msg>" wrapped in an ANSI color.
# printf '%b\n' expands the backslash escapes stored in the color
# variables, matching what `echo -e` did.
print_info() {
    printf '%b\n' "${BLUE}==> $1${NC}"
}

print_success() {
    printf '%b\n' "${GREEN}==> $1${NC}"
}

print_error() {
    printf '%b\n' "${RED}==> $1${NC}"
}

# Docker image name
IMAGE_NAME="cua-dev-image"
CONTAINER_NAME="cua-dev-container"
PLATFORM="linux/arm64"

# Detect platform based on architecture
arch=$(uname -m)

if [[ $arch == x86_64* ]]; then
    PLATFORM="linux/amd64"
    print_info "X64 Architecture detected, using platform: ${PLATFORM}"
elif [[ $arch == i*86 ]]; then
    PLATFORM="linux/386"
    print_info "X32 Architecture detected, using platform: ${PLATFORM}"
elif [[ $arch == arm* ]] || [[ $arch == aarch64 ]]; then
    # "aarch64" does not match the arm* glob, hence the explicit second test;
    # macOS reports "arm64", which arm* does match.
    PLATFORM="linux/arm64"
    print_info "ARM Architecture detected, using platform: ${PLATFORM}"
else
    # Fallback to amd64 for unknown architectures
    PLATFORM="linux/amd64"
    print_info "Unknown architecture ($arch), defaulting to platform: ${PLATFORM}"
fi

# Environment variables
# PYTHONPATH lists every workspace package as mounted inside the container.
PYTHONPATH="/app/libs/python/core:/app/libs/python/computer:/app/libs/python/agent:/app/libs/python/som:/app/libs/python/pylume:/app/libs/python/computer-server:/app/libs/python/mcp-server"

# Check if Docker is installed
if ! command -v docker &> /dev/null; then
    print_error "Docker is not installed. Please install Docker first."
    exit 1
fi

# Command options
# Dispatch on the first CLI argument: build | run [--interactive|file] | stop.
case "$1" in
    build)
        print_info "Building the development Docker image..."
        print_info "This will install all dependencies but won't include source code"
        docker build -f Dockerfile --platform=${PLATFORM} -t ${IMAGE_NAME} .
        print_success "Development Docker image built successfully!"
        ;;
    
    run)
        # Check for interactive flag
        if [ "$2" == "--interactive" ]; then
            print_info "Running the development Docker container with interactive shell..."
            print_info "Mounting source code from host"
            print_info "Connecting to host.docker.internal:7777"
            
            # Interactive shell with the repo mounted at /app.
            docker run -it --rm \
                --platform=${PLATFORM} \
                --name ${CONTAINER_NAME} \
                -v "$(pwd):/app" \
                -e PYTHONPATH=${PYTHONPATH} \
                -e DISPLAY=${DISPLAY:-:0} \
                -e PYLUME_HOST="host.docker.internal" \
                -p 7860:7860 \
                ${IMAGE_NAME} bash
        else
            # Run the specified example
            if [ -z "$2" ]; then
                print_error "Please specify an example file, e.g., ./run-docker-dev.sh run computer_examples.py"
                exit 1
            fi
            print_info "Running example: $2"
            print_info "Connecting to host.docker.internal:7777"
            
            # Same container setup, but executes one example script and exits.
            docker run -it --rm \
                --platform=${PLATFORM} \
                --name ${CONTAINER_NAME} \
                -v "$(pwd):/app" \
                -e PYTHONPATH=${PYTHONPATH} \
                -e DISPLAY=${DISPLAY:-:0} \
                -e PYLUME_HOST="host.docker.internal" \
                -p 7860:7860 \
                ${IMAGE_NAME} python "/app/examples/$2"
        fi
        ;;
    
    stop)
        print_info "Stopping any running containers..."
        # Ignore errors if the container is not running.
        docker stop ${CONTAINER_NAME} 2>/dev/null || true
        print_success "Done!"
        ;;
        
    *)
        echo "Usage: $0 {build|run [--interactive] [filename]|stop}"
        echo ""
        echo "Commands:"
        echo "  build                      Build the development Docker image with dependencies"
        echo "  run [example_filename]     Run the specified example file in the container"
        echo "  run --interactive          Run the container with mounted code and get an interactive shell"
        echo "  stop                       Stop the container"
        exit 1
esac

exit 0 
```

--------------------------------------------------------------------------------
/libs/lume/src/Commands/Run.swift:
--------------------------------------------------------------------------------

```swift
import ArgumentParser
import Foundation
import Virtualization

struct Run: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Run a virtual machine"
    )

    /// VM name or image reference to pull and run (format: name or name:tag).
    @Argument(
        help: "Name of the virtual machine or image to pull and run (format: name or name:tag)",
        completion: .custom(completeVMName))
    var name: String

    /// Suppresses launching the VNC client after the VM starts.
    @Flag(name: [.short, .long], help: "Do not start the VNC client")
    var noDisplay: Bool = false

    /// Raw "path" or "path:tag" strings; parsed by `parsedSharedDirectories`.
    @Option(
        name: [.customLong("shared-dir")],
        help:
            "Directory to share with the VM. Can be just a path for read-write access (e.g. ~/src) or path:tag where tag is 'ro' for read-only or 'rw' for read-write (e.g. ~/src:ro)"
    )
    var sharedDirectories: [String] = []

    @Option(
        help:
            "For Linux VMs only, a read-only disk image to attach to the VM (e.g. --mount=\"ubuntu.iso\")",
        completion: .file())
    var mount: String?

    /// Raw paths of disk images to attach as USB mass storage devices.
    @Option(
        name: [.customLong("usb-storage")],
        help: "Disk image to attach as a USB mass storage device (e.g. --usb-storage=\"disk.img\")",
        completion: .file())
    var usbStorageDevices: [String] = []

    @Option(help: "Github Container Registry to pull the images from. Defaults to ghcr.io")
    var registry: String = "ghcr.io"

    @Option(help: "Organization to pull the images from. Defaults to trycua")
    var organization: String = "trycua"

    @Option(
        name: [.customLong("vnc-port")],
        help: "Port to use for the VNC server. Defaults to 0 (auto-assign)")
    var vncPort: Int = 0

    // NOTE(review): declared as @Option (not @Flag), so the CLI requires an
    // explicit value, e.g. `--recovery-mode true` — confirm this is intended.
    @Option(help: "For MacOS VMs only, boot into the VM in recovery mode")
    var recoveryMode: Bool = false

    @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
    var storage: String?

    /// Parses each "path" or "path:tag" entry of `sharedDirectories` into a
    /// `SharedDirectory`, defaulting to read-write when no tag is given.
    /// - Throws: `ValidationError` if a tag is neither "ro" nor "rw".
    private var parsedSharedDirectories: [SharedDirectory] {
        get throws {
            try sharedDirectories.map { dirString -> SharedDirectory in
                // Split on the first ':' only; everything before it is the host path.
                let components = dirString.split(separator: ":", maxSplits: 1)
                let hostPath = String(components[0])

                // If no tag is provided, default to read-write
                if components.count == 1 {
                    return SharedDirectory(
                        hostPath: hostPath,
                        tag: VZVirtioFileSystemDeviceConfiguration.macOSGuestAutomountTag,
                        readOnly: false
                    )
                }

                // Parse the tag if provided
                let tag = String(components[1])
                let readOnly: Bool
                switch tag.lowercased() {
                case "ro":
                    readOnly = true
                case "rw":
                    readOnly = false
                default:
                    throw ValidationError(
                        "Invalid tag value. Must be either 'ro' for read-only or 'rw' for read-write"
                    )
                }

                return SharedDirectory(
                    hostPath: hostPath,
                    tag: VZVirtioFileSystemDeviceConfiguration.macOSGuestAutomountTag,
                    readOnly: readOnly
                )
            }
        }
    }

    /// Wraps each raw USB storage path string in a `Path`.
    private var parsedUSBStorageDevices: [Path] {
        usbStorageDevices.map { Path($0) }
    }

    init() {
    }

    /// Resolves the parsed options and delegates VM startup to `LumeController`.
    @MainActor
    func run() async throws {
        try await LumeController().runVM(
            name: name,
            noDisplay: noDisplay,
            sharedDirectories: parsedSharedDirectories,
            mount: mount.map { Path($0) },
            registry: registry,
            organization: organization,
            vncPort: vncPort,
            recoveryMode: recoveryMode,
            storage: storage,
            usbMassStoragePaths: parsedUSBStorageDevices.isEmpty ? nil : parsedUSBStorageDevices
        )
    }
}

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/adapters/models/opencua.py:
--------------------------------------------------------------------------------

```python
from typing import List, Dict, Any
import re
import base64
from io import BytesIO

try:
    import torch  # type: ignore
    from transformers import AutoTokenizer, AutoModel, AutoImageProcessor  # type: ignore
    from PIL import Image  # type: ignore
    import blobfile as _ # assert blobfile is installed
    OPENCUA_AVAILABLE = True
except Exception:
    OPENCUA_AVAILABLE = False


class OpenCUAModel:
    """OpenCUA model handler using AutoTokenizer, AutoModel and AutoImageProcessor.

    Wraps a Hugging Face OpenCUA checkpoint and exposes a single
    :meth:`generate` entry point that consumes HF-style chat messages
    (optionally carrying one data-URL screenshot) and returns decoded text.
    """

    def __init__(self, model_name: str, device: str = "auto", trust_remote_code: bool = False) -> None:
        """Load tokenizer, model and image processor for ``model_name``.

        Args:
            model_name: Hugging Face model id or local checkpoint path.
            device: Passed to ``device_map`` (e.g. "auto", "cuda:0").
            trust_remote_code: Forwarded to every ``from_pretrained`` call.

        Raises:
            ImportError: If the optional OpenCUA dependencies are missing.
        """
        if not OPENCUA_AVAILABLE:
            raise ImportError(
                "OpenCUA requirements not found. Install with: pip install \"cua-agent[opencua-hf]\""
            )
        self.model_name = model_name
        self.device = device
        self.model = None
        self.tokenizer = None
        self.image_processor = None
        self.trust_remote_code = trust_remote_code
        self._load()

    def _load(self) -> None:
        """Instantiate tokenizer, model and image processor from the checkpoint."""
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name, trust_remote_code=self.trust_remote_code
        )
        self.model = AutoModel.from_pretrained(
            self.model_name,
            torch_dtype="auto",
            device_map=self.device,
            trust_remote_code=self.trust_remote_code,
            attn_implementation="sdpa",
        )
        self.image_processor = AutoImageProcessor.from_pretrained(
            self.model_name, trust_remote_code=self.trust_remote_code
        )

    @staticmethod
    def _extract_last_image_b64(messages: List[Dict[str, Any]]) -> str:
        """Return the base64 payload of the most recent data-URL image.

        Scans messages newest-to-oldest and, within each message, content
        items last-to-first. Only items shaped like
        ``{"type": "image", "image": "data:image/...;base64,<payload>"}``
        are considered. Returns ``""`` when no such image exists.
        """
        for msg in reversed(messages):
            content = msg.get("content", [])
            # Content may be a plain string (text-only message); skip those.
            if not isinstance(content, list):
                continue
            for item in reversed(content):
                if isinstance(item, dict) and item.get("type") == "image":
                    url = item.get("image", "")
                    if isinstance(url, str) and url.startswith("data:image/"):
                        return url.split(",", 1)[1]
        return ""

    def generate(self, messages: List[Dict[str, Any]], max_new_tokens: int = 512) -> str:
        """Run one generation step and return the decoded completion text.

        Args:
            messages: HF-format chat messages; the last data-URL image (if
                any) is forwarded to the vision tower.
            max_new_tokens: Generation budget for new tokens.

        Returns:
            The generated text with the prompt tokens stripped.
        """
        assert self.model is not None and self.tokenizer is not None and self.image_processor is not None

        # Tokenize text side using chat template
        input_ids = self.tokenizer.apply_chat_template(
            messages, tokenize=True, add_generation_prompt=True
        )
        input_ids = torch.tensor([input_ids]).to(self.model.device)

        # Prepare image inputs from last data URL image
        image_b64 = self._extract_last_image_b64(messages)
        pixel_values = None
        grid_thws = None
        if image_b64:
            image = Image.open(BytesIO(base64.b64decode(image_b64))).convert("RGB")
            image_info = self.image_processor.preprocess(images=[image])
            # NOTE(review): assumes the vision tower expects bfloat16 inputs —
            # confirm this matches the loaded model's dtype.
            pixel_values = torch.tensor(image_info["pixel_values"]).to(
                dtype=torch.bfloat16, device=self.model.device
            )
            grid_thws = torch.tensor(image_info["image_grid_thw"]) if "image_grid_thw" in image_info else None

        gen_kwargs: Dict[str, Any] = {
            "max_new_tokens": max_new_tokens,
            # Greedy decoding. The previous `temperature=0` is invalid for
            # sampling and merely triggers a warning when do_sample is False,
            # so state the deterministic intent explicitly instead.
            "do_sample": False,
        }
        if pixel_values is not None:
            gen_kwargs["pixel_values"] = pixel_values
        if grid_thws is not None:
            gen_kwargs["grid_thws"] = grid_thws

        with torch.no_grad():
            generated_ids = self.model.generate(
                input_ids,
                **gen_kwargs,
            )

        # Remove prompt tokens so only the completion is decoded.
        prompt_len = input_ids.shape[1]
        generated_ids = generated_ids[:, prompt_len:]
        output_text = self.tokenizer.batch_decode(
            generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )[0]
        return output_text

```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/custom-computer-handlers.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Custom Computers
slug: custom-computer-handlers
---

The Agent SDK supports defining custom computer handlers using a simple dictionary interface. This enables integration with custom automation backends, testing frameworks, or specialized computer control systems.

## Example: Defining a Custom Computer Handler

```python
import asyncio
from PIL import Image

# Define your custom computer functions
async def take_screenshot():
    """Your custom screenshot implementation"""
    # Return PIL Image, bytes, or base64 string
    return Image.new('RGB', (1920, 1080), color='white')

# Create dict-based computer handler - only 'screenshot' is required
custom_computer = {
    'screenshot': take_screenshot, # required

    # everything below is optional
    'environment': 'linux', # linux, mac, windows, browser
    'dimensions': (1920, 1080), # (width, height)
    'click': lambda x, y, button: print(f"Clicking at ({x}, {y}) with {button} button"),
}
```

You can then use this as a tool for your agent:

```python
from agent import ComputerAgent

agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[custom_computer],
)

# Agent will automatically convert dict to agent.computers.CustomComputerHandler
await agent.run("Take a screenshot and click at coordinates 100, 200")
```

## Class-Based Implementation

For more complex implementations, you can create a custom class by inheriting from `AsyncComputerHandler`:

```python
from agent.computers import AsyncComputerHandler
from PIL import Image
from typing import Literal, List, Dict, Union, Optional

class MyCustomComputer(AsyncComputerHandler):
    """Custom computer handler implementation."""
    
    def __init__(self):
        # Initialize your custom computer interface here
        pass
    
    # ==== Computer-Use-Preview Action Space ==== 

    async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
        """Get the current environment type."""
        ...
    
    async def get_dimensions(self) -> tuple[int, int]:
        """Get screen dimensions as (width, height)."""
        ...
    
    async def screenshot(self) -> str:
        """Take a screenshot and return as base64 string."""
        ...
    
    async def click(self, x: int, y: int, button: str = "left") -> None:
        """Click at coordinates with specified button."""
        ...
    
    async def double_click(self, x: int, y: int) -> None:
        """Double click at coordinates."""
        ...
    
    async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Scroll at coordinates with specified scroll amounts."""
        ...
    
    async def type(self, text: str) -> None:
        """Type text."""
        ...
    
    async def wait(self, ms: int = 1000) -> None:
        """Wait for specified milliseconds."""
        ...
    
    async def move(self, x: int, y: int) -> None:
        """Move cursor to coordinates."""
        ...
    
    async def keypress(self, keys: Union[List[str], str]) -> None:
        """Press key combination."""
        ...
    
    async def drag(self, path: List[Dict[str, int]]) -> None:
        """Drag along specified path."""
        ...
    
    async def get_current_url(self) -> str:
        """Get current URL (for browser environments)."""
        ...
    
    # ==== Anthropic Action Space ==== 

    async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
        """Left mouse down at coordinates."""
        ...
    
    async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
        """Left mouse up at coordinates."""
        ...

# Use with agent
custom_computer = MyCustomComputer()

agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[custom_computer],
)

await agent.run("Take a screenshot and click at coordinates 100, 200")
```
```

--------------------------------------------------------------------------------
/libs/python/som/som/models.py:
--------------------------------------------------------------------------------

```python
from typing import List, Tuple, Optional, Literal, Dict, Any, Union
from pydantic import BaseModel, Field, validator


class BoundingBox(BaseModel):
    """Axis-aligned bounding box with normalized corner coordinates.

    (x1, y1) is the top-left corner and (x2, y2) the bottom-right corner.
    """

    x1: float = Field(..., description="Normalized left coordinate")
    y1: float = Field(..., description="Normalized top coordinate")
    x2: float = Field(..., description="Normalized right coordinate")
    y2: float = Field(..., description="Normalized bottom coordinate")

    @property
    def coordinates(self) -> List[float]:
        """Get coordinates as a list [x1, y1, x2, y2]."""
        return [self.x1, self.y1, self.x2, self.y2]


class UIElement(BaseModel):
    """Base class for UI elements detected in a screenshot."""

    # 1-indexed identifier; None until an id is assigned.
    id: Optional[int] = Field(None, description="Unique identifier for the element (1-indexed)")
    # Discriminates the concrete subclass (IconElement / TextElement).
    type: Literal["icon", "text"]
    bbox: BoundingBox
    interactivity: bool = Field(default=False, description="Whether the element is interactive")
    confidence: float = Field(default=1.0, description="Detection confidence score")


class IconElement(UIElement):
    """An interactive icon element (interactivity defaults to True)."""

    type: Literal["icon"] = "icon"
    interactivity: bool = True
    # Detection scale the icon was found at, when the detector reports one.
    scale: Optional[int] = Field(None, description="Detection scale used")


class TextElement(UIElement):
    """A non-interactive text element carrying its recognized content."""

    type: Literal["text"] = "text"
    content: str = Field(..., description="The text content")
    interactivity: bool = False


class ImageData(BaseModel):
    """Image data with dimensions."""

    base64: str = Field(..., description="Base64 encoded image data")
    width: int = Field(..., description="Image width in pixels")
    height: int = Field(..., description="Image height in pixels")

    # NOTE(review): v1-style @validator — the file also uses the v2
    # model_dump API; consider migrating to field_validator.
    @validator("width", "height")
    def dimensions_must_be_positive(cls, v):
        """Reject non-positive width/height values."""
        if v <= 0:
            raise ValueError("Dimensions must be positive")
        return v


class ParserMetadata(BaseModel):
    """Metadata about the parsing process (counts, device, timing)."""

    image_size: Tuple[int, int] = Field(
        ..., description="Original image dimensions (width, height)"
    )
    num_icons: int = Field(..., description="Number of icons detected")
    num_text: int = Field(..., description="Number of text elements detected")
    device: str = Field(..., description="Device used for detection (cpu/cuda/mps)")
    ocr_enabled: bool = Field(..., description="Whether OCR was enabled")
    latency: float = Field(..., description="Total processing time in seconds")

    @property
    def width(self) -> int:
        """Get image width from image_size."""
        return self.image_size[0]

    @property
    def height(self) -> int:
        """Get image height from image_size."""
        return self.image_size[1]


class ParseResult(BaseModel):
    """Result of parsing a UI screenshot."""

    elements: List[UIElement] = Field(..., description="Detected UI elements")
    annotated_image_base64: str = Field(..., description="Base64 encoded annotated image")
    metadata: ParserMetadata = Field(..., description="Processing metadata")
    screen_info: Optional[List[str]] = Field(
        None, description="Human-readable descriptions of elements"
    )
    parsed_content_list: Optional[List[Dict[str, Any]]] = Field(
        None, description="Parsed elements as dictionaries"
    )

    @property
    def image(self) -> ImageData:
        """Get image data as a convenience property."""
        return ImageData(
            base64=self.annotated_image_base64,
            width=self.metadata.width,
            height=self.metadata.height,
        )

    @property
    def width(self) -> int:
        """Get image width from metadata."""
        return self.metadata.width

    @property
    def height(self) -> int:
        """Get image height from metadata."""
        return self.metadata.height

    def model_dump(self, **kwargs) -> Dict[str, Any]:
        """Convert model to dict for compatibility with older code.

        Forwards all standard pydantic ``model_dump`` keyword arguments
        (``mode``, ``include``, ``exclude``, ...); the previous override
        silently dropped them. Adds an ``image`` key mirroring the
        :attr:`image` convenience property for backward compatibility.
        """
        result = super().model_dump(**kwargs)
        # Add image data dict for backward compatibility
        result["image"] = self.image.model_dump()
        return result

```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/customizing-computeragent.mdx:
--------------------------------------------------------------------------------

```markdown
---
title: Customizing Your ComputerAgent
---

<Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/customizing_computeragent.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.</Callout>

The `ComputerAgent` interface provides an easy proxy to any computer-using model configuration, and it is a powerful framework for extending and building your own agentic systems.

This guide shows four proven ways to increase capabilities and success rate:

- 1 — Simple: Prompt engineering
- 2 — Easy: Tools
- 3 — Intermediate: Callbacks
- 4 — Expert: Custom `@register_agent`

## 1) Simple: Prompt engineering

Provide guiding instructions to shape behavior. `ComputerAgent` accepts an optional `instructions: str | None` which acts like a system-style preface. Internally, this uses a callback that pre-pends a user message before each LLM call.

```python
from agent.agent import ComputerAgent

agent = ComputerAgent(
    model="openai/computer-use-preview",
    tools=[computer],
    instructions=(
        "You are a meticulous software operator. Prefer safe, deterministic actions. "
        "Always confirm via on-screen text before proceeding."
    ),
)
```

## 2) Easy: Tools

Expose deterministic capabilities as tools (Python functions or custom computer handlers). The agent will call them when appropriate.

```python
def calculate_percentage(numerator: float, denominator: float) -> str:
    """Calculate percentage as a string.

    Args:
        numerator: Numerator value
        denominator: Denominator value
    Returns:
        A formatted percentage string (e.g., '75.00%').
    """
    if denominator == 0:
        return "0.00%"
    return f"{(numerator/denominator)*100:.2f}%"

agent = ComputerAgent(
    model="openai/computer-use-preview",
    tools=[computer, calculate_percentage],
)
```

- See `docs/agent-sdk/custom-tools` for authoring function tools.
- See `docs/agent-sdk/custom-computer-handlers` for building full computer interfaces.

## 3) Intermediate: Callbacks

Callbacks provide lifecycle hooks to preprocess messages, postprocess outputs, record trajectories, manage costs, and more.

```python
from agent.callbacks import ImageRetentionCallback, TrajectorySaverCallback, BudgetManagerCallback

agent = ComputerAgent(
    model="anthropic/claude-3-5-sonnet-20241022",
    tools=[computer],
    callbacks=[
        ImageRetentionCallback(only_n_most_recent_images=3),
        TrajectorySaverCallback("./trajectories"),
        BudgetManagerCallback(max_budget=10.0, raise_error=True),
    ],
)
```

- Browse callback implementations in `libs/python/agent/agent/callbacks/`.

## 4) Expert: Custom `@register_agent`

Build your own agent configuration class to control prompting, message shaping, and tool handling. This is the most flexible option for specialized domains.

- Register your own `model=...` loop using `@register_agent`
- Browse implementations in `libs/python/agent/agent/loops/`.
- Implement `predict_step()` (and optionally `predict_click()`) and return the standardized output schema.

```python
from agent.decorators import register_agent

@register_agent(models=r".*my-special-model.*", priority=10)
class MyCustomAgentConfig:
    async def predict_step(self, messages, model, tools, **kwargs):
        # 1) Format messages for your provider
        # 2) Call provider
        # 3) Convert responses to the agent output schema
        return {"output": [], "usage": {}}

    async def predict_click(self, model, image_b64, instruction):
        # Optional: click-only capability
        return None

    def get_capabilities(self):
        return ["step"]
```

## HUD integration (optional)

When using the HUD evaluation integration (`agent/integrations/hud/`), you can pass `instructions`, `tools`, and `callbacks` directly

```python
from agent.integrations.hud import run_single_task

await run_single_task(
    dataset="username/dataset-name",
    model="openai/computer-use-preview",
    instructions="Operate carefully. Always verify on-screen text before actions.",
    # tools=[your_custom_function],
    # callbacks=[YourCustomCallback()],
)
```
```

--------------------------------------------------------------------------------
/libs/python/pylume/pylume/client.py:
--------------------------------------------------------------------------------

```python
import json
import asyncio
import subprocess
from typing import Optional, Any, Dict
import shlex

from .exceptions import (
    LumeError,
    LumeServerError,
    LumeConnectionError,
    LumeTimeoutError,
    LumeNotFoundError,
    LumeConfigError,
)

class LumeClient:
    """Thin async HTTP client for the lume daemon, implemented via curl subprocesses."""

    def __init__(self, base_url: str, timeout: float = 60.0, debug: bool = False):
        """Create a client.

        Args:
            base_url: Base URL of the lume API (no trailing slash expected).
            timeout: Per-request timeout in seconds (passed to curl's -m flag).
            debug: When True, log the curl commands being executed.
        """
        self.base_url = base_url
        self.timeout = timeout
        self.debug = debug

    def _log_debug(self, message: str, **kwargs) -> None:
        """Log debug information if debug mode is enabled."""
        if self.debug:
            print(f"DEBUG: {message}")
            if kwargs:
                print(json.dumps(kwargs, indent=2))

    async def _run_curl(self, method: str, path: str, data: Optional[Dict[str, Any]] = None, params: Optional[Dict[str, Any]] = None) -> Any:
        """Execute a curl command and return the parsed JSON response.

        Raises:
            LumeTimeoutError: When curl exits with code 28 (its timeout code,
                triggered by the -m flag).
            LumeConnectionError: When curl fails for any other reason.
            LumeNotFoundError / LumeConfigError / LumeServerError / LumeError:
                For HTTP 404 / 400 / 5xx / other >=400 statuses respectively.
        """
        from urllib.parse import urlencode

        url = f"{self.base_url}{path}"
        if params:
            # URL-encode the query string; a raw f-string join breaks on
            # spaces and reserved characters.
            url = f"{url}?{urlencode(params)}"

        cmd = ["curl", "-X", method, "-s", "-w", "%{http_code}", "-m", str(self.timeout)]

        if data is not None:
            cmd.extend(["-H", "Content-Type: application/json", "-d", json.dumps(data)])

        cmd.append(url)

        self._log_debug(f"Running curl command: {' '.join(map(shlex.quote, cmd))}")

        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        stdout, stderr = await process.communicate()

        # curl exit code 28 means the -m timeout elapsed. (The previous
        # `except asyncio.TimeoutError` could never fire, since
        # communicate() was not wrapped in wait_for().)
        if process.returncode == 28:
            raise LumeTimeoutError(f"Request timed out after {self.timeout} seconds")
        if process.returncode != 0:
            raise LumeConnectionError(f"Curl command failed: {stderr.decode()}")

        # The last 3 characters are the status code (from -w "%{http_code}")
        response = stdout.decode()
        status_code = int(response[-3:])
        response_body = response[:-3]  # Remove status code from response

        if status_code >= 400:
            if status_code == 404:
                raise LumeNotFoundError(f"Resource not found: {path}")
            elif status_code == 400:
                raise LumeConfigError(f"Invalid request: {response_body}")
            elif status_code >= 500:
                raise LumeServerError(f"Server error: {response_body}")
            else:
                raise LumeError(f"Request failed with status {status_code}: {response_body}")

        return json.loads(response_body) if response_body.strip() else None

    async def get(self, path: str, params: Optional[Dict[str, Any]] = None) -> Any:
        """Make a GET request."""
        return await self._run_curl("GET", path, params=params)

    async def post(self, path: str, data: Optional[Dict[str, Any]] = None, timeout: Optional[float] = None) -> Any:
        """Make a POST request, optionally overriding the timeout for this call only."""
        old_timeout = self.timeout
        if timeout is not None:
            self.timeout = timeout
        try:
            return await self._run_curl("POST", path, data=data)
        finally:
            # Always restore the client-wide timeout.
            self.timeout = old_timeout

    async def patch(self, path: str, data: Dict[str, Any]) -> None:
        """Make a PATCH request."""
        await self._run_curl("PATCH", path, data=data)

    async def delete(self, path: str) -> None:
        """Make a DELETE request."""
        await self._run_curl("DELETE", path)

    def print_curl(self, method: str, path: str, data: Optional[Dict[str, Any]] = None) -> None:
        """Print equivalent curl command for debugging."""
        curl_cmd = f"""curl -X {method} \\
  '{self.base_url}{path}'"""

        if data:
            curl_cmd += f" \\\n  -H 'Content-Type: application/json' \\\n  -d '{json.dumps(data)}'"

        print("\nEquivalent curl command:")
        print(curl_cmd)
        print()

    async def close(self) -> None:
        """Close the client resources."""
        pass  # No shared resources to clean up
```

--------------------------------------------------------------------------------
/docs/src/components/iou.tsx:
--------------------------------------------------------------------------------

```typescript
'use client';
import React, { useRef, useEffect, useState, useCallback } from 'react';

/**
 * Represents a rectangle with position, dimensions, styling, and identification
 */
/**
 * Represents a rectangle with position, dimensions, styling, and identification.
 * Coordinates and sizes are in canvas pixels (drawn directly via fillRect).
 */
interface Rectangle {
  /** The x-coordinate of the rectangle's left edge */
  left: number;
  /** The y-coordinate of the rectangle's top edge */
  top: number;
  /** The width of the rectangle */
  width: number;
  /** The height of the rectangle */
  height: number;
  /** The fill color of the rectangle (any CSS color string) */
  fill: string;
  /** The display name of the rectangle, drawn as a label inside it */
  name: string;
}

/**
 * Props for the IOU component.
 */
interface IOUProps {
  /** The title to display above the visualization */
  title: string;
  /** The description text to display below the IOU value */
  description: string;
  /** The first rectangle for IOU calculation */
  rect1: Rectangle;
  /** The second rectangle for IOU calculation */
  rect2: Rectangle;
}

/**
 * A React component that visualizes and calculates the Intersection over Union (IOU) 
 * of two rectangles on a canvas
 * @param props - The component props
 * @returns The rendered IOU visualization component
 */
export default function IOU({ title, description, rect1, rect2 }: IOUProps) {
  const canvasRef = useRef<HTMLCanvasElement>(null);
  const [actualIOU, setActualIOU] = useState<number>(0);

  /** Edge-coordinate form of a rectangle, used for overlap math. */
  type Bbox = { left: number; right: number; top: number; bottom: number };

  /**
   * Converts a rectangle to a bounding box with left, right, top, and bottom coordinates
   * @param rect - The rectangle to convert
   * @returns An object containing the bounding box coordinates
   */
  const getBbox = (rect: Rectangle): Bbox => ({
    left: rect.left,
    right: rect.left + rect.width,
    top: rect.top,
    bottom: rect.top + rect.height,
  });

  /**
   * Calculates the intersection area between two bounding boxes
   * @param bbox1 - The first bounding box
   * @param bbox2 - The second bounding box
   * @returns The area of intersection between the two bounding boxes (0 when disjoint)
   */
  const calcIntersection = (bbox1: Bbox, bbox2: Bbox): number => {
    const x1 = Math.max(bbox1.left, bbox2.left);
    const x2 = Math.min(bbox1.right, bbox2.right);
    const y1 = Math.max(bbox1.top, bbox2.top);
    const y2 = Math.min(bbox1.bottom, bbox2.bottom);

    // Check if there's actually an overlap
    if (x2 <= x1 || y2 <= y1) {
      return 0;
    }

    return (x2 - x1) * (y2 - y1);
  };

  /**
   * Calculates the area of a rectangle
   * @param rect - The rectangle to calculate area for
   * @returns The area of the rectangle
   */
  const calcArea = (rect: Rectangle): number => {
    return rect.width * rect.height;
  };

  /**
   * Draws the rectangles on the canvas and calculates the IOU value
   */
  const drawCanvas = useCallback(() => {
    const canvas = canvasRef.current;
    if (!canvas) return;

    const ctx = canvas.getContext('2d');
    if (!ctx) return;

    // Clear canvas
    ctx.clearRect(0, 0, canvas.width, canvas.height);

    // Calculate IOU; guard against zero-area rectangles, which would
    // otherwise produce NaN from 0 / 0.
    const bbox1 = getBbox(rect1);
    const bbox2 = getBbox(rect2);
    const intersection = calcIntersection(bbox1, bbox2);
    const union = calcArea(rect1) + calcArea(rect2) - intersection;
    setActualIOU(union > 0 ? intersection / union : 0);

    // Draw rectangles with their outlines and labels
    [rect1, rect2].forEach((rect) => {
      ctx.fillStyle = rect.fill;
      ctx.fillRect(rect.left, rect.top, rect.width, rect.height);

      ctx.strokeStyle = '#000';
      ctx.lineWidth = 2;
      ctx.strokeRect(rect.left, rect.top, rect.width, rect.height);

      ctx.fillStyle = '#000';
      // A bare '12px' is not a valid CSS font shorthand and is ignored by
      // the canvas; a font family must be included for the size to apply.
      ctx.font = '12px sans-serif';
      ctx.fillText(rect.name, rect.left + 5, rect.top + 15);
    });
  }, [rect1, rect2]);

  useEffect(() => {
    drawCanvas();
  }, [drawCanvas]);

  return (
    <div className="">
      <h3 className="text-sm font-semibold ">{title}</h3>
      <div className="flex items-start gap-6">
        <div>
          <canvas
            ref={canvasRef}
            width={200}
            height={150}
            className="border bg-white rounded-md"
          />
          <div className="mt-2 text-sm">
            <div className="font-mono mb-2">IOU = {actualIOU.toFixed(3)}</div>
            <span className="">{description}</span>
          </div>
        </div>
      </div>
    </div>
  );
}

```

--------------------------------------------------------------------------------
/blog/cua-hackathon.md:
--------------------------------------------------------------------------------

```markdown
# Computer-Use Agents SOTA Challenge: Hack the North + Global Online

*Published on August 25, 2025 by Francesco Bonacci*

We’re bringing something new to [Hack the North](https://hackthenorth.com), Canada’s largest hackathon, this year: a head-to-head competition for **Computer-Use Agents** - on-site at Waterloo and a **Global online challenge**. From September 12–14, 2025, teams build on the **Cua Agent Framework** and are scored in **HUD’s OSWorld-Verified** environment to push past today’s SOTA on [OS-World](https://os-world.github.io).

<img src="./assets/hack-the-north.png">

## Track A: On-site @ Hack the North

There’s one global leaderboard: **Cua - Best State-of-the-Art Computer-Use Agent**. Use any model setup you like (cloud or local). After projects are submitted, [HUD](https://www.hud.so) runs the official benchmark; the top team earns a **guaranteed YC partner interview (W26 batch)**. We’ll also feature winners on our blog and socials and kit the team out with swag.

## Track B: Cua Global Online Hackathon

**Cua** and [**Ollama**](https://ollama.com) organize a global hackathon to find the **most creative uses of local and hybrid computer-use agents**. There are no geographic restrictions on who can join — this is a worldwide competition focused on **originality, impact, and inventive applications** that showcase what's possible with local and hybrid inference.

**Prizes:** 
- 1st **MacBook Air M4 (or equivalent value)** + features in Cua & Ollama channels
- 2nd **$500 CAD + swag**
- 3rd **swag + public feature**

---

## How it works

Two different tracks, two different processes:

### On-site (Track A)
Build during the weekend and submit a repo with a one-line start command. **HUD** executes your command in a clean environment and runs **OSWorld-Verified**. Scores come from official benchmark results; ties break by median, then wall-clock time, then earliest submission. Any model setup is allowed (cloud or local).

**HUD** runs official evaluations immediately after submission. Winners are announced at the **closing ceremony**.

### Rules
- Fork and star the [Cua repo](https://github.com/trycua/cua).
- Add your agent and instructions in `samples/community/hack-the-north/<YOUR_TEAM_NAME>`.
- Include a README with details on the approach and any required notes.  
- Submit a PR.  

**Deadline: Sept 15, 8:00 AM EDT**

### Global Online (Track B)
Open to anyone, anywhere. Build on your own timeline and submit through the **Cua Discord form** by the deadline.

**Project Requirements:**
- Your agent must integrate **Cua and Ollama** in some way
- Your agent must be **easily runnable by judges**

Judged by **Cua** and **Ollama** teams on:  
- **Creativity (30%)** – originality, usefulness, surprise factor  
- **Technical Depth (30%)** – quality of engineering and agent design  
- **Use of Ollama (30%)** – effective integration of local/hybrid inference  
- **Polish (10%)** – presentation, clarity, demo readiness  

### Submission Process
Submissions will be collected via a **form link provided in the Cua Discord**. Your submission must contain:

- **GitHub repo** containing the agent source code and a clear README with instructions on how to use the agent
- **Explanation** of the models and tools used, and what's local or hybrid about your design  
- **Short demo video** (up to two minutes)

A **commit freeze** will be used to ensure that no changes are made after the deadline. Winners will be announced after judging is complete.

**Deadline: Sept 28, 11:59 PM UTC (extended due to popular demand!)**

---

## Join us

Bring a team, pick a model stack, and push what agents can do on real computers. We can’t wait to see what you build at **Hack the North 2025**.

**Discord channels**  
- Join the Discord first: https://discord.gg/cua-ai
- **#hack-the-north (on-site):** https://discord.com/channels/1328377437301641247/1409508526774157342  
- **#global-online (Ollama × Cua):** https://discord.com/channels/1328377437301641247/1409518100491145226  

**Contact**  
Questions on Hack the North? Email **[email protected]**.

*P.S. If you’re planning ahead, start with the Cua Agent Framework and OSWorld-Verified docs at docs.trycua.com; we’ll share office-hour times in both Discord channels.*
```

--------------------------------------------------------------------------------
/libs/lume/src/Virtualization/DHCPLeaseParser.swift:
--------------------------------------------------------------------------------

```swift
import Foundation

/// Represents a DHCP lease entry from the system's DHCP lease file
/// Represents a DHCP lease entry from the system's DHCP lease file
private struct DHCPLease {
    /// MAC address normalized to two-digit, colon-separated hex components.
    let macAddress: String
    /// IP address exactly as stored in the lease file.
    let ipAddress: String
    /// Absolute time at which the lease expires.
    let expirationDate: Date
    
    /// Creates a lease entry from raw DHCP lease file key-value pairs
    /// - Parameter dict: Dictionary containing the raw lease data; expects
    ///   "hw_address", "ip_address", and "lease" keys.
    /// - Returns: A DHCPLease instance if the data is valid, nil otherwise
    static func from(_ dict: [String: String]) -> DHCPLease? {
        guard let hwAddress = dict["hw_address"],
              let ipAddress = dict["ip_address"],
              let lease = dict["lease"] else {
            return nil
        }
        
        // Parse MAC address from hw_address field (format can be "1,xx:xx:xx:xx:xx:xx" or "ff,...")
        let hwParts = hwAddress.split(separator: ",")
        guard hwParts.count >= 2 else { return nil }
        
        // Get the MAC part after the hardware-type prefix and normalize it
        let rawMacAddress = String(hwParts[1]).trimmingCharacters(in: .whitespaces)
        
        // Normalize the MAC address by zero-padding each component to two digits
        // so it compares equal to normalized caller-supplied addresses.
        let normalizedMacAddress = rawMacAddress.split(separator: ":")
            .map { component in
                let hex = String(component)
                return hex.count == 1 ? "0\(hex)" : hex
            }
            .joined(separator: ":")
        
        // Convert hex timestamp (e.g. "0x61ae8c00") to a Date.
        // Strip only a leading "0x" prefix: trimmingCharacters(in:) would also
        // remove trailing '0'/'x' characters and corrupt timestamps that end in 0.
        let timestampHex = lease.hasPrefix("0x") ? String(lease.dropFirst(2)) : lease
        guard let timestamp = UInt64(timestampHex, radix: 16) else { return nil }
        let expirationDate = Date(timeIntervalSince1970: TimeInterval(timestamp))
        
        return DHCPLease(
            macAddress: normalizedMacAddress,
            ipAddress: ipAddress,
            expirationDate: expirationDate
        )
    }
    
    /// Checks if the lease is currently valid (i.e. not yet expired)
    var isValid: Bool {
        expirationDate > Date()
    }
}

/// Parses DHCP lease files to retrieve IP addresses for VMs based on their MAC addresses
/// Parses DHCP lease files to retrieve IP addresses for VMs based on their MAC addresses
enum DHCPLeaseParser {
    private static let leasePath = "/var/db/dhcpd_leases"
    
    /// Zero-pads each colon-separated component of a MAC address to two hex digits.
    private static func normalize(_ mac: String) -> String {
        mac.split(separator: ":")
            .map { piece -> String in
                let hex = String(piece)
                return hex.count == 1 ? "0\(hex)" : hex
            }
            .joined(separator: ":")
    }
    
    /// Retrieves the IP address for a given MAC address from the DHCP lease file
    /// - Parameter macAddress: The MAC address to look up
    /// - Returns: The IP address if found, nil otherwise
    static func getIPAddress(forMAC macAddress: String) -> String? {
        guard let contents = try? String(contentsOfFile: leasePath, encoding: .utf8) else {
            return nil
        }
        
        // Normalize the input so it matches the normalized form stored in leases.
        let target = normalize(macAddress)
        
        let entries = try? parseDHCPLeases(contents)
        return entries?.first(where: { $0.macAddress == target })?.ipAddress
    }
    
    /// Parses the contents of a DHCP lease file into lease entries
    /// - Parameter contents: The raw contents of the lease file
    /// - Returns: Array of parsed lease entries
    private static func parseDHCPLeases(_ contents: String) throws -> [DHCPLease] {
        var entries: [DHCPLease] = []
        var fields: [String: String] = [:]
        var insideBlock = false
        
        for rawLine in contents.components(separatedBy: .newlines) {
            let line = rawLine.trimmingCharacters(in: .whitespaces)
            switch line {
            case "{":
                // A new lease block begins; discard any partial state.
                insideBlock = true
                fields = [:]
            case "}":
                // Block ended: keep the entry only if it parsed cleanly.
                if let entry = DHCPLease.from(fields) {
                    entries.append(entry)
                }
                insideBlock = false
            default:
                guard insideBlock else { continue }
                let pair = line.split(separator: "=", maxSplits: 1)
                if pair.count == 2 {
                    let key = String(pair[0]).trimmingCharacters(in: .whitespaces)
                    let value = String(pair[1]).trimmingCharacters(in: .whitespaces)
                    fields[key] = value
                }
            }
        }
        
        return entries
    }
}
```

--------------------------------------------------------------------------------
/examples/computer_examples.py:
--------------------------------------------------------------------------------

```python
import os
import asyncio
from pathlib import Path
import sys
import traceback

# Load environment variables from .env file
# The .env file is expected at the repository root (one level above examples/).
project_root = Path(__file__).parent.parent
env_file = project_root / ".env"
print(f"Loading environment from: {env_file}")
# Imported here (after computing env_file) so the banner prints even if
# python-dotenv is the first thing to fail.
from dotenv import load_dotenv

load_dotenv(env_file)

# Add paths to sys.path if needed
# Honors a colon-separated PYTHONPATH so locally checked-out packages win
# over any installed versions.
pythonpath = os.environ.get("PYTHONPATH", "")
for path in pythonpath.split(":"):
    if path and path not in sys.path:
        sys.path.insert(0, path)  # Insert at beginning to prioritize
        print(f"Added to sys.path: {path}")

from computer.computer import Computer
from computer.providers.base import VMProviderType
from computer.logger import LogLevel

async def main():
    """Walk through the Computer API: boot a local macOS VM, then exercise
    screen, mouse, keyboard, and clipboard actions, cleaning up on exit."""
    try:
        print("\n=== Using direct initialization ===")

        # Boot a local macOS VM through the Lume provider.
        vm = Computer(
            display="1024x768",
            memory="8GB",
            cpu="4",
            os_type="macos",
            name="macos",
            verbosity=LogLevel.VERBOSE,
            provider_type=VMProviderType.LUME,
            storage="/Users/<USER>/repos/trycua/computer/examples/storage",
            shared_directories=[
                "/Users/<USER>/repos/trycua/computer/examples/shared"
            ],
            ephemeral=False,
        )

        # Alternative: a remote Linux machine on Cua Cloud.
        # vm = Computer(
        #     os_type="linux",
        #     api_key=os.getenv("CUA_API_KEY"),
        #     name=os.getenv("CONTAINER_NAME"),
        #     provider_type=VMProviderType.CLOUD,
        # )

        try:
            # Start the VM with default parameters.
            await vm.run()

            shot = await vm.interface.screenshot()

            # Make sure ./output exists before writing the capture there.
            out_dir = Path("./output")
            out_dir.mkdir(exist_ok=True)

            shot_path = out_dir / "screenshot.png"
            with open(shot_path, "wb") as fh:
                fh.write(shot)
            print(f"Screenshot saved to: {shot_path.absolute()}")

            # await vm.interface.hotkey("command", "space")

            # res = await vm.interface.run_command("touch ./Downloads/empty_file")
            # print(f"Run command result: {res}")

            accessibility_tree = await vm.interface.get_accessibility_tree()
            print(f"Accessibility tree: {accessibility_tree}")

            # Screen Actions Examples
            # print("\n===  Screen Actions ===")
            # shot = await vm.interface.screenshot()
            # with open("screenshot_direct.png", "wb") as fh:
            #     fh.write(shot)

            screen_size = await vm.interface.get_screen_size()
            print(f"Screen size: {screen_size}")

            # Round-trip a point through both coordinate conversions.
            cx, cy = 733, 736
            print(f"Center in screen coordinates: ({cx}, {cy})")

            shot_coords = await vm.to_screenshot_coordinates(cx, cy)
            print(f"Center in screenshot coordinates: {shot_coords}")

            screen_coords = await vm.to_screen_coordinates(*shot_coords)
            print(f"Back to screen coordinates: {screen_coords}")

            # Mouse Actions Examples
            print("\n=== Mouse Actions ===")
            await vm.interface.move_cursor(100, 100)
            await vm.interface.left_click()
            await vm.interface.right_click(300, 300)
            await vm.interface.double_click(400, 400)

            # Keyboard Actions Examples
            print("\n=== Keyboard Actions ===")
            await vm.interface.type_text("Hello, World!")
            await vm.interface.press_key("enter")

            # Clipboard Actions Examples
            print("\n=== Clipboard Actions ===")
            await vm.interface.set_clipboard("Test clipboard")
            content = await vm.interface.copy_to_clipboard()
            print(f"Clipboard content: {content}")

        finally:
            # Always shut the VM down, even if a step above failed.
            await vm.stop()
    except Exception as e:
        print(f"Error in main: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    # Entry point: drive the async demo to completion on a fresh event loop.
    asyncio.run(main())

```

--------------------------------------------------------------------------------
/libs/python/agent/agent/loops/opencua.py:
--------------------------------------------------------------------------------

```python
"""
OpenCUA agent loop implementation for click prediction using litellm.acompletion
Based on OpenCUA model for GUI grounding tasks.
"""

import asyncio
import json
import re
import base64
from typing import Dict, List, Any, AsyncGenerator, Union, Optional, Tuple
from io import BytesIO
import uuid
from PIL import Image
import litellm
import math

from .composed_grounded import ComposedGroundedConfig
from ..decorators import register_agent
from ..types import Messages, AgentResponse, Tools, AgentCapability
from ..loops.base import AsyncAgentConfig

def extract_coordinates_from_pyautogui(text: str) -> Optional[Tuple[int, int]]:
    """Extract (x, y) coordinates from a ``pyautogui.click(...)`` call in model output.

    Accepts both the keyword form ``pyautogui.click(x=1443, y=343)`` and the
    positional form ``pyautogui.click(1443, 343)``, with arbitrary whitespace
    around the arguments. This is a strict superset of the keyword-only match.

    Args:
        text: Raw model response text to scan.

    Returns:
        The first match as an ``(x, y)`` tuple of ints, or None when no
        recognizable click call is present.
    """
    try:
        pattern = r"pyautogui\.click\(\s*(?:x\s*=\s*)?(\d+)\s*,\s*(?:y\s*=\s*)?(\d+)\s*\)"
        match = re.search(pattern, text)
        if match:
            return (int(match.group(1)), int(match.group(2)))
        return None
    except Exception:
        # Defensive: never let a malformed model response crash the caller.
        return None

@register_agent(models=r"(?i).*OpenCUA.*")
class OpenCUAConfig(ComposedGroundedConfig):
    """OpenCUA agent configuration implementing AsyncAgentConfig protocol for click prediction."""

    def __init__(self):
        super().__init__()
        # Kept for parity with sibling agent configs; not read in this class.
        self.current_model = None
        self.last_screenshot_b64 = None

    async def predict_step(
        self,
        messages: List[Dict[str, Any]],
        model: str,
        tools: Optional[List[Dict[str, Any]]] = None,
        max_retries: Optional[int] = None,
        stream: bool = False,
        computer_handler=None,
        _on_api_start=None,
        _on_api_end=None,
        _on_usage=None,
        _on_screenshot=None,
        **kwargs
    ) -> Dict[str, Any]:
        """Delegate to the composed-grounded flow, using this same model for
        both the grounding and planning roles (i.e. "model+model")."""
        composed_model = f"{model}+{model}"
        return await super().predict_step(
            messages=messages,
            model=composed_model,
            tools=tools,
            max_retries=max_retries,
            stream=stream,
            computer_handler=computer_handler,
            _on_api_start=_on_api_start,
            _on_api_end=_on_api_end,
            _on_usage=_on_usage,
            _on_screenshot=_on_screenshot,
            **kwargs
        )

    async def predict_click(
        self,
        model: str,
        image_b64: str,
        instruction: str,
        **kwargs
    ) -> Optional[Tuple[int, int]]:
        """
        Predict click coordinates using OpenCUA model via litellm.acompletion.

        Sends the screenshot plus a "Click on ..." prompt and parses the
        pyautogui.click(...) call out of the reply.

        Args:
            model: The OpenCUA model name
            image_b64: Base64 encoded image
            instruction: Instruction for where to click

        Returns:
            Tuple of (x, y) coordinates or None if prediction fails
        """
        conversation = [
            {
                "role": "system",
                "content": (
                    "You are a GUI agent. You are given a task and a screenshot of the screen. "
                    "You need to perform a series of pyautogui actions to complete the task."
                ),
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_b64}"},
                    },
                    {"type": "text", "text": f"Click on {instruction}"},
                ],
            },
        ]

        # Build the request as a dict so caller-supplied kwargs can override
        # any of the defaults below (including model/messages).
        request = {
            "model": model,
            "messages": conversation,
            "max_new_tokens": 2056,
            "temperature": 0,
            **kwargs,
        }
        response = await litellm.acompletion(**request)

        # Parse the model's textual reply into pixel coordinates.
        reply_text = response.choices[0].message.content
        return extract_coordinates_from_pyautogui(reply_text)

    def get_capabilities(self) -> List[AgentCapability]:
        """This agent only supports click prediction."""
        return ["click"]

```
Page 3/16FirstPrevNextLast