#
tokens: 49465/50000 27/616 files (page 6/28)
lines: on (toggle) GitHub
raw markdown copy reset
This is page 6 of 28. Use http://codebase.md/trycua/cua?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .cursorignore
├── .dockerignore
├── .editorconfig
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── scripts
│   │   ├── get_pyproject_version.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── README.md
│   │       └── test_get_pyproject_version.py
│   └── workflows
│       ├── bump-version.yml
│       ├── ci-lume.yml
│       ├── docker-publish-cua-linux.yml
│       ├── docker-publish-cua-windows.yml
│       ├── docker-publish-kasm.yml
│       ├── docker-publish-xfce.yml
│       ├── docker-reusable-publish.yml
│       ├── link-check.yml
│       ├── lint.yml
│       ├── npm-publish-cli.yml
│       ├── npm-publish-computer.yml
│       ├── npm-publish-core.yml
│       ├── publish-lume.yml
│       ├── pypi-publish-agent.yml
│       ├── pypi-publish-computer-server.yml
│       ├── pypi-publish-computer.yml
│       ├── pypi-publish-core.yml
│       ├── pypi-publish-mcp-server.yml
│       ├── pypi-publish-som.yml
│       ├── pypi-reusable-publish.yml
│       ├── python-tests.yml
│       ├── test-cua-models.yml
│       └── test-validation-script.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .prettierignore
├── .prettierrc.yaml
├── .vscode
│   ├── docs.code-workspace
│   ├── extensions.json
│   ├── launch.json
│   ├── libs-ts.code-workspace
│   ├── lume.code-workspace
│   ├── lumier.code-workspace
│   ├── py.code-workspace
│   └── settings.json
├── blog
│   ├── app-use.md
│   ├── assets
│   │   ├── composite-agents.png
│   │   ├── docker-ubuntu-support.png
│   │   ├── hack-booth.png
│   │   ├── hack-closing-ceremony.jpg
│   │   ├── hack-cua-ollama-hud.jpeg
│   │   ├── hack-leaderboard.png
│   │   ├── hack-the-north.png
│   │   ├── hack-winners.jpeg
│   │   ├── hack-workshop.jpeg
│   │   ├── hud-agent-evals.png
│   │   └── trajectory-viewer.jpeg
│   ├── bringing-computer-use-to-the-web.md
│   ├── build-your-own-operator-on-macos-1.md
│   ├── build-your-own-operator-on-macos-2.md
│   ├── cloud-windows-ga-macos-preview.md
│   ├── composite-agents.md
│   ├── computer-use-agents-for-growth-hacking.md
│   ├── cua-hackathon.md
│   ├── cua-playground-preview.md
│   ├── cua-vlm-router.md
│   ├── hack-the-north.md
│   ├── hud-agent-evals.md
│   ├── human-in-the-loop.md
│   ├── introducing-cua-cli.md
│   ├── introducing-cua-cloud-containers.md
│   ├── lume-to-containerization.md
│   ├── neurips-2025-cua-papers.md
│   ├── sandboxed-python-execution.md
│   ├── training-computer-use-models-trajectories-1.md
│   ├── trajectory-viewer.md
│   ├── ubuntu-docker-support.md
│   └── windows-sandbox.md
├── CONTRIBUTING.md
├── Development.md
├── Dockerfile
├── docs
│   ├── .env.example
│   ├── .gitignore
│   ├── content
│   │   └── docs
│   │       ├── agent-sdk
│   │       │   ├── agent-loops.mdx
│   │       │   ├── benchmarks
│   │       │   │   ├── index.mdx
│   │       │   │   ├── interactive.mdx
│   │       │   │   ├── introduction.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── osworld-verified.mdx
│   │       │   │   ├── screenspot-pro.mdx
│   │       │   │   └── screenspot-v2.mdx
│   │       │   ├── callbacks
│   │       │   │   ├── agent-lifecycle.mdx
│   │       │   │   ├── cost-saving.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── logging.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── pii-anonymization.mdx
│   │       │   │   └── trajectories.mdx
│   │       │   ├── chat-history.mdx
│   │       │   ├── custom-tools.mdx
│   │       │   ├── customizing-computeragent.mdx
│   │       │   ├── integrations
│   │       │   │   ├── hud.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── observability.mdx
│   │       │   ├── mcp-server
│   │       │   │   ├── client-integrations.mdx
│   │       │   │   ├── configuration.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── llm-integrations.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── tools.mdx
│   │       │   │   └── usage.mdx
│   │       │   ├── message-format.mdx
│   │       │   ├── meta.json
│   │       │   ├── migration-guide.mdx
│   │       │   ├── prompt-caching.mdx
│   │       │   ├── supported-agents
│   │       │   │   ├── composed-agents.mdx
│   │       │   │   ├── computer-use-agents.mdx
│   │       │   │   ├── grounding-models.mdx
│   │       │   │   ├── human-in-the-loop.mdx
│   │       │   │   └── meta.json
│   │       │   ├── supported-model-providers
│   │       │   │   ├── cua-vlm-router.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   └── local-models.mdx
│   │       │   ├── telemetry.mdx
│   │       │   └── usage-tracking.mdx
│   │       ├── cli-playbook
│   │       │   ├── commands.mdx
│   │       │   ├── index.mdx
│   │       │   └── meta.json
│   │       ├── computer-sdk
│   │       │   ├── cloud-vm-management.mdx
│   │       │   ├── commands.mdx
│   │       │   ├── computer-server
│   │       │   │   ├── Commands.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── REST-API.mdx
│   │       │   │   └── WebSocket-API.mdx
│   │       │   ├── computer-ui.mdx
│   │       │   ├── computers.mdx
│   │       │   ├── custom-computer-handlers.mdx
│   │       │   ├── meta.json
│   │       │   ├── sandboxed-python.mdx
│   │       │   └── tracing-api.mdx
│   │       ├── example-usecases
│   │       │   ├── form-filling.mdx
│   │       │   ├── gemini-complex-ui-navigation.mdx
│   │       │   ├── meta.json
│   │       │   ├── post-event-contact-export.mdx
│   │       │   └── windows-app-behind-vpn.mdx
│   │       ├── get-started
│   │       │   ├── meta.json
│   │       │   └── quickstart.mdx
│   │       ├── index.mdx
│   │       ├── macos-vm-cli-playbook
│   │       │   ├── lume
│   │       │   │   ├── cli-reference.mdx
│   │       │   │   ├── faq.md
│   │       │   │   ├── http-api.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── prebuilt-images.mdx
│   │       │   ├── lumier
│   │       │   │   ├── building-lumier.mdx
│   │       │   │   ├── docker-compose.mdx
│   │       │   │   ├── docker.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   └── meta.json
│   │       │   └── meta.json
│   │       └── meta.json
│   ├── next.config.mjs
│   ├── package-lock.json
│   ├── package.json
│   ├── pnpm-lock.yaml
│   ├── postcss.config.mjs
│   ├── public
│   │   └── img
│   │       ├── agent_gradio_ui.png
│   │       ├── agent.png
│   │       ├── bg-dark.jpg
│   │       ├── bg-light.jpg
│   │       ├── cli.png
│   │       ├── computer.png
│   │       ├── grounding-with-gemini3.gif
│   │       ├── hero.png
│   │       ├── laminar_trace_example.png
│   │       ├── som_box_threshold.png
│   │       └── som_iou_threshold.png
│   ├── README.md
│   ├── source.config.ts
│   ├── src
│   │   ├── app
│   │   │   ├── (home)
│   │   │   │   ├── [[...slug]]
│   │   │   │   │   └── page.tsx
│   │   │   │   └── layout.tsx
│   │   │   ├── api
│   │   │   │   ├── posthog
│   │   │   │   │   └── [...path]
│   │   │   │   │       └── route.ts
│   │   │   │   └── search
│   │   │   │       └── route.ts
│   │   │   ├── favicon.ico
│   │   │   ├── global.css
│   │   │   ├── layout.config.tsx
│   │   │   ├── layout.tsx
│   │   │   ├── llms.mdx
│   │   │   │   └── [[...slug]]
│   │   │   │       └── route.ts
│   │   │   ├── llms.txt
│   │   │   │   └── route.ts
│   │   │   ├── robots.ts
│   │   │   └── sitemap.ts
│   │   ├── assets
│   │   │   ├── discord-black.svg
│   │   │   ├── discord-white.svg
│   │   │   ├── logo-black.svg
│   │   │   └── logo-white.svg
│   │   ├── components
│   │   │   ├── analytics-tracker.tsx
│   │   │   ├── cookie-consent.tsx
│   │   │   ├── doc-actions-menu.tsx
│   │   │   ├── editable-code-block.tsx
│   │   │   ├── footer.tsx
│   │   │   ├── hero.tsx
│   │   │   ├── iou.tsx
│   │   │   ├── mermaid.tsx
│   │   │   └── page-feedback.tsx
│   │   ├── lib
│   │   │   ├── llms.ts
│   │   │   └── source.ts
│   │   ├── mdx-components.tsx
│   │   └── providers
│   │       └── posthog-provider.tsx
│   └── tsconfig.json
├── examples
│   ├── agent_examples.py
│   ├── agent_ui_examples.py
│   ├── browser_tool_example.py
│   ├── cloud_api_examples.py
│   ├── computer_examples_windows.py
│   ├── computer_examples.py
│   ├── computer_ui_examples.py
│   ├── computer-example-ts
│   │   ├── .env.example
│   │   ├── .gitignore
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── README.md
│   │   ├── src
│   │   │   ├── helpers.ts
│   │   │   └── index.ts
│   │   └── tsconfig.json
│   ├── docker_examples.py
│   ├── evals
│   │   ├── hud_eval_examples.py
│   │   └── wikipedia_most_linked.txt
│   ├── pylume_examples.py
│   ├── sandboxed_functions_examples.py
│   ├── som_examples.py
│   ├── tracing_examples.py
│   ├── utils.py
│   └── winsandbox_example.py
├── img
│   ├── agent_gradio_ui.png
│   ├── agent.png
│   ├── cli.png
│   ├── computer.png
│   ├── logo_black.png
│   └── logo_white.png
├── libs
│   ├── kasm
│   │   ├── Dockerfile
│   │   ├── LICENSE
│   │   ├── README.md
│   │   └── src
│   │       └── ubuntu
│   │           └── install
│   │               └── firefox
│   │                   ├── custom_startup.sh
│   │                   ├── firefox.desktop
│   │                   └── install_firefox.sh
│   ├── lume
│   │   ├── .cursorignore
│   │   ├── CONTRIBUTING.md
│   │   ├── Development.md
│   │   ├── img
│   │   │   └── cli.png
│   │   ├── Package.resolved
│   │   ├── Package.swift
│   │   ├── README.md
│   │   ├── resources
│   │   │   └── lume.entitlements
│   │   ├── scripts
│   │   │   ├── build
│   │   │   │   ├── build-debug.sh
│   │   │   │   ├── build-release-notarized.sh
│   │   │   │   └── build-release.sh
│   │   │   └── install.sh
│   │   ├── src
│   │   │   ├── Commands
│   │   │   │   ├── Clone.swift
│   │   │   │   ├── Config.swift
│   │   │   │   ├── Create.swift
│   │   │   │   ├── Delete.swift
│   │   │   │   ├── Get.swift
│   │   │   │   ├── Images.swift
│   │   │   │   ├── IPSW.swift
│   │   │   │   ├── List.swift
│   │   │   │   ├── Logs.swift
│   │   │   │   ├── Options
│   │   │   │   │   └── FormatOption.swift
│   │   │   │   ├── Prune.swift
│   │   │   │   ├── Pull.swift
│   │   │   │   ├── Push.swift
│   │   │   │   ├── Run.swift
│   │   │   │   ├── Serve.swift
│   │   │   │   ├── Set.swift
│   │   │   │   └── Stop.swift
│   │   │   ├── ContainerRegistry
│   │   │   │   ├── ImageContainerRegistry.swift
│   │   │   │   ├── ImageList.swift
│   │   │   │   └── ImagesPrinter.swift
│   │   │   ├── Errors
│   │   │   │   └── Errors.swift
│   │   │   ├── FileSystem
│   │   │   │   ├── Home.swift
│   │   │   │   ├── Settings.swift
│   │   │   │   ├── VMConfig.swift
│   │   │   │   ├── VMDirectory.swift
│   │   │   │   └── VMLocation.swift
│   │   │   ├── LumeController.swift
│   │   │   ├── Main.swift
│   │   │   ├── Server
│   │   │   │   ├── Handlers.swift
│   │   │   │   ├── HTTP.swift
│   │   │   │   ├── Requests.swift
│   │   │   │   ├── Responses.swift
│   │   │   │   └── Server.swift
│   │   │   ├── Utils
│   │   │   │   ├── CommandRegistry.swift
│   │   │   │   ├── CommandUtils.swift
│   │   │   │   ├── Logger.swift
│   │   │   │   ├── NetworkUtils.swift
│   │   │   │   ├── Path.swift
│   │   │   │   ├── ProcessRunner.swift
│   │   │   │   ├── ProgressLogger.swift
│   │   │   │   ├── String.swift
│   │   │   │   └── Utils.swift
│   │   │   ├── Virtualization
│   │   │   │   ├── DarwinImageLoader.swift
│   │   │   │   ├── DHCPLeaseParser.swift
│   │   │   │   ├── ImageLoaderFactory.swift
│   │   │   │   └── VMVirtualizationService.swift
│   │   │   ├── VM
│   │   │   │   ├── DarwinVM.swift
│   │   │   │   ├── LinuxVM.swift
│   │   │   │   ├── VM.swift
│   │   │   │   ├── VMDetails.swift
│   │   │   │   ├── VMDetailsPrinter.swift
│   │   │   │   ├── VMDisplayResolution.swift
│   │   │   │   └── VMFactory.swift
│   │   │   └── VNC
│   │   │       ├── PassphraseGenerator.swift
│   │   │       └── VNCService.swift
│   │   └── tests
│   │       ├── Mocks
│   │       │   ├── MockVM.swift
│   │       │   ├── MockVMVirtualizationService.swift
│   │       │   └── MockVNCService.swift
│   │       ├── VM
│   │       │   └── VMDetailsPrinterTests.swift
│   │       ├── VMTests.swift
│   │       ├── VMVirtualizationServiceTests.swift
│   │       └── VNCServiceTests.swift
│   ├── lumier
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   └── src
│   │       ├── bin
│   │       │   └── entry.sh
│   │       ├── config
│   │       │   └── constants.sh
│   │       ├── hooks
│   │       │   └── on-logon.sh
│   │       └── lib
│   │           ├── utils.sh
│   │           └── vm.sh
│   ├── python
│   │   ├── agent
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── agent
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── adapters
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cua_adapter.py
│   │   │   │   │   ├── huggingfacelocal_adapter.py
│   │   │   │   │   ├── human_adapter.py
│   │   │   │   │   ├── mlxvlm_adapter.py
│   │   │   │   │   └── models
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── generic.py
│   │   │   │   │       ├── internvl.py
│   │   │   │   │       ├── opencua.py
│   │   │   │   │       └── qwen2_5_vl.py
│   │   │   │   ├── agent.py
│   │   │   │   ├── callbacks
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── budget_manager.py
│   │   │   │   │   ├── image_retention.py
│   │   │   │   │   ├── logging.py
│   │   │   │   │   ├── operator_validator.py
│   │   │   │   │   ├── pii_anonymization.py
│   │   │   │   │   ├── prompt_instructions.py
│   │   │   │   │   ├── telemetry.py
│   │   │   │   │   └── trajectory_saver.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── computers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cua.py
│   │   │   │   │   └── custom.py
│   │   │   │   ├── decorators.py
│   │   │   │   ├── human_tool
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   ├── server.py
│   │   │   │   │   └── ui.py
│   │   │   │   ├── integrations
│   │   │   │   │   └── hud
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── agent.py
│   │   │   │   │       └── proxy.py
│   │   │   │   ├── loops
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── anthropic.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── composed_grounded.py
│   │   │   │   │   ├── gelato.py
│   │   │   │   │   ├── gemini.py
│   │   │   │   │   ├── generic_vlm.py
│   │   │   │   │   ├── glm45v.py
│   │   │   │   │   ├── gta1.py
│   │   │   │   │   ├── holo.py
│   │   │   │   │   ├── internvl.py
│   │   │   │   │   ├── model_types.csv
│   │   │   │   │   ├── moondream3.py
│   │   │   │   │   ├── omniparser.py
│   │   │   │   │   ├── openai.py
│   │   │   │   │   ├── opencua.py
│   │   │   │   │   ├── uiins.py
│   │   │   │   │   ├── uitars.py
│   │   │   │   │   └── uitars2.py
│   │   │   │   ├── proxy
│   │   │   │   │   ├── examples.py
│   │   │   │   │   └── handlers.py
│   │   │   │   ├── responses.py
│   │   │   │   ├── tools
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── browser_tool.py
│   │   │   │   ├── types.py
│   │   │   │   └── ui
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __main__.py
│   │   │   │       └── gradio
│   │   │   │           ├── __init__.py
│   │   │   │           ├── app.py
│   │   │   │           └── ui_components.py
│   │   │   ├── benchmarks
│   │   │   │   ├── .gitignore
│   │   │   │   ├── contrib.md
│   │   │   │   ├── interactive.py
│   │   │   │   ├── models
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   └── gta1.py
│   │   │   │   ├── README.md
│   │   │   │   ├── ss-pro.py
│   │   │   │   ├── ss-v2.py
│   │   │   │   └── utils.py
│   │   │   ├── example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_computer_agent.py
│   │   ├── bench-ui
│   │   │   ├── bench_ui
│   │   │   │   ├── __init__.py
│   │   │   │   ├── api.py
│   │   │   │   └── child.py
│   │   │   ├── examples
│   │   │   │   ├── folder_example.py
│   │   │   │   ├── gui
│   │   │   │   │   ├── index.html
│   │   │   │   │   ├── logo.svg
│   │   │   │   │   └── styles.css
│   │   │   │   ├── output_overlay.png
│   │   │   │   └── simple_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       └── test_port_detection.py
│   │   ├── computer
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer
│   │   │   │   ├── __init__.py
│   │   │   │   ├── computer.py
│   │   │   │   ├── diorama_computer.py
│   │   │   │   ├── helpers.py
│   │   │   │   ├── interface
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── logger.py
│   │   │   │   ├── models.py
│   │   │   │   ├── providers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cloud
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── docker
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── lume
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── lume_api.py
│   │   │   │   │   ├── lumier
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── types.py
│   │   │   │   │   └── winsandbox
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── provider.py
│   │   │   │   │       └── setup_script.ps1
│   │   │   │   ├── tracing_wrapper.py
│   │   │   │   ├── tracing.py
│   │   │   │   ├── ui
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   └── gradio
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── app.py
│   │   │   │   └── utils.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_computer.py
│   │   ├── computer-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── browser.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── diorama
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── diorama_computer.py
│   │   │   │   │   ├── diorama.py
│   │   │   │   │   ├── draw.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── safezone.py
│   │   │   │   ├── handlers
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── main.py
│   │   │   │   ├── server.py
│   │   │   │   ├── utils
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── wallpaper.py
│   │   │   │   └── watchdog.py
│   │   │   ├── examples
│   │   │   │   ├── __init__.py
│   │   │   │   └── usage_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   ├── run_server.py
│   │   │   ├── test_connection.py
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_server.py
│   │   ├── core
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── core
│   │   │   │   ├── __init__.py
│   │   │   │   └── telemetry
│   │   │   │       ├── __init__.py
│   │   │   │       └── posthog.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_telemetry.py
│   │   ├── mcp-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── build-extension.py
│   │   │   ├── CONCURRENT_SESSIONS.md
│   │   │   ├── desktop-extension
│   │   │   │   ├── cua-extension.mcpb
│   │   │   │   ├── desktop_extension.png
│   │   │   │   ├── manifest.json
│   │   │   │   ├── README.md
│   │   │   │   ├── requirements.txt
│   │   │   │   ├── run_server.sh
│   │   │   │   └── setup.py
│   │   │   ├── mcp_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── server.py
│   │   │   │   └── session_manager.py
│   │   │   ├── pdm.lock
│   │   │   ├── pyproject.toml
│   │   │   ├── QUICK_TEST_COMMANDS.sh
│   │   │   ├── quick_test_local_option.py
│   │   │   ├── README.md
│   │   │   ├── scripts
│   │   │   │   ├── install_mcp_server.sh
│   │   │   │   └── start_mcp_server.sh
│   │   │   ├── test_mcp_server_local_option.py
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_mcp_server.py
│   │   ├── pylume
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_pylume.py
│   │   └── som
│   │       ├── .bumpversion.cfg
│   │       ├── LICENSE
│   │       ├── poetry.toml
│   │       ├── pyproject.toml
│   │       ├── README.md
│   │       ├── som
│   │       │   ├── __init__.py
│   │       │   ├── detect.py
│   │       │   ├── detection.py
│   │       │   ├── models.py
│   │       │   ├── ocr.py
│   │       │   ├── util
│   │       │   │   └── utils.py
│   │       │   └── visualization.py
│   │       └── tests
│   │           ├── conftest.py
│   │           └── test_omniparser.py
│   ├── qemu-docker
│   │   ├── linux
│   │   │   ├── Dockerfile
│   │   │   ├── README.md
│   │   │   └── src
│   │   │       ├── entry.sh
│   │   │       └── vm
│   │   │           ├── image
│   │   │           │   └── README.md
│   │   │           └── setup
│   │   │               ├── install.sh
│   │   │               ├── setup-cua-server.sh
│   │   │               └── setup.sh
│   │   ├── README.md
│   │   └── windows
│   │       ├── Dockerfile
│   │       ├── README.md
│   │       └── src
│   │           ├── entry.sh
│   │           └── vm
│   │               ├── image
│   │               │   └── README.md
│   │               └── setup
│   │                   ├── install.bat
│   │                   ├── on-logon.ps1
│   │                   ├── setup-cua-server.ps1
│   │                   ├── setup-utils.psm1
│   │                   └── setup.ps1
│   ├── typescript
│   │   ├── .gitignore
│   │   ├── .nvmrc
│   │   ├── agent
│   │   │   ├── examples
│   │   │   │   ├── playground-example.html
│   │   │   │   └── README.md
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── client.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   └── client.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── computer
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── computer
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── providers
│   │   │   │   │   │   ├── base.ts
│   │   │   │   │   │   ├── cloud.ts
│   │   │   │   │   │   └── index.ts
│   │   │   │   │   └── types.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── base.ts
│   │   │   │   │   ├── factory.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── linux.ts
│   │   │   │   │   ├── macos.ts
│   │   │   │   │   └── windows.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   ├── computer
│   │   │   │   │   └── cloud.test.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── factory.test.ts
│   │   │   │   │   ├── index.test.ts
│   │   │   │   │   ├── linux.test.ts
│   │   │   │   │   ├── macos.test.ts
│   │   │   │   │   └── windows.test.ts
│   │   │   │   └── setup.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── core
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── index.ts
│   │   │   │   └── telemetry
│   │   │   │       ├── clients
│   │   │   │       │   ├── index.ts
│   │   │   │       │   └── posthog.ts
│   │   │   │       └── index.ts
│   │   │   ├── tests
│   │   │   │   └── telemetry.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── cua-cli
│   │   │   ├── .gitignore
│   │   │   ├── .prettierrc
│   │   │   ├── bun.lock
│   │   │   ├── CLAUDE.md
│   │   │   ├── index.ts
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── auth.ts
│   │   │   │   ├── cli.ts
│   │   │   │   ├── commands
│   │   │   │   │   ├── auth.ts
│   │   │   │   │   └── sandbox.ts
│   │   │   │   ├── config.ts
│   │   │   │   ├── http.ts
│   │   │   │   ├── storage.ts
│   │   │   │   └── util.ts
│   │   │   └── tsconfig.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── pnpm-workspace.yaml
│   │   └── README.md
│   └── xfce
│       ├── .dockerignore
│       ├── .gitignore
│       ├── Development.md
│       ├── Dockerfile
│       ├── Dockerfile.dev
│       ├── README.md
│       └── src
│           ├── scripts
│           │   ├── resize-display.sh
│           │   ├── start-computer-server.sh
│           │   ├── start-novnc.sh
│           │   ├── start-vnc.sh
│           │   └── xstartup.sh
│           ├── supervisor
│           │   └── supervisord.conf
│           └── xfce-config
│               ├── helpers.rc
│               ├── xfce4-power-manager.xml
│               └── xfce4-session.xml
├── LICENSE.md
├── Makefile
├── notebooks
│   ├── agent_nb.ipynb
│   ├── blog
│   │   ├── build-your-own-operator-on-macos-1.ipynb
│   │   └── build-your-own-operator-on-macos-2.ipynb
│   ├── composite_agents_docker_nb.ipynb
│   ├── computer_nb.ipynb
│   ├── computer_server_nb.ipynb
│   ├── customizing_computeragent.ipynb
│   ├── eval_osworld.ipynb
│   ├── ollama_nb.ipynb
│   ├── README.md
│   ├── sota_hackathon_cloud.ipynb
│   └── sota_hackathon.ipynb
├── package-lock.json
├── package.json
├── pnpm-lock.yaml
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── scripts
│   ├── install-cli.ps1
│   ├── install-cli.sh
│   ├── playground-docker.sh
│   ├── playground.sh
│   ├── run-docker-dev.sh
│   └── typescript-typecheck.js
├── TESTING.md
├── tests
│   ├── agent_loop_testing
│   │   ├── agent_test.py
│   │   └── README.md
│   ├── pytest.ini
│   ├── shell_cmd.py
│   ├── test_files.py
│   ├── test_mcp_server_session_management.py
│   ├── test_mcp_server_streaming.py
│   ├── test_shell_bash.py
│   ├── test_telemetry.py
│   ├── test_tracing.py
│   ├── test_venv.py
│   └── test_watchdog.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/libs/python/agent/agent/loops/opencua.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | OpenCUA agent loop implementation for click prediction using litellm.acompletion
  3 | Based on OpenCUA model for GUI grounding tasks.
  4 | """
  5 | 
  6 | import asyncio
  7 | import base64
  8 | import json
  9 | import math
 10 | import re
 11 | import uuid
 12 | from io import BytesIO
 13 | from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
 14 | 
 15 | import litellm
 16 | from PIL import Image
 17 | 
 18 | from ..decorators import register_agent
 19 | from ..loops.base import AsyncAgentConfig
 20 | from ..types import AgentCapability, AgentResponse, Messages, Tools
 21 | from .composed_grounded import ComposedGroundedConfig
 22 | 
 23 | 
 24 | def extract_coordinates_from_pyautogui(text: str) -> Optional[Tuple[int, int]]:
 25 |     """Extract coordinates from pyautogui.click(x=..., y=...) format."""
 26 |     try:
 27 |         # Look for pyautogui.click(x=1443, y=343) pattern
 28 |         pattern = r"pyautogui\.click\(x=(\d+),\s*y=(\d+)\)"
 29 |         match = re.search(pattern, text)
 30 |         if match:
 31 |             x, y = int(match.group(1)), int(match.group(2))
 32 |             return (x, y)
 33 |         return None
 34 |     except Exception:
 35 |         return None
 36 | 
 37 | 
 38 | @register_agent(models=r"(?i).*OpenCUA.*")
 39 | class OpenCUAConfig(ComposedGroundedConfig):
 40 |     """OpenCUA agent configuration implementing AsyncAgentConfig protocol for click prediction."""
 41 | 
 42 |     def __init__(self):
 43 |         super().__init__()
 44 |         self.current_model = None
 45 |         self.last_screenshot_b64 = None
 46 | 
 47 |     async def predict_step(
 48 |         self,
 49 |         messages: List[Dict[str, Any]],
 50 |         model: str,
 51 |         tools: Optional[List[Dict[str, Any]]] = None,
 52 |         max_retries: Optional[int] = None,
 53 |         stream: bool = False,
 54 |         computer_handler=None,
 55 |         _on_api_start=None,
 56 |         _on_api_end=None,
 57 |         _on_usage=None,
 58 |         _on_screenshot=None,
 59 |         **kwargs,
 60 |     ) -> Dict[str, Any]:
 61 |         """Fallback to a self-composed model"""
 62 |         return await super().predict_step(
 63 |             messages=messages,
 64 |             model=f"{model}+{model}",
 65 |             tools=tools,
 66 |             max_retries=max_retries,
 67 |             stream=stream,
 68 |             computer_handler=computer_handler,
 69 |             _on_api_start=_on_api_start,
 70 |             _on_api_end=_on_api_end,
 71 |             _on_usage=_on_usage,
 72 |             _on_screenshot=_on_screenshot,
 73 |             **kwargs,
 74 |         )
 75 | 
 76 |     async def predict_click(
 77 |         self, model: str, image_b64: str, instruction: str, **kwargs
 78 |     ) -> Optional[Tuple[int, int]]:
 79 |         """
 80 |         Predict click coordinates using OpenCUA model via litellm.acompletion.
 81 | 
 82 |         Args:
 83 |             model: The OpenCUA model name
 84 |             image_b64: Base64 encoded image
 85 |             instruction: Instruction for where to click
 86 | 
 87 |         Returns:
 88 |             Tuple of (x, y) coordinates or None if prediction fails
 89 |         """
 90 |         # Prepare system message
 91 |         system_prompt = (
 92 |             "You are a GUI agent. You are given a task and a screenshot of the screen. "
 93 |             "You need to perform a series of pyautogui actions to complete the task."
 94 |         )
 95 | 
 96 |         system_message = {"role": "system", "content": system_prompt}
 97 | 
 98 |         # Prepare user message with image and instruction
 99 |         user_message = {
100 |             "role": "user",
101 |             "content": [
102 |                 {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
103 |                 {"type": "text", "text": f"Click on {instruction}"},
104 |             ],
105 |         }
106 | 
107 |         # Prepare API call kwargs
108 |         api_kwargs = {
109 |             "model": model,
110 |             "messages": [system_message, user_message],
111 |             "max_new_tokens": 2056,
112 |             "temperature": 0,
113 |             **kwargs,
114 |         }
115 | 
116 |         # Use liteLLM acompletion
117 |         response = await litellm.acompletion(**api_kwargs)
118 | 
119 |         # Extract response text
120 |         output_text = response.choices[0].message.content
121 |         # print(output_text)
122 | 
123 |         # Extract coordinates from pyautogui format
124 |         coordinates = extract_coordinates_from_pyautogui(output_text)
125 | 
126 |         return coordinates
127 | 
128 |     def get_capabilities(self) -> List[AgentCapability]:
129 |         """Return the capabilities supported by this agent."""
130 |         return ["click"]
131 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/customizing-computeragent.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Customize ComputerAgent
  3 | ---
  4 | 
  5 | <Callout>
  6 |   A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/customizing_computeragent.ipynb" target="_blank">Jupyter Notebook</a> is available for this documentation.
  7 | </Callout>
  8 | 
  9 | The `ComputerAgent` interface provides an easy proxy to any computer-using model configuration, and it is a powerful framework for extending and building your own agentic systems.
 10 | 
 11 | This guide shows four proven ways to increase capabilities and success rate:
 12 | 
 13 | - 1 — Simple: Prompt engineering
 14 | - 2 — Easy: Tools
 15 | - 3 — Intermediate: Callbacks
 16 | - 4 — Expert: Custom `@register_agent`
 17 | 
 18 | ## 1) Simple: Prompt engineering
 19 | 
 20 | Provide guiding instructions to shape behavior. `ComputerAgent` accepts an optional `instructions: str | None` argument, which acts as a system-style preface. Internally, this uses a callback that prepends a user message before each LLM call.
 21 | 
 22 | ```python
 23 | from agent.agent import ComputerAgent
 24 | 
 25 | agent = ComputerAgent(
 26 |     model="openai/computer-use-preview",
 27 |     tools=[computer],
 28 |     instructions=(
 29 |         "You are a meticulous software operator. Prefer safe, deterministic actions. "
 30 |         "Always confirm via on-screen text before proceeding."
 31 |     ),
 32 | )
 33 | ```
 34 | 
 35 | ## 2) Easy: Tools
 36 | 
 37 | Expose deterministic capabilities as tools (Python functions or custom computer handlers). The agent will call them when appropriate.
 38 | 
 39 | ```python
 40 | def calculate_percentage(numerator: float, denominator: float) -> str:
 41 |     """Calculate percentage as a string.
 42 | 
 43 |     Args:
 44 |         numerator: Numerator value
 45 |         denominator: Denominator value
 46 |     Returns:
 47 |         A formatted percentage string (e.g., '75.00%').
 48 |     """
 49 |     if denominator == 0:
 50 |         return "0.00%"
 51 |     return f"{(numerator/denominator)*100:.2f}%"
 52 | 
 53 | agent = ComputerAgent(
 54 |     model="openai/computer-use-preview",
 55 |     tools=[computer, calculate_percentage],
 56 | )
 57 | ```
 58 | 
 59 | - See `docs/agent-sdk/custom-tools` for authoring function tools.
 60 | - See `docs/agent-sdk/custom-computer-handlers` for building full computer interfaces.
 61 | 
 62 | ## 3) Intermediate: Callbacks
 63 | 
 64 | Callbacks provide lifecycle hooks to preprocess messages, postprocess outputs, record trajectories, manage costs, and more.
 65 | 
 66 | ```python
 67 | from agent.callbacks import ImageRetentionCallback, TrajectorySaverCallback, BudgetManagerCallback
 68 | 
 69 | agent = ComputerAgent(
 70 |     model="cua/anthropic/claude-sonnet-4.5",
 71 |     tools=[computer],
 72 |     callbacks=[
 73 |         ImageRetentionCallback(only_n_most_recent_images=3),
 74 |         TrajectorySaverCallback("./trajectories"),
 75 |         BudgetManagerCallback(max_budget=10.0, raise_error=True),
 76 |     ],
 77 | )
 78 | ```
 79 | 
 80 | - Browse implementations in `libs/python/agent/agent/callbacks/`.
 81 | 
 82 | ## 4) Expert: Custom `@register_agent`
 83 | 
 84 | Build your own agent configuration class to control prompting, message shaping, and tool handling. This is the most flexible option for specialized domains.
 85 | 
 86 | - Register your own `model=...` loop using `@register_agent`
 87 | - Browse implementations in `libs/python/agent/agent/loops/`.
 88 | - Implement `predict_step()` (and optionally `predict_click()`) and return the standardized output schema.
 89 | 
 90 | ```python
 91 | from agent.decorators import register_agent
 92 | 
 93 | @register_agent(models=r".*my-special-model.*", priority=10)
 94 | class MyCustomAgentConfig:
 95 |     async def predict_step(self, messages, model, tools, **kwargs):
 96 |         # 1) Format messages for your provider
 97 |         # 2) Call provider
 98 |         # 3) Convert responses to the agent output schema
 99 |         return {"output": [], "usage": {}}
100 | 
101 |     async def predict_click(self, model, image_b64, instruction):
102 |         # Optional: click-only capability
103 |         return None
104 | 
105 |     def get_capabilities(self):
106 |         return ["step"]
107 | ```
108 | 
109 | ## HUD integration (optional)
110 | 
111 | When using the HUD evaluation integration (`agent/integrations/hud/`), you can pass `instructions`, `tools`, and `callbacks` directly:
112 | 
113 | ```python
114 | from agent.integrations.hud import run_single_task
115 | 
116 | await run_single_task(
117 |     dataset="username/dataset-name",
118 |     model="openai/computer-use-preview",
119 |     instructions="Operate carefully. Always verify on-screen text before actions.",
120 |     # tools=[your_custom_function],
121 |     # callbacks=[YourCustomCallback()],
122 | )
123 | ```
124 | 
```

--------------------------------------------------------------------------------
/docs/src/components/iou.tsx:
--------------------------------------------------------------------------------

```typescript
  1 | 'use client';
  2 | import React, { useRef, useEffect, useState, useCallback } from 'react';
  3 | 
  4 | /**
  5 |  * Represents a rectangle with position, dimensions, styling, and identification
  6 |  */
  7 | interface Rectangle {
  8 |   /** The x-coordinate of the rectangle's left edge */
  9 |   left: number;
 10 |   /** The y-coordinate of the rectangle's top edge */
 11 |   top: number;
 12 |   /** The width of the rectangle */
 13 |   width: number;
 14 |   /** The height of the rectangle */
 15 |   height: number;
 16 |   /** The fill color of the rectangle */
 17 |   fill: string;
 18 |   /** The display name of the rectangle */
 19 |   name: string;
 20 | }
 21 | 
 22 | /**
 23 |  * Props for the IOU component
 24 |  */
 25 | interface IOUProps {
 26 |   /** The title to display above the visualization */
 27 |   title: string;
 28 |   /** The description text to display below the IOU value */
 29 |   description: string;
 30 |   /** The first rectangle for IOU calculation */
 31 |   rect1: Rectangle;
 32 |   /** The second rectangle for IOU calculation */
 33 |   rect2: Rectangle;
 34 | }
 35 | 
 36 | /**
 37 |  * A React component that visualizes and calculates the Intersection over Union (IOU)
 38 |  * of two rectangles on a canvas
 39 |  * @param props - The component props
 40 |  * @returns The rendered IOU visualization component
 41 |  */
 42 | export default function IOU({ title, description, rect1, rect2 }: IOUProps) {
 43 |   const canvasRef = useRef<HTMLCanvasElement>(null);
 44 |   const [actualIOU, setActualIOU] = useState<number>(0);
 45 | 
 46 |   /**
 47 |    * Converts a rectangle to a bounding box with left, right, top, and bottom coordinates
 48 |    * @param rect - The rectangle to convert
 49 |    * @returns An object containing the bounding box coordinates
 50 |    */
 51 |   const getBbox = (rect: Rectangle) => ({
 52 |     left: rect.left,
 53 |     right: rect.left + rect.width,
 54 |     top: rect.top,
 55 |     bottom: rect.top + rect.height,
 56 |   });
 57 | 
 58 |   /**
 59 |    * Calculates the intersection area between two bounding boxes
 60 |    * @param bbox1 - The first bounding box
 61 |    * @param bbox2 - The second bounding box
 62 |    * @returns The area of intersection between the two bounding boxes
 63 |    */
 64 |   const calcIntersection = (bbox1: any, bbox2: any): number => {
 65 |     const x1 = Math.max(bbox1.left, bbox2.left);
 66 |     const x2 = Math.min(bbox1.right, bbox2.right);
 67 |     const y1 = Math.max(bbox1.top, bbox2.top);
 68 |     const y2 = Math.min(bbox1.bottom, bbox2.bottom);
 69 | 
 70 |     // Check if there's actually an overlap
 71 |     if (x2 <= x1 || y2 <= y1) {
 72 |       return 0;
 73 |     }
 74 | 
 75 |     const intersection = (x2 - x1) * (y2 - y1);
 76 |     return intersection;
 77 |   };
 78 | 
 79 |   /**
 80 |    * Calculates the area of a rectangle
 81 |    * @param rect - The rectangle to calculate area for
 82 |    * @returns The area of the rectangle
 83 |    */
 84 |   const calcArea = (rect: Rectangle): number => {
 85 |     return rect.width * rect.height;
 86 |   };
 87 | 
 88 |   /**
 89 |    * Draws the rectangles on the canvas and calculates the IOU value
 90 |    */
 91 |   const drawCanvas = useCallback(() => {
 92 |     const canvas = canvasRef.current;
 93 |     if (!canvas) return;
 94 | 
 95 |     const ctx = canvas.getContext('2d');
 96 |     if (!ctx) return;
 97 | 
 98 |     // Clear canvas
 99 |     ctx.clearRect(0, 0, canvas.width, canvas.height);
100 | 
101 |     // Calculate IOU
102 |     const bbox1 = getBbox(rect1);
103 |     const bbox2 = getBbox(rect2);
104 |     const intersection = calcIntersection(bbox1, bbox2);
105 |     const union = calcArea(rect1) + calcArea(rect2) - intersection;
106 |     const iou = intersection / union;
107 |     setActualIOU(iou);
108 | 
109 |     // Draw rectangles
110 |     [rect1, rect2].forEach((rect) => {
111 |       ctx.fillStyle = rect.fill;
112 |       ctx.fillRect(rect.left, rect.top, rect.width, rect.height);
113 | 
114 |       ctx.strokeStyle = '#000';
115 |       ctx.lineWidth = 2;
116 |       ctx.strokeRect(rect.left, rect.top, rect.width, rect.height);
117 | 
118 |       ctx.fillStyle = '#000';
119 |       ctx.font = '12px';
120 |       ctx.fillText(rect.name, rect.left + 5, rect.top + 15);
121 |     });
122 |   }, [rect1, rect2]);
123 | 
124 |   useEffect(() => {
125 |     drawCanvas();
126 |   }, [drawCanvas]);
127 | 
128 |   return (
129 |     <div className="">
130 |       <h3 className="text-sm font-semibold ">{title}</h3>
131 |       <div className="flex items-start gap-6">
132 |         <div>
133 |           <canvas ref={canvasRef} width={200} height={150} className="border bg-white rounded-md" />
134 |           <div className="mt-2 text-sm">
135 |             <div className="font-mono mb-2">IOU = {actualIOU.toFixed(3)}</div>
136 |             <span className="">{description}</span>
137 |           </div>
138 |         </div>
139 |       </div>
140 |     </div>
141 |   );
142 | }
143 | 
```

--------------------------------------------------------------------------------
/libs/python/computer/computer/providers/base.py:
--------------------------------------------------------------------------------

```python
  1 | """Base provider interface for VM backends."""
  2 | 
  3 | import abc
  4 | from enum import StrEnum
  5 | from typing import Any, AsyncContextManager, Dict, Optional
  6 | 
  7 | from .types import ListVMsResponse
  8 | 
  9 | 
 10 | class VMProviderType(StrEnum):
 11 |     """Enum of supported VM provider types."""
 12 | 
 13 |     LUME = "lume"
 14 |     LUMIER = "lumier"
 15 |     CLOUD = "cloud"
 16 |     WINSANDBOX = "winsandbox"
 17 |     DOCKER = "docker"
 18 |     UNKNOWN = "unknown"
 19 | 
 20 | 
 21 | class BaseVMProvider(AsyncContextManager):
 22 |     """Base interface for VM providers.
 23 | 
 24 |     All VM provider implementations must implement this interface.
 25 |     """
 26 | 
 27 |     @property
 28 |     @abc.abstractmethod
 29 |     def provider_type(self) -> VMProviderType:
 30 |         """Get the provider type."""
 31 |         pass
 32 | 
 33 |     @abc.abstractmethod
 34 |     async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
 35 |         """Get VM information by name.
 36 | 
 37 |         Args:
 38 |             name: Name of the VM to get information for
 39 |             storage: Optional storage path override. If provided, this will be used
 40 |                     instead of the provider's default storage path.
 41 | 
 42 |         Returns:
 43 |             Dictionary with VM information including status, IP address, etc.
 44 |         """
 45 |         pass
 46 | 
 47 |     @abc.abstractmethod
 48 |     async def list_vms(self) -> ListVMsResponse:
 49 |         """List all available VMs.
 50 | 
 51 |         Returns:
 52 |             ListVMsResponse: A list of minimal VM objects as defined in
 53 |             `computer.providers.types.MinimalVM`.
 54 |         """
 55 |         pass
 56 | 
 57 |     @abc.abstractmethod
 58 |     async def run_vm(
 59 |         self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None
 60 |     ) -> Dict[str, Any]:
 61 |         """Run a VM by name with the given options.
 62 | 
 63 |         Args:
 64 |             image: Name/tag of the image to use
 65 |             name: Name of the VM to run
 66 |             run_opts: Dictionary of run options (memory, cpu, etc.)
 67 |             storage: Optional storage path override. If provided, this will be used
 68 |                     instead of the provider's default storage path.
 69 | 
 70 |         Returns:
 71 |             Dictionary with VM run status and information
 72 |         """
 73 |         pass
 74 | 
 75 |     @abc.abstractmethod
 76 |     async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
 77 |         """Stop a VM by name.
 78 | 
 79 |         Args:
 80 |             name: Name of the VM to stop
 81 |             storage: Optional storage path override. If provided, this will be used
 82 |                     instead of the provider's default storage path.
 83 | 
 84 |         Returns:
 85 |             Dictionary with VM stop status and information
 86 |         """
 87 |         pass
 88 | 
 89 |     @abc.abstractmethod
 90 |     async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
 91 |         """Restart a VM by name.
 92 | 
 93 |         Args:
 94 |             name: Name of the VM to restart
 95 |             storage: Optional storage path override. If provided, this will be used
 96 |                     instead of the provider's default storage path.
 97 | 
 98 |         Returns:
 99 |             Dictionary with VM restart status and information
100 |         """
101 |         pass
102 | 
103 |     @abc.abstractmethod
104 |     async def update_vm(
105 |         self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None
106 |     ) -> Dict[str, Any]:
107 |         """Update VM configuration.
108 | 
109 |         Args:
110 |             name: Name of the VM to update
111 |             update_opts: Dictionary of update options (memory, cpu, etc.)
112 |             storage: Optional storage path override. If provided, this will be used
113 |                     instead of the provider's default storage path.
114 | 
115 |         Returns:
116 |             Dictionary with VM update status and information
117 |         """
118 |         pass
119 | 
120 |     @abc.abstractmethod
121 |     async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str:
122 |         """Get the IP address of a VM, waiting indefinitely until it's available.
123 | 
124 |         Args:
125 |             name: Name of the VM to get the IP for
126 |             storage: Optional storage path override. If provided, this will be used
127 |                     instead of the provider's default storage path.
128 |             retry_delay: Delay between retries in seconds (default: 2)
129 | 
130 |         Returns:
131 |             IP address of the VM when it becomes available
132 |         """
133 |         pass
134 | 
```

--------------------------------------------------------------------------------
/blog/cua-hackathon.md:
--------------------------------------------------------------------------------

```markdown
 1 | # Computer-Use Agents SOTA Challenge: Hack the North + Global Online
 2 | 
 3 | _Published on August 25, 2025 by Francesco Bonacci_
 4 | 
 5 | We’re bringing something new to [Hack the North](https://hackthenorth.com), Canada’s largest hackathon, this year: a head-to-head competition for **Computer-Use Agents** - on-site at Waterloo and a **Global online challenge**. From September 12–14, 2025, teams build on the **Cua Agent Framework** and are scored in **HUD’s OSWorld-Verified** environment to push past today’s SOTA on [OS-World](https://os-world.github.io).
 6 | 
 7 | <img src="./assets/hack-the-north.png">
 8 | 
 9 | ## Track A: On-site @ Hack the North
10 | 
11 | There’s one global leaderboard: **Cua - Best State-of-the-Art Computer-Use Agent**. Use any model setup you like (cloud or local). After projects are submitted, [HUD](https://www.hud.so) runs the official benchmark; the top team earns a **guaranteed YC partner interview (W26 batch)**. We’ll also feature winners on our blog and socials and kit the team out with swag.
12 | 
13 | ## Track B: Cua Global Online Hackathon
14 | 
15 | **Cua** and [**Ollama**](https://ollama.com) organize a global hackathon to find the **most creative uses of local and hybrid computer-use agents**. There are no geographic restrictions on who can join — this is a worldwide competition focused on **originality, impact, and inventive applications** that showcase what's possible with local and hybrid inference.
16 | 
17 | **Prizes:**
18 | 
19 | - 1st **MacBook Air M4 (or equivalent value)** + features in Cua & Ollama channels
20 | - 2nd **$500 CAD + swag**
21 | - 3rd **swag + public feature**
22 | 
23 | ---
24 | 
25 | ## How it works
26 | 
27 | Two different tracks, two different processes:
28 | 
29 | ### On-site (Track A)
30 | 
31 | Build during the weekend and submit a repo with a one-line start command. **HUD** executes your command in a clean environment and runs **OSWorld-Verified**. Scores come from official benchmark results; ties break by median, then wall-clock time, then earliest submission. Any model setup is allowed (cloud or local).
32 | 
33 | **HUD** runs official evaluations immediately after submission. Winners are announced at the **closing ceremony**.
34 | 
35 | ### Rules
36 | 
37 | - Fork and star the [Cua repo](https://github.com/trycua/cua).
38 | - Add your agent and instructions in `samples/community/hack-the-north/<YOUR_TEAM_NAME>`.
39 | - Include a README with details on the approach and any required notes.
40 | - Submit a PR.
41 | 
42 | **Deadline: Sept 15, 8:00 AM EDT**
43 | 
44 | ### Global Online (Track B)
45 | 
46 | Open to anyone, anywhere. Build on your own timeline and submit through the **Cua Discord form** by the deadline.
47 | 
48 | **Project Requirements:**
49 | 
50 | - Your agent must integrate **Cua and Ollama** in some way
51 | - Your agent must be **easily runnable by judges**
52 | 
53 | Judged by **Cua** and **Ollama** teams on:
54 | 
55 | - **Creativity (30%)** – originality, usefulness, surprise factor
56 | - **Technical Depth (30%)** – quality of engineering and agent design
57 | - **Use of Ollama (30%)** – effective integration of local/hybrid inference
58 | - **Polish (10%)** – presentation, clarity, demo readiness
59 | 
60 | ### Submission Process
61 | 
62 | Submissions will be collected via a **form link provided in the Cua Discord**. Your submission must contain:
63 | 
64 | - **GitHub repo** containing the agent source code and a clear README with instructions on how to use the agent
65 | - **Explanation** of the models and tools used, and what's local or hybrid about your design
66 | - **Short demo video** (up to two minutes)
67 | 
68 | A **commit freeze** will be used to ensure that no changes are made after the deadline. Winners will be announced after judging is complete.
69 | 
70 | **Deadline: Sept 28, 11:59 PM UTC (extended due to popular demand!)**
71 | 
72 | ---
73 | 
74 | ## Join us
75 | 
76 | Bring a team, pick a model stack, and push what agents can do on real computers. We can’t wait to see what you build at **Hack the North 2025**.
77 | 
78 | **Discord channels**
79 | 
80 | - Join the Discord first: https://discord.gg/cua-ai
81 | - **#hack-the-north (on-site):** https://discord.com/channels/1328377437301641247/1409508526774157342
82 | - **#global-online (Ollama × Cua):** https://discord.com/channels/1328377437301641247/1409518100491145226
83 | 
84 | **Contact**  
85 | Questions on Hack the North? Email **[email protected]**.
86 | 
87 | _P.S. If you’re planning ahead, start with the Cua Agent Framework and OSWorld-Verified docs at cua.ai/docs; we’ll share office-hour times in both Discord channels._
88 | 
```

--------------------------------------------------------------------------------
/.github/workflows/bump-version.yml:
--------------------------------------------------------------------------------

```yaml
  1 | name: Bump Version & Publish
  2 | 
  3 | on:
  4 |   workflow_dispatch:
  5 |     inputs:
  6 |       service:
  7 |         description: "Service/Package to bump"
  8 |         required: true
  9 |         type: choice
 10 |         options:
 11 |           - cua-agent
 12 |           - cua-computer
 13 |           - cua-computer-server
 14 |           - cua-core
 15 |           - cua-mcp-server
 16 |           - cua-som
 17 |           - pylume
 18 |       bump_type:
 19 |         description: "Version bump type"
 20 |         required: true
 21 |         type: choice
 22 |         options:
 23 |           - patch
 24 |           - minor
 25 |           - major
 26 | 
 27 | permissions:
 28 |   contents: write
 29 | 
 30 | jobs:
 31 |   bump-version:
 32 |     runs-on: ubuntu-latest
 33 |     outputs:
 34 |       agent_version: ${{ steps.agent_version.outputs.version }}
 35 |       computer_version: ${{ steps.computer_version.outputs.version }}
 36 |     steps:
 37 |       - name: Set package directory
 38 |         id: package
 39 |         run: |
 40 |           case "${{ inputs.service }}" in
 41 |             "cua-agent")
 42 |               echo "directory=libs/python/agent" >> $GITHUB_OUTPUT
 43 |               ;;
 44 |             "cua-computer")
 45 |               echo "directory=libs/python/computer" >> $GITHUB_OUTPUT
 46 |               ;;
 47 |             "cua-computer-server")
 48 |               echo "directory=libs/python/computer-server" >> $GITHUB_OUTPUT
 49 |               ;;
 50 |             "cua-core")
 51 |               echo "directory=libs/python/core" >> $GITHUB_OUTPUT
 52 |               ;;
 53 |             "cua-mcp-server")
 54 |               echo "directory=libs/python/mcp-server" >> $GITHUB_OUTPUT
 55 |               ;;
 56 |             "cua-som")
 57 |               echo "directory=libs/python/som" >> $GITHUB_OUTPUT
 58 |               ;;
 59 |             "pylume")
 60 |               echo "directory=libs/python/pylume" >> $GITHUB_OUTPUT
 61 |               ;;
 62 |             *)
 63 |               echo "Unknown service: ${{ inputs.service }}"
 64 |               exit 1
 65 |               ;;
 66 |           esac
 67 | 
 68 |       - name: Checkout repository
 69 |         uses: actions/checkout@v4
 70 |         with:
 71 |           fetch-depth: 0
 72 |           token: ${{ secrets.GITHUB_TOKEN }}
 73 | 
 74 |       - name: Set up Python
 75 |         uses: actions/setup-python@v5
 76 |         with:
 77 |           python-version: "3.11"
 78 | 
 79 |       - name: Install bump2version
 80 |         run: pip install bump2version
 81 | 
 82 |       - name: Configure Git
 83 |         run: |
 84 |           git config user.name "github-actions[bot]"
 85 |           git config user.email "github-actions[bot]@users.noreply.github.com"
 86 | 
 87 |       - name: Run bump2version
 88 |         run: |
 89 |           cd ${{ steps.package.outputs.directory }}
 90 |           bump2version ${{ inputs.bump_type }}
 91 | 
 92 |       - name: Also bump cua-agent
 93 |         if: ${{ inputs.service == 'cua-computer' }}
 94 |         run: |
 95 |           cd libs/python/agent
 96 |           bump2version ${{ inputs.bump_type }}
 97 | 
 98 |       - name: Capture bumped agent version
 99 |         if: ${{ inputs.service == 'cua-agent' || inputs.service == 'cua-computer' }}
100 |         id: agent_version
101 |         run: |
102 |           cd libs/python/agent
103 |           VERSION=$(python -c "import tomllib; from pathlib import Path; data = tomllib.loads(Path('pyproject.toml').read_text()); print(data['project']['version'])")
104 |           echo "Agent version: $VERSION"
105 |           echo "version=$VERSION" >> "$GITHUB_OUTPUT"
106 | 
107 |       - name: Capture bumped computer version
108 |         if: ${{ inputs.service == 'cua-computer' }}
109 |         id: computer_version
110 |         run: |
111 |           cd libs/python/computer
112 |           VERSION=$(python -c "import tomllib; from pathlib import Path; data = tomllib.loads(Path('pyproject.toml').read_text()); print(data['project']['version'])")
113 |           echo "Computer version: $VERSION"
114 |           echo "version=$VERSION" >> "$GITHUB_OUTPUT"
115 | 
116 |       - name: Push changes
117 |         run: |
118 |           git push origin main --follow-tags
119 | 
120 |   publish-computer:
121 |     needs: bump-version
122 |     if: ${{ inputs.service == 'cua-computer' }}
123 |     uses: ./.github/workflows/pypi-publish-computer.yml
124 |     with:
125 |       version: ${{ needs.bump-version.outputs.computer_version }}
126 |     secrets: inherit
127 | 
128 |   publish-agent:
129 |     needs: [bump-version, publish-computer]
130 |     if: ${{ always() && (inputs.service == 'cua-agent' || inputs.service == 'cua-computer') && needs.bump-version.result == 'success' && (inputs.service == 'cua-agent' || needs.publish-computer.result == 'success') }}
131 |     uses: ./.github/workflows/pypi-publish-agent.yml
132 |     with:
133 |       version: ${{ needs.bump-version.outputs.agent_version }}
134 |     secrets: inherit
135 | 
```

--------------------------------------------------------------------------------
/examples/computer_examples.py:
--------------------------------------------------------------------------------

```python
  1 | import asyncio
  2 | import os
  3 | import sys
  4 | import traceback
  5 | from pathlib import Path
  6 | 
  7 | # Load environment variables from .env file
  8 | project_root = Path(__file__).parent.parent
  9 | env_file = project_root / ".env"
 10 | print(f"Loading environment from: {env_file}")
 11 | from dotenv import load_dotenv
 12 | 
 13 | load_dotenv(env_file)
 14 | 
 15 | # Add paths to sys.path if needed
 16 | pythonpath = os.environ.get("PYTHONPATH", "")
 17 | for path in pythonpath.split(":"):
 18 |     if path and path not in sys.path:
 19 |         sys.path.insert(0, path)  # Insert at beginning to prioritize
 20 |         print(f"Added to sys.path: {path}")
 21 | 
 22 | from computer.computer import Computer
 23 | from computer.logger import LogLevel
 24 | from computer.providers.base import VMProviderType
 25 | 
 26 | 
 27 | async def main():
 28 |     try:
 29 |         print("\n=== Using direct initialization ===")
 30 | 
 31 |         # Create a local macOS computer
 32 |         computer = Computer(
 33 |             display="1024x768",
 34 |             memory="8GB",
 35 |             cpu="4",
 36 |             os_type="macos",
 37 |             name="macos",
 38 |             verbosity=LogLevel.VERBOSE,
 39 |             provider_type=VMProviderType.LUME,
 40 |             storage="/Users/<USER>/repos/trycua/computer/examples/storage",
 41 |             shared_directories=["/Users/<USER>/repos/trycua/computer/examples/shared"],
 42 |             ephemeral=False,
 43 |         )
 44 | 
 45 |         # Create a remote Linux computer with Cua
 46 |         # computer = Computer(
 47 |         #     os_type="linux",
 48 |         #     api_key=os.getenv("CUA_API_KEY"),
 49 |         #     name=os.getenv("CONTAINER_NAME"),
 50 |         #     provider_type=VMProviderType.CLOUD,
 51 |         # )
 52 | 
 53 |         try:
 54 |             # Run the computer with default parameters
 55 |             await computer.run()
 56 | 
 57 |             screenshot = await computer.interface.screenshot()
 58 | 
 59 |             # Create output directory if it doesn't exist
 60 |             output_dir = Path("./output")
 61 |             output_dir.mkdir(exist_ok=True)
 62 | 
 63 |             screenshot_path = output_dir / "screenshot.png"
 64 |             with open(screenshot_path, "wb") as f:
 65 |                 f.write(screenshot)
 66 |             print(f"Screenshot saved to: {screenshot_path.absolute()}")
 67 | 
 68 |             # await computer.interface.hotkey("command", "space")
 69 | 
 70 |             # res = await computer.interface.run_command("touch ./Downloads/empty_file")
 71 |             # print(f"Run command result: {res}")
 72 | 
 73 |             accessibility_tree = await computer.interface.get_accessibility_tree()
 74 |             print(f"Accessibility tree: {accessibility_tree}")
 75 | 
 76 |             # Screen Actions Examples
 77 |             # print("\n===  Screen Actions ===")
 78 |             # screenshot = await computer.interface.screenshot()
 79 |             # with open("screenshot_direct.png", "wb") as f:
 80 |             #     f.write(screenshot)
 81 | 
 82 |             screen_size = await computer.interface.get_screen_size()
 83 |             print(f"Screen size: {screen_size}")
 84 | 
 85 |             # Demonstrate coordinate conversion
 86 |             center_x, center_y = 733, 736
 87 |             print(f"Center in screen coordinates: ({center_x}, {center_y})")
 88 | 
 89 |             screenshot_center = await computer.to_screenshot_coordinates(center_x, center_y)
 90 |             print(f"Center in screenshot coordinates: {screenshot_center}")
 91 | 
 92 |             screen_center = await computer.to_screen_coordinates(*screenshot_center)
 93 |             print(f"Back to screen coordinates: {screen_center}")
 94 | 
 95 |             # Mouse Actions Examples
 96 |             print("\n=== Mouse Actions ===")
 97 |             await computer.interface.move_cursor(100, 100)
 98 |             await computer.interface.left_click()
 99 |             await computer.interface.right_click(300, 300)
100 |             await computer.interface.double_click(400, 400)
101 | 
102 |             # Keyboard Actions Examples
103 |             print("\n=== Keyboard Actions ===")
104 |             await computer.interface.type_text("Hello, World!")
105 |             await computer.interface.press_key("enter")
106 | 
107 |             # Clipboard Actions Examples
108 |             print("\n=== Clipboard Actions ===")
109 |             await computer.interface.set_clipboard("Test clipboard")
110 |             content = await computer.interface.copy_to_clipboard()
111 |             print(f"Clipboard content: {content}")
112 | 
113 |         finally:
114 |             # Important to clean up resources
115 |             await computer.stop()
116 |     except Exception as e:
117 |         print(f"Error in main: {e}")
118 |         traceback.print_exc()
119 | 
120 | 
121 | if __name__ == "__main__":
122 |     asyncio.run(main())
123 | 
```

--------------------------------------------------------------------------------
/libs/lume/src/Virtualization/DHCPLeaseParser.swift:
--------------------------------------------------------------------------------

```swift
  1 | import Foundation
  2 | 
  3 | /// Represents a DHCP lease entry from the system's DHCP lease file
  4 | private struct DHCPLease {
  5 |     let macAddress: String  // colon-separated, each component zero-padded to two characters
  6 |     let ipAddress: String
  7 |     let expirationDate: Date
  8 | 
  9 |     /// Creates a lease entry from raw DHCP lease file key-value pairs
 10 |     /// - Parameter dict: Dictionary containing the raw lease data (`hw_address`, `ip_address`, `lease`)
 11 |     /// - Returns: A DHCPLease instance if the data is valid, nil otherwise
 12 |     /// - Note: `lease` is parsed as a hexadecimal Unix timestamp with an optional `0x` prefix
 13 |     static func from(_ dict: [String: String]) -> DHCPLease? {
 14 |         guard let hwAddress = dict["hw_address"],
 15 |               let ipAddress = dict["ip_address"],
 16 |               let lease = dict["lease"] else {
 17 |             return nil
 18 |         }
 19 | 
 20 |         // Parse MAC address from hw_address field (format can be "1,xx:xx:xx:xx:xx:xx" or "ff,...")
 21 |         let hwParts = hwAddress.split(separator: ",")
 22 |         guard hwParts.count >= 2 else { return nil }
 23 | 
 24 |         // Take the MAC part after the type prefix, then zero-pad each component to two digits
 25 |         let rawMacAddress = String(hwParts[1]).trimmingCharacters(in: .whitespaces)
 26 |         let normalizedMacAddress = rawMacAddress.split(separator: ":")
 27 |             .map { component in
 28 |                 let hex = String(component)
 29 |                 return hex.count == 1 ? "0\(hex)" : hex
 30 |             }
 31 |             .joined(separator: ":")
 32 | 
 33 |         // Strip only a leading "0x"/"0X". trimmingCharacters(in:) would also remove trailing
 34 |         // zeros (e.g. "0x67a1b2c0" -> "67a1b2c") and produce a wrong expiration timestamp.
 35 |         let timestampHex = lease.lowercased().hasPrefix("0x") ? String(lease.dropFirst(2)) : lease
 36 |         guard let timestamp = UInt64(timestampHex, radix: 16) else { return nil }
 37 |         let expirationDate = Date(timeIntervalSince1970: TimeInterval(timestamp))
 38 | 
 39 |         return DHCPLease(
 40 |             macAddress: normalizedMacAddress,
 41 |             ipAddress: ipAddress,
 42 |             expirationDate: expirationDate
 43 |         )
 44 |     }
 45 | 
 46 |     /// Checks if the lease is currently valid (expiration date is later than now)
 47 |     var isValid: Bool {
 48 |         expirationDate > Date()
 49 |     }
 50 | }
 51 | 
 52 | /// Parses DHCP lease files to retrieve IP addresses for VMs based on their MAC addresses
 53 | enum DHCPLeaseParser {
 54 |     private static let leasePath = "/var/db/dhcpd_leases"
 55 | 
 56 |     /// Looks up the IP address leased to a MAC address in the DHCP lease file
 57 |     /// - Parameter macAddress: Colon-separated MAC address to search for
 58 |     /// - Returns: IP address of the first matching lease; nil if the file is unreadable or nothing matches
 59 |     static func getIPAddress(forMAC macAddress: String) -> String? {
 60 |         guard let fileText = try? String(contentsOfFile: leasePath, encoding: .utf8),
 61 |               let entries = try? parseDHCPLeases(fileText) else {
 62 |             return nil
 63 |         }
 64 | 
 65 |         // Zero-pad one-character components so the query uses the same form as parsed leases
 66 |         let paddedParts = macAddress.split(separator: ":").map { part -> String in
 67 |             part.count == 1 ? "0\(part)" : String(part)
 68 |         }
 69 |         let wantedMac = paddedParts.joined(separator: ":")
 70 |         for entry in entries where entry.macAddress == wantedMac {
 71 |             return entry.ipAddress
 72 |         }
 73 |         return nil
 74 |     }
 75 | 
 76 |     /// Splits lease file text into `{ ... }` blocks and builds a lease from each block
 77 |     /// - Parameter contents: The raw contents of the lease file
 78 |     /// - Returns: Leases for every block that DHCPLease.from accepts, in file order
 79 |     private static func parseDHCPLeases(_ contents: String) throws -> [DHCPLease] {
 80 |         var parsed: [DHCPLease] = []
 81 |         var fields: [String: String] = [:]
 82 |         var insideBlock = false
 83 | 
 84 |         for rawLine in contents.components(separatedBy: .newlines) {
 85 |             let text = rawLine.trimmingCharacters(in: .whitespaces)
 86 | 
 87 |             switch text {
 88 |             case "{":
 89 |                 insideBlock = true
 90 |                 fields = [:]
 91 |             case "}":
 92 |                 if let entry = DHCPLease.from(fields) {
 93 |                     parsed.append(entry)
 94 |                 }
 95 |                 insideBlock = false
 96 |             default:
 97 |                 // Only "key=value" lines inside a block contribute fields
 98 |                 guard insideBlock else { continue }
 99 |                 let pair = text.split(separator: "=", maxSplits: 1)
100 |                 guard pair.count == 2 else { continue }
101 |                 let key = pair[0].trimmingCharacters(in: .whitespaces)
102 |                 let value = pair[1].trimmingCharacters(in: .whitespaces)
103 |                 fields[key] = value
104 |             }
105 |         }
106 | 
107 |         return parsed
108 |     }
109 | }
```

--------------------------------------------------------------------------------
/blog/trajectory-viewer.md:
--------------------------------------------------------------------------------

```markdown
 1 | # Trajectory Viewer for Cua
 2 | 
 3 | _Published on May 13, 2025 by Dillon DuPont_
 4 | 
 5 | Don’t forget to check out [Part 1: Building your own Computer-Use Operator](build-your-own-operator-on-macos-1) and [Part 2: Using the Agent framework](build-your-own-operator-on-macos-2) for setting up your Cua environment and basic tips and tricks!
 6 | 
 7 | ## Introduction
 8 | 
 9 | Okay, so you’ve gotten your environment up and also tested a few agent runs. You’ll likely have encountered cases where your agent was successful at doing some tasks but also places where it got stuck or outright failed.
10 | Now what?
11 | If you’ve ever wondered exactly what your computer agent is doing and why it sometimes doesn’t do what you expected, then the Trajectory Viewer for Cua is here to help! Whether you’re a seasoned developer or someone who just wants to dive in and see results, this tool makes it easy to explore every step your agent takes on your screen.
12 | Plus, if you want to start thinking about generating data to train your own agentic model (we’ll cover training in an upcoming blog, so look forward to it), then our Trajectory Viewer might be for you.
13 | 
14 | ## So, what’s a “trajectory”?
15 | 
16 | Think of a trajectory as a detailed video recording of your agent’s journey:
17 | 
18 | - **Observations**: What did the agent see (the exact screen content) at each point in time?
19 | - **Actions**: What clicks, keystrokes, or commands did it perform in response?
20 | - **Decisions**: Which options did it choose, and why?
21 |   Especially for longer and more complex tasks, your agent will make multiple steps, take multiple actions, and make multiple observations. By examining this record, you can pinpoint where things go right, and more importantly, where they go wrong.
22 | 
23 | ## So, what’s Cua’s Trajectory Viewer and why use it?
24 | 
25 | The Trajectory Viewer for Cua is a GUI tool that helps you explore saved trajectories generated from your Cua computer agent runs. This tool provides a powerful way to:
26 | 
27 | - **Debug your agents**: See exactly what your agent saw to reproduce bugs
28 | - **Analyze failure cases**: Identify the moment when your agent went off-script
29 | - **Collect training data**: Export your trajectories for your own processing, training, and more!
30 | 
31 | The viewer allows you to see exactly what your agent observed and how it interacted with the computer all through your browser.
32 | 
33 | ## Opening Trajectory Viewer in 3 Simple Steps
34 | 
35 | 1. **Visit**: Open your browser and go to [https://cua.ai/trajectory-viewer](https://cua.ai/trajectory-viewer).
36 | 2. **Upload**: Drag and drop a trajectories folder or click Select Folder.
37 | 3. **Explore**: View your agent’s trajectories! All data stays in your browser unless you give permission otherwise.
38 | 
39 | ![Trajectory Viewer Screenshot](./assets/trajectory-viewer.jpeg)
40 | 
41 | ## Recording a Trajectory
42 | 
43 | ### Using the ComputerAgent API
44 | 
45 | Trajectories are saved by default when using the ComputerAgent API:
46 | 
47 | ```python
48 | agent.run("book a flight for me")
49 | ```
50 | 
51 | You can explicitly control trajectory saving with the `save_trajectory` parameter:
52 | 
53 | ```python
54 | from cua import ComputerAgent
55 | 
56 | agent = ComputerAgent(save_trajectory=True)
57 | agent.run("search for hotels in Boston")
58 | ```
59 | 
60 | Each trajectory folder is saved in a `trajectories` directory with a timestamp format, for example: `trajectories/20250501_222749`
61 | 
62 | ## Exploring and Analyzing Trajectories
63 | 
64 | Our Trajectory Viewer is designed to allow for thorough analysis and debugging in a friendly way. Once loaded, the viewer presents:
65 | 
66 | - **Timeline Slider**: Jump to any step in the session
67 | - **Screen Preview**: See exactly what the agent saw
68 | - **Action Details**: Review clicks, keypresses, and API calls
69 | - **Logs & Metadata**: Inspect debug logs or performance stats
70 | 
71 | Use these features to:
72 | 
73 | - Step through each action and observation; understand your agent’s decision-making
74 | - Understand why and where your agent failed
75 | - Collect insights for improving your instructions, prompts, tasks, agent, etc.
76 | 
77 | The trajectory viewer provides a visual interface for stepping through each action your agent took, making it easy to see what your agent “sees”.
78 | 
79 | ## Getting Started
80 | 
81 | Ready to see your agent in action? Head over to the Trajectory Viewer and load up your first session. Debug smarter, train faster, and stay in control (all within your browser).
82 | 
83 | Happy tinkering and Cua on!
84 | 
85 | Have questions or want to share feedback? Join our community on Discord or open an issue on GitHub.
86 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/mcp-server/installation.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Installation
  3 | ---
  4 | 
  5 | Install the package from PyPI:
  6 | 
  7 | ```bash
  8 | pip install cua-mcp-server
  9 | ```
 10 | 
 11 | This will install:
 12 | 
 13 | - The MCP server
 14 | - CUA agent and computer dependencies
 15 | - An executable `cua-mcp-server` script in your PATH
 16 | 
 17 | ## Easy Setup Script
 18 | 
 19 | If you want to simplify installation, you can use this one-liner to download and run the installation script:
 20 | 
 21 | ```bash
 22 | curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/python/mcp-server/scripts/install_mcp_server.sh | bash
 23 | ```
 24 | 
 25 | This script will:
 26 | 
 27 | - Create the ~/.cua directory if it doesn't exist
 28 | - Generate a startup script at ~/.cua/start_mcp_server.sh
 29 | - Make the script executable
 30 | - The startup script automatically manages Python virtual environments and installs/updates the cua-mcp-server package
 31 | 
 32 | You can then use the script in your MCP configuration like this:
 33 | 
 34 | ```json
 35 | {
 36 |   "mcpServers": {
 37 |     "cua-agent": {
 38 |       "command": "/bin/bash",
 39 |       "args": ["~/.cua/start_mcp_server.sh"],
 40 |       "env": {
 41 |         "CUA_MODEL_NAME": "anthropic/claude-sonnet-4-20250514",
 42 |         "ANTHROPIC_API_KEY": "your-anthropic-api-key-here"
 43 |       }
 44 |     }
 45 |   }
 46 | }
 47 | ```
 48 | 
 49 | **Important**: You must include your Anthropic API key for the MCP server to work properly.
 50 | 
 51 | ## Development Setup
 52 | 
 53 | If you're working with the CUA source code directly (like in the CUA repository), you can use the development script instead:
 54 | 
 55 | ```json
 56 | {
 57 |   "mcpServers": {
 58 |     "cua-agent": {
 59 |       "command": "/usr/bin/env",
 60 |       "args": [
 61 |         "bash",
 62 |         "-lc",
 63 |         "export CUA_MODEL_NAME='anthropic/claude-sonnet-4-20250514'; export ANTHROPIC_API_KEY='your-anthropic-api-key-here'; /path/to/cua/libs/python/mcp-server/scripts/start_mcp_server.sh"
 64 |       ]
 65 |     }
 66 |   }
 67 | }
 68 | ```
 69 | 
 70 | **For host computer control** (development setup):
 71 | 
 72 | 1. **Install Computer Server Dependencies**:
 73 | 
 74 |    ```bash
 75 |    python3 -m pip install uvicorn fastapi
 76 |    python3 -m pip install -e libs/python/computer-server --break-system-packages
 77 |    ```
 78 | 
 79 | 2. **Start the Computer Server**:
 80 | 
 81 |    ```bash
 82 |    cd /path/to/cua
 83 |    python -m computer_server --log-level debug
 84 |    ```
 85 | 
 86 |    This will start the computer server on `http://localhost:8000` that controls your actual desktop.
 87 | 
 88 | 3. **Configure Claude Desktop**:
 89 |    ```json
 90 |    {
 91 |      "mcpServers": {
 92 |        "cua-agent": {
 93 |          "command": "/usr/bin/env",
 94 |          "args": [
 95 |            "bash",
 96 |            "-lc",
 97 |            "export CUA_MODEL_NAME='anthropic/claude-sonnet-4-20250514'; export ANTHROPIC_API_KEY='your-anthropic-api-key-here'; export CUA_USE_HOST_COMPUTER_SERVER='true'; export CUA_MAX_IMAGES='1'; /path/to/cua/libs/python/mcp-server/scripts/start_mcp_server.sh"
 98 |          ]
 99 |        }
100 |      }
101 |    }
102 |    ```
103 | 
104 | **Note**: Replace `/path/to/cua` with the absolute path to your CUA repository directory.
105 | 
106 | **⚠️ Important**: When using host computer control (`CUA_USE_HOST_COMPUTER_SERVER='true'`), the AI will have direct access to your desktop and can perform actions like opening applications, clicking, typing, and taking screenshots. Make sure you're comfortable with this level of access.
107 | 
108 | ### Troubleshooting
109 | 
110 | **Common Issues:**
111 | 
112 | 1. **"Claude's response was interrupted"** - This usually means:
113 |    - Missing API key: Add `ANTHROPIC_API_KEY` to your environment variables
114 |    - Invalid model name: Use a valid model like `anthropic/claude-sonnet-4-20250514`
115 |    - Check logs for specific error messages
116 | 
117 | 2. **"Missing Anthropic API Key"** - Add your API key to the configuration:
118 | 
119 |    ```json
120 |    "env": {
121 |      "ANTHROPIC_API_KEY": "your-api-key-here"
122 |    }
123 |    ```
124 | 
125 | 3. **"model not found"** - Use a valid model name:
126 |    - ✅ `anthropic/claude-sonnet-4-20250514`
127 | 
128 | 4. **Script not found** - If you get a `/bin/bash: ~/cua/libs/python/mcp-server/scripts/start_mcp_server.sh: No such file or directory` error, the `~` in the script path was not expanded. Replace it with the full absolute path to the script (for example, one starting with `/Users/<your-username>/`).
129 | 
130 | 5. **Host Computer Control Issues** - If using `CUA_USE_HOST_COMPUTER_SERVER='true'`:
131 |    - **Computer Server not running**: Make sure you've started the computer server with `python -m computer_server --log-level debug`
132 |    - **Port 8000 in use**: Check if another process is using port 8000 with `lsof -i :8000`
133 |    - **Missing dependencies**: Install `uvicorn` and `fastapi` with `python3 -m pip install uvicorn fastapi`
134 |    - **Image size errors**: Use `CUA_MAX_IMAGES='1'` to reduce image context size
135 | 
136 | **Viewing Logs:**
137 | 
138 | ```bash
139 | tail -n 20 -f ~/Library/Logs/Claude/mcp*.log
140 | ```
141 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/macos-vm-cli-playbook/lumier/docker.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Docker
  3 | ---
  4 | 
  5 | You can use Lumier through Docker:
  6 | 
  7 | ### Run a macOS VM (ephemeral)
  8 | 
  9 | ```bash
 10 | # Run the container with temporary storage (using pre-built image from Docker Hub)
 11 | docker run -it --rm \
 12 |     --name macos-vm \
 13 |     -p 8006:8006 \
 14 |     -e VM_NAME=macos-vm \
 15 |     -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
 16 |     -e CPU_CORES=4 \
 17 |     -e RAM_SIZE=8192 \
 18 |     trycua/lumier:latest
 19 | ```
 20 | 
 21 | Access the VM in your browser at **http://localhost:8006**.
 22 | 
 23 | After running the command above, you can access your macOS VM through a web browser (e.g., http://localhost:8006).
 24 | 
 25 | <Callout title="Note">
 26 |   With the basic setup above, your VM will be reset when you stop the container (ephemeral mode).
 27 |   This means any changes you make inside the macOS VM will be lost. See the section below for how to
 28 |   save your VM state.
 29 | </Callout>
 30 | 
 31 | ## Saving Your VM State
 32 | 
 33 | To save your VM state between sessions (so your changes persist when you stop and restart the container), you'll need to set up a storage location:
 34 | 
 35 | ```bash
 36 | # First, create a storage directory if it doesn't exist
 37 | mkdir -p storage
 38 | 
 39 | # Then run the container with persistent storage
 40 | docker run -it --rm \
 41 |     --name lumier-vm \
 42 |     -p 8006:8006 \
 43 |     -v $(pwd)/storage:/storage \
 44 |     -e VM_NAME=lumier-vm \
 45 |     -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
 46 |     -e CPU_CORES=4 \
 47 |     -e RAM_SIZE=8192 \
 48 |     -e HOST_STORAGE_PATH=$(pwd)/storage \
 49 |     trycua/lumier:latest
 50 | ```
 51 | 
 52 | This command creates a connection between a folder on your Mac (`$(pwd)/storage`) and a folder inside the Docker container (`/storage`). The `-v` flag (volume mount) and the `HOST_STORAGE_PATH` variable work together to ensure your VM data is saved on your host Mac.
 53 | 
 54 | ## Sharing Files with Your VM
 55 | 
 56 | To share files between your Mac and the virtual machine, you can set up a shared folder:
 57 | 
 58 | ```bash
 59 | # Create both storage and shared folders
 60 | mkdir -p storage shared
 61 | 
 62 | # Run with both persistent storage and a shared folder
 63 | docker run -it --rm \
 64 |     --name lumier-vm \
 65 |     -p 8006:8006 \
 66 |     -v $(pwd)/storage:/storage \
 67 |     -v $(pwd)/shared:/shared \
 68 |     -e VM_NAME=lumier-vm \
 69 |     -e VERSION=ghcr.io/trycua/macos-sequoia-cua:latest \
 70 |     -e CPU_CORES=4 \
 71 |     -e RAM_SIZE=8192 \
 72 |     -e HOST_STORAGE_PATH=$(pwd)/storage \
 73 |     -e HOST_SHARED_PATH=$(pwd)/shared \
 74 |     trycua/lumier:latest
 75 | ```
 76 | 
 77 | With this setup, any files you place in the `shared` folder on your Mac will be accessible from within the macOS VM, and vice versa.
 78 | 
 79 | ## Automating VM Startup with on-logon.sh
 80 | 
 81 | You can automatically run scripts when the VM starts up by placing an `on-logon.sh` script in the shared folder's lifecycle directory. This is useful for setting up your VM environment each time it starts.
 82 | 
 83 | ```bash
 84 | # Create the lifecycle directory in your shared folder
 85 | mkdir -p shared/lifecycle
 86 | 
 87 | # Create a sample on-logon.sh script
 88 | cat > shared/lifecycle/on-logon.sh << 'EOF'
 89 | #!/usr/bin/env bash
 90 | 
 91 | # Create a file on the desktop
 92 | echo "Hello from Lumier!" > /Users/lume/Desktop/hello_lume.txt
 93 | 
 94 | # You can add more commands to execute at VM startup
 95 | # For example:
 96 | # - Configure environment variables
 97 | # - Start applications
 98 | # - Mount network drives
 99 | # - Set up development environments
100 | EOF
101 | 
102 | # Make the script executable
103 | chmod +x shared/lifecycle/on-logon.sh
104 | ```
105 | 
106 | The script will be automatically executed when the VM starts up. It runs in the VM context and has access to:
107 | 
108 | - The `/Users/lume` user directory (home directory in the VM)
109 | - The shared folder at `/Volumes/My Shared Files` inside the VM
110 | - Any resources available to the VM
111 | 
112 | This feature enables automation of VM setup without modifying the base VM image.
113 | 
114 | ## Configuration Options
115 | 
116 | When running Lumier, you'll need to configure a few things:
117 | 
118 | - **Port forwarding** (`-p 8006:8006`): Makes the VM's VNC interface accessible in your browser. If port 8006 is already in use, you can use a different port like `-p 8007:8006`.
119 | 
120 | - **Environment variables** (`-e`): Configure your VM settings:
121 |   - `VM_NAME`: A name for your virtual machine
122 |   - `VERSION`: The macOS image to use
123 |   - `CPU_CORES`: Number of CPU cores to allocate
124 |   - `RAM_SIZE`: Memory in MB to allocate
125 |   - `HOST_STORAGE_PATH`: Path to save VM state (when using persistent storage)
126 |   - `HOST_SHARED_PATH`: Path to the shared folder (optional)
127 | 
128 | - **Background service**: The `lume serve` service should be running on your host (it starts automatically when you install Lume using its `install.sh` script).
129 | 
```

--------------------------------------------------------------------------------
/libs/typescript/agent/src/types.ts:
--------------------------------------------------------------------------------

```typescript
  1 | // #region Request
  2 | export type ConnectionType = 'http' | 'https' | 'peer';
  3 | export interface AgentClientOptions {
  4 |   timeout?: number; // NOTE(review): unit is not stated here — presumably milliseconds; confirm
  5 |   retries?: number; // NOTE(review): presumably the number of retry attempts; confirm against the client
  6 |   /** Optional CUA API key to send as X-API-Key header for HTTP requests */
  7 |   apiKey?: string;
  8 | }
  9 | // Request types matching the Python proxy API
 10 | export interface AgentRequest {
 11 |   model: string;
 12 |   input: string | AgentMessage[]; // either a single string or a list of messages
 13 |   agent_kwargs?: {
 14 |     save_trajectory?: boolean;
 15 |     verbosity?: number;
 16 |     [key: string]: any; // any additional keys are accepted
 17 |   };
 18 |   computer_kwargs?: {
 19 |     os_type?: string;
 20 |     provider_type?: string;
 21 |     [key: string]: any; // any additional keys are accepted
 22 |   };
 23 |   /**
 24 |    * Optional per-request environment variable overrides.
 25 |    * Keys and values are strings and will be forwarded to the backend proxy.
 26 |    */
 27 |   env?: Record<string, string>;
 28 | }
 29 | // #endregion
 30 | 
 31 | // #region Response
 32 | // Response types: the payload returned for one agent request
 33 | export interface AgentResponse {
 34 |   output: AgentMessage[];
 35 |   usage: Usage;
 36 |   status: 'completed' | 'failed';
 37 |   error?: string; // NOTE(review): presumably only set when status is 'failed' — confirm
 38 | }
 39 | // Usage information
 40 | export interface Usage {
 41 |   prompt_tokens: number;
 42 |   completion_tokens: number;
 43 |   total_tokens: number;
 44 |   response_cost: number; // NOTE(review): currency/unit is not stated here — confirm
 45 | }
 46 | // #endregion
 47 | 
 48 | // #region Messages
 49 | // Agent message types - can be one of several different message types
 50 | export type AgentMessage =
 51 |   | UserMessage
 52 |   | AssistantMessage
 53 |   | ReasoningMessage
 54 |   | ComputerCallMessage
 55 |   | ComputerCallOutputMessage
 56 |   | FunctionCallMessage
 57 |   | FunctionCallOutputMessage;
 58 | // Input message
 59 | export interface UserMessage {
 60 |   type?: 'message'; // optional here, required on AssistantMessage
 61 |   role: 'user' | 'system' | 'developer';
 62 |   content: string | InputContent[]; // plain string or a list of input content parts
 63 | }
 64 | // Output message
 65 | export interface AssistantMessage {
 66 |   type: 'message';
 67 |   role: 'assistant';
 68 |   content: OutputContent[];
 69 | }
 70 | // Output reasoning/thinking message
 71 | export interface ReasoningMessage {
 72 |   type: 'reasoning';
 73 |   summary: SummaryContent[];
 74 | }
 75 | // Output computer action call
 76 | export interface ComputerCallMessage {
 77 |   type: 'computer_call';
 78 |   call_id: string;
 79 |   status: 'completed' | 'failed' | 'pending';
 80 |   action: ComputerAction;
 81 | }
 82 | // Output computer action result (always a screenshot)
 83 | export interface ComputerCallOutputMessage {
 84 |   type: 'computer_call_output';
 85 |   call_id: string; // NOTE(review): presumably the call_id of the matching computer_call — confirm
 86 |   output: ComputerResultContent;
 87 | }
 88 | // Output function call
 89 | export interface FunctionCallMessage {
 90 |   type: 'function_call';
 91 |   call_id: string;
 92 |   status: 'completed' | 'failed' | 'pending';
 93 |   name: string;
 94 |   arguments: string; // JSON dict of kwargs
 95 | }
 96 | // Output function call result (always text)
 97 | export interface FunctionCallOutputMessage {
 98 |   type: 'function_call_output';
 99 |   call_id: string; // NOTE(review): presumably the call_id of the matching function_call — confirm
100 |   output: string;
101 | }
102 | // #endregion
103 | 
104 | // #region Message Content
105 | export interface InputContent {
106 |   type: 'input_image' | 'input_text';
107 |   text?: string; // NOTE(review): presumably used when type is 'input_text' — not enforced by this type
108 |   image_url?: string; // NOTE(review): presumably used when type is 'input_image' — not enforced by this type
109 | }
110 | export interface OutputContent {
111 |   type: 'output_text';
112 |   text: string;
113 | }
114 | export interface SummaryContent {
115 |   type: 'summary_text';
116 |   text: string;
117 | }
118 | export interface ComputerResultContent {
119 |   type: 'computer_screenshot' | 'input_image';
120 |   image_url: string;
121 | }
122 | // #endregion
123 | 
124 | // #region Actions
125 | export type ComputerAction = ComputerActionOpenAI | ComputerActionAnthropic;
126 | // OpenAI Computer Actions
127 | export type ComputerActionOpenAI =
128 |   | ClickAction
129 |   | DoubleClickAction
130 |   | DragAction
131 |   | KeyPressAction
132 |   | MoveAction
133 |   | ScreenshotAction
134 |   | ScrollAction
135 |   | TypeAction
136 |   | WaitAction;
137 | export interface ClickAction {
138 |   type: 'click';
139 |   button: 'left' | 'right' | 'wheel' | 'back' | 'forward'; // required here, optional on double_click and drag
140 |   x: number;
141 |   y: number;
142 | }
143 | export interface DoubleClickAction {
144 |   type: 'double_click';
145 |   button?: 'left' | 'right' | 'wheel' | 'back' | 'forward';
146 |   x: number;
147 |   y: number;
148 | }
149 | export interface DragAction {
150 |   type: 'drag';
151 |   button?: 'left' | 'right' | 'wheel' | 'back' | 'forward';
152 |   path: Array<[number, number]>; // NOTE(review): presumably [x, y] points in drag order — confirm
153 | }
154 | export interface KeyPressAction {
155 |   type: 'keypress';
156 |   keys: string[];
157 | }
158 | export interface MoveAction {
159 |   type: 'move';
160 |   x: number;
161 |   y: number;
162 | }
163 | export interface ScreenshotAction {
164 |   type: 'screenshot';
165 | }
166 | export interface ScrollAction {
167 |   type: 'scroll';
168 |   scroll_x: number; // NOTE(review): scroll unit and sign convention are not stated here — confirm
169 |   scroll_y: number;
170 |   x: number;
171 |   y: number;
172 | }
173 | export interface TypeAction {
174 |   type: 'type';
175 |   text: string;
176 | }
177 | export interface WaitAction {
178 |   type: 'wait'; // carries no duration field
179 | }
180 | // Anthropic Computer Actions
181 | export type ComputerActionAnthropic = LeftMouseDownAction | LeftMouseUpAction;
182 | export interface LeftMouseDownAction {
183 |   type: 'left_mouse_down';
184 |   x: number;
185 |   y: number;
186 | }
187 | export interface LeftMouseUpAction {
188 |   type: 'left_mouse_up';
189 |   x: number;
190 |   y: number;
191 | }
192 | // #endregion
193 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/example.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Example usage of the agent library with docstring-based tool definitions.
  3 | """
  4 | 
  5 | import asyncio
  6 | import logging
  7 | 
  8 | from agent import ComputerAgent
  9 | from computer import Computer
 10 | from computer.helpers import sandboxed
 11 | 
 12 | 
 13 | @sandboxed()
 14 | def read_file(location: str) -> str:
 15 |     """Read contents of a file
 16 | 
 17 |     Parameters
 18 |     ----------
 19 |     location : str
 20 |         Path to the file to read
 21 | 
 22 |     Returns
 23 |     -------
 24 |     str
 25 |         Contents of the file or error message
 26 |     """
 27 |     try:
 28 |         with open(location, "r") as f:
 29 |             return f.read()
 30 |     except Exception as e:
 31 |         return f"Error reading file: {str(e)}"
 32 | 
 33 | 
 34 | def save_note(content: str, filename: str = "note.txt") -> str:
 35 |     """Save content to a note file
 36 | 
 37 |     Parameters
 38 |     ----------
 39 |     content : str
 40 |         Content to save to the file
 41 |     filename : str, optional
 42 |         Name of the file to save to (default is "note.txt")
 43 | 
 44 |     Returns
 45 |     -------
 46 |     str
 47 |         Success or error message
 48 |     """
 49 |     try:
 50 |         with open(filename, "w") as f:
 51 |             f.write(content)
 52 |         return f"Saved note to {filename}"
 53 |     except Exception as e:
 54 |         return f"Error saving note: {str(e)}"
 55 | 
 56 | 
 57 | def calculate(a: int, b: int) -> int:
 58 |     """Calculate the sum of two integers
 59 | 
 60 |     Parameters
 61 |     ----------
 62 |     a : int
 63 |         First integer
 64 |     b : int
 65 |         Second integer
 66 | 
 67 |     Returns
 68 |     -------
 69 |     int
 70 |         Sum of the two integers
 71 |     """
 72 |     return a + b
 73 | 
 74 | 
 75 | async def main():
 76 |     """Example usage of ComputerAgent with different models"""
 77 | 
 78 |     # Example 1: Using Claude with computer and custom tools
 79 |     print("=== Example 1: Claude with Computer ===")
 80 | 
 81 |     import json
 82 |     import os
 83 | 
 84 |     import dotenv
 85 | 
 86 |     dotenv.load_dotenv()
 87 | 
 88 |     assert os.getenv("CUA_CONTAINER_NAME") is not None, "CUA_CONTAINER_NAME is not set"
 89 |     assert os.getenv("CUA_API_KEY") is not None, "CUA_API_KEY is not set"
 90 | 
 91 |     async with Computer(
 92 |         os_type="linux",
 93 |         provider_type="cloud",
 94 |         name=os.getenv("CUA_CONTAINER_NAME") or "",
 95 |         api_key=os.getenv("CUA_API_KEY") or "",
 96 |     ) as computer:
 97 |         agent = ComputerAgent(
 98 |             # Supported models:
 99 |             # == OpenAI CUA (computer-use-preview) ==
100 |             model="openai/computer-use-preview",
101 |             # == Anthropic CUA (Claude > 3.5) ==
102 |             # model="anthropic/claude-opus-4-20250514",
103 |             # model="anthropic/claude-sonnet-4-20250514",
104 |             # model="anthropic/claude-3-7-sonnet-20250219",
105 |             # model="anthropic/claude-sonnet-4-5-20250929",
106 |             # == UI-TARS ==
107 |             # model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
108 |             # TODO: add local mlx provider
109 |             # model="mlx-community/UI-TARS-1.5-7B-6bit",
110 |             # model="ollama_chat/0000/ui-tars-1.5-7b",
111 |             # == Omniparser + Any LLM ==
112 |             # model="omniparser+..."
113 |             # model="omniparser+anthropic/claude-opus-4-20250514",
114 |             tools=[computer],
115 |             only_n_most_recent_images=3,
116 |             verbosity=logging.INFO,
117 |             trajectory_dir="trajectories",
118 |             use_prompt_caching=True,
119 |             max_trajectory_budget={
120 |                 "max_budget": 1.0,
121 |                 "raise_error": True,
122 |                 "reset_after_each_run": False,
123 |             },
124 |         )
125 | 
126 |         history = []
127 |         while True:
128 |             user_input = input("> ")
129 |             history.append({"role": "user", "content": user_input})
130 | 
131 |             # Non-streaming usage
132 |             async for result in agent.run(history, stream=False):
133 |                 history += result["output"]
134 | 
135 |                 # # Print output
136 |                 # for item in result["output"]:
137 |                 #     if item["type"] == "message":
138 |                 #         print(item["content"][0]["text"])
139 |                 #     elif item["type"] == "computer_call":
140 |                 #         action = item["action"]
141 |                 #         action_type = action["type"]
142 |                 #         action_args = {k: v for k, v in action.items() if k != "type"}
143 |                 #         print(f"{action_type}({action_args})")
144 |                 #     elif item["type"] == "function_call":
145 |                 #         action = item["name"]
146 |                 #         action_args = item["arguments"]
147 |                 #         print(f"{action}({action_args})")
148 |                 #     elif item["type"] == "function_call_output":
149 |                 #         print("===>", item["output"])
150 | 
151 | 
152 | if __name__ == "__main__":
153 |     asyncio.run(main())
154 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/benchmarks/contrib.md:
--------------------------------------------------------------------------------

```markdown
  1 | # Contributing Reference Agent Implementations
  2 | 
  3 | This guide explains how to add your own reference agent implementations to the benchmark system.
  4 | 
  5 | ## Adding Reference Agent Implementations
  6 | 
  7 | ### 1. Implement the ModelProtocol
  8 | 
  9 | Create a new file in the `models/` directory implementing the `ModelProtocol`:
 10 | 
 11 | ```python
 12 | from models.base import ModelProtocol
 13 | from typing import Optional, Tuple
 14 | from PIL import Image
 15 | 
 16 | class YourModelName(ModelProtocol):
 17 |     def __init__(self, model_path: str):
 18 |         self.model_path = model_path
 19 |         self._model = None
 20 | 
 21 |     @property
 22 |     def model_name(self) -> str:
 23 |         return self.model_path
 24 | 
 25 |     async def load_model(self) -> None:
 26 |         """Load the model into memory."""
 27 |         # Your model loading logic here
 28 |         pass
 29 | 
 30 |     async def unload_model(self) -> None:
 31 |         """Unload the model from memory."""
 32 |         # Your model cleanup logic here
 33 |         pass
 34 | 
 35 |     async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]:
 36 |         """
 37 |         Predict click coordinates for the given image and instruction.
 38 | 
 39 |         Args:
 40 |             image: PIL Image to analyze
 41 |             instruction: Text instruction describing what to click
 42 | 
 43 |         Returns:
 44 |             Tuple of (x, y) coordinates or None if prediction fails
 45 |         """
 46 |         # Your prediction logic here
 47 |         return (x, y)  # Return predicted coordinates
 48 | ```
 49 | 
 50 | ### 2. Register Your Model
 51 | 
 52 | Add your model to the `get_available_models()` function in `utils.py`:
 53 | 
 54 | ```python
 55 | def get_available_models() -> List[Union[str, ModelProtocol]]:
 56 |     models = [
 57 |         # Computer Agent SDK providers
 58 |         "huggingface-local/HelloKKMe/GTA1-7B",
 59 | 
 60 |         # Reference implementations
 61 |         GTA1Model("HelloKKMe/GTA1-7B"),
 62 |         YourModelName("path/to/your/model"),  # Add your model here
 63 |     ]
 64 |     return models
 65 | ```
 66 | 
 67 | ### 3. Test Your Implementation
 68 | 
 69 | Before submitting, test your model with the interactive tool:
 70 | 
 71 | ```bash
 72 | python interactive.py
 73 | ```
 74 | 
 75 | This will help you verify that your model loads correctly and produces reasonable predictions.
 76 | 
 77 | ## Example: Adding a New Model
 78 | 
 79 | Here's a complete example of adding a hypothetical "MyVisionModel":
 80 | 
 81 | 1. **Create `models/my_vision_model.py`:**
 82 | 
 83 | ```python
 84 | import torch
 85 | from transformers import AutoModel, AutoProcessor
 86 | from models.base import ModelProtocol
 87 | from typing import Optional, Tuple
 88 | from PIL import Image
 89 | 
 90 | class MyVisionModel(ModelProtocol):
 91 |     def __init__(self, model_path: str):
 92 |         self.model_path = model_path
 93 |         self.model = None
 94 |         self.processor = None
 95 | 
 96 |     @property
 97 |     def model_name(self) -> str:
 98 |         return f"MyVisionModel({self.model_path})"
 99 | 
100 |     async def load_model(self) -> None:
101 |         """Load the model and processor."""
102 |         self.processor = AutoProcessor.from_pretrained(self.model_path)
103 |         self.model = AutoModel.from_pretrained(
104 |             self.model_path,
105 |             torch_dtype=torch.float16,
106 |             device_map="auto"
107 |         )
108 | 
109 |     async def unload_model(self) -> None:
110 |         """Clean up model resources."""
111 |         del self.model
112 |         del self.processor
113 |         self.model = None
114 |         self.processor = None
115 |         torch.cuda.empty_cache()
116 | 
117 |     async def predict_click(self, image: Image.Image, instruction: str) -> Optional[Tuple[int, int]]:
118 |         """Predict click coordinates."""
119 |         try:
120 |             # Preprocess inputs
121 |             inputs = self.processor(
122 |                 text=instruction,
123 |                 images=image,
124 |                 return_tensors="pt"
125 |             )
126 | 
127 |             # Run inference
128 |             with torch.no_grad():
129 |                 outputs = self.model(**inputs)
130 | 
131 |             # Extract coordinates (model-specific logic)
132 |             x, y = self._extract_coordinates(outputs)
133 |             return (int(x), int(y))
134 | 
135 |         except Exception as e:
136 |             print(f"Prediction failed: {e}")
137 |             return None
138 | 
139 |     def _extract_coordinates(self, outputs):
140 |         """Extract x, y coordinates from model outputs."""
141 |         # Your model-specific coordinate extraction logic
142 |         pass
143 | ```
144 | 
145 | 2. **Update `models/__init__.py`:**
146 | 
147 | ```python
148 | from .gta1 import GTA1Model
149 | from .my_vision_model import MyVisionModel
150 | 
151 | __all__ = ["GTA1Model", "MyVisionModel"]
152 | ```
153 | 
154 | 3. **Update `utils.py`:**
155 | 
156 | ```python
157 | from models import GTA1Model, MyVisionModel
158 | 
159 | def get_available_models() -> List[Union[str, ModelProtocol]]:
160 |     models = [
161 |         "huggingface-local/HelloKKMe/GTA1-7B",
162 |         GTA1Model("HelloKKMe/GTA1-7B"),
163 |         MyVisionModel("my-org/my-vision-model"),  # Add here
164 |     ]
165 |     return models
166 | ```
167 | 
```

--------------------------------------------------------------------------------
/docs/src/components/doc-actions-menu.tsx:
--------------------------------------------------------------------------------

```typescript
  1 | 'use client';
  2 | 
  3 | import { useState } from 'react';
  4 | import { SiOpenai, SiAnthropic, SiMarkdown, SiGithub } from 'react-icons/si';
  5 | import posthog from 'posthog-js';
  6 | 
  7 | interface DocActionsMenuProps {
  8 |   pageUrl: string;
  9 |   pageTitle: string;
 10 |   filePath?: string;
 11 | }
 12 | 
 13 | export function DocActionsMenu({ pageUrl, pageTitle, filePath }: DocActionsMenuProps) {
 14 |   const [copied, setCopied] = useState(false);
 15 | 
 16 |   const handleCopyMarkdown = async () => {
 17 |     try {
 18 |       if (!filePath) {
 19 |         throw new Error('No file path available');
 20 |       }
 21 |       const githubRawUrl = `https://raw.githubusercontent.com/trycua/cua/refs/heads/main/docs/content/docs/${filePath}`;
 22 | 
 23 |       const response = await fetch(githubRawUrl);
 24 |       if (!response.ok) {
 25 |         throw new Error('Failed to fetch markdown');
 26 |       }
 27 |       const markdown = await response.text();
 28 | 
 29 |       await navigator.clipboard.writeText(markdown);
 30 | 
 31 |       setCopied(true);
 32 |       setTimeout(() => setCopied(false), 2000);
 33 | 
 34 |       posthog.capture('docs_copy_markdown_clicked', {
 35 |         page: pageUrl,
 36 |         page_title: pageTitle,
 37 |         success: true,
 38 |       });
 39 |     } catch (error) {
 40 |       console.error('Error copying markdown:', error);
 41 | 
 42 |       try {
 43 |         const urlWithUtm = `https://cua.ai${pageUrl}?utm_source=cua.ai/docs`;
 44 |         await navigator.clipboard.writeText(urlWithUtm);
 45 |         setCopied(true);
 46 |         setTimeout(() => setCopied(false), 2000);
 47 |       } catch (fallbackError) {
 48 |         console.error('Error copying URL:', fallbackError);
 49 |       }
 50 | 
 51 |       posthog.capture('docs_copy_markdown_clicked', {
 52 |         page: pageUrl,
 53 |         page_title: pageTitle,
 54 |         success: false,
 55 |         error: error instanceof Error ? error.message : 'Unknown error',
 56 |       });
 57 |     }
 58 |   };
 59 | 
 60 |   const handleEditGithub = () => {
 61 |     if (!filePath) {
 62 |       return;
 63 |     }
 64 |     posthog.capture('docs_edit_github_clicked', {
 65 |       page: pageUrl,
 66 |       page_title: pageTitle,
 67 |     });
 68 | 
 69 |     const githubEditUrl = `https://github.com/trycua/cua/edit/main/docs/content/docs/${filePath}`;
 70 |     window.open(githubEditUrl, '_blank', 'noopener,noreferrer');
 71 |   };
 72 | 
 73 |   const handleOpenChatGPT = () => {
 74 |     posthog.capture('docs_open_chatgpt_clicked', {
 75 |       page: pageUrl,
 76 |       page_title: pageTitle,
 77 |     });
 78 | 
 79 |     const docUrl = `https://cua.ai${pageUrl}?utm_source=cua.ai/docs`;
 80 |     const prompt = `I need help understanding this cua.ai documentation page: "${pageTitle}". Please read and help me with: ${docUrl}`;
 81 |     const chatgptUrl = `https://chatgpt.com/?q=${encodeURIComponent(prompt)}`;
 82 |     window.open(chatgptUrl, '_blank', 'noopener,noreferrer');
 83 |   };
 84 | 
 85 |   const handleOpenClaude = () => {
 86 |     posthog.capture('docs_open_claude_clicked', {
 87 |       page: pageUrl,
 88 |       page_title: pageTitle,
 89 |     });
 90 | 
 91 |     const docUrl = `https://cua.ai${pageUrl}?utm_source=cua.ai/docs`;
 92 |     const prompt = `I need help understanding this cua.ai documentation page: "${pageTitle}". Please read and help me with: ${docUrl}`;
 93 |     const claudeUrl = `https://claude.ai/new?q=${encodeURIComponent(prompt)}`;
 94 |     window.open(claudeUrl, '_blank', 'noopener,noreferrer');
 95 |   };
 96 | 
 97 |   return (
 98 |     <div className="flex flex-col gap-2">
 99 |       <button
100 |         onClick={handleCopyMarkdown}
101 |         className="inline-flex gap-3 w-full items-center rounded-md p-1 text-sm hover:bg-fd-accent hover:text-fd-accent-foreground text-left transition-colors px-2 hover:cursor-pointer"
102 |       >
103 |         <SiMarkdown className="w-4 h-4 flex-shrink-0" />
104 |         <span>{copied ? 'Copied!' : 'Copy as markdown'}</span>
105 |       </button>
106 | 
107 |       <button
108 |         onClick={handleEditGithub}
109 |         className="inline-flex gap-3 w-full items-center rounded-md p-1 text-sm hover:bg-fd-accent hover:text-fd-accent-foreground text-left transition-colors px-2 hover:cursor-pointer"
110 |       >
111 |         <SiGithub className="w-4 h-4 flex-shrink-0" />
112 |         <span>Edit on GitHub</span>
113 |       </button>
114 | 
115 |       <button
116 |         onClick={handleOpenChatGPT}
117 |         className="inline-flex gap-3 w-full items-center rounded-md p-1 text-sm hover:bg-fd-accent hover:text-fd-accent-foreground text-left transition-colors px-2 hover:cursor-pointer"
118 |       >
119 |         <SiOpenai className="w-4 h-4 flex-shrink-0" />
120 |         <span>Open in ChatGPT</span>
121 |       </button>
122 | 
123 |       <button
124 |         onClick={handleOpenClaude}
125 |         className="inline-flex gap-3 w-full items-center rounded-md p-1 text-sm hover:bg-fd-accent hover:text-fd-accent-foreground text-left transition-colors px-2 hover:cursor-pointer"
126 |       >
127 |         <SiAnthropic className="w-4 h-4 flex-shrink-0" />
128 |         <span>Open in Claude</span>
129 |       </button>
130 |     </div>
131 |   );
132 | }
133 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/supported-agents/composed-agents.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Composed Agents
  3 | description: Combine grounding models with any LLM for computer-use capabilities
  4 | ---
  5 | 
  6 | Composed agents combine the best of both worlds: specialized grounding models for precise click prediction and powerful LLMs for task planning and reasoning.
  7 | 
  8 | Use the format `"grounding_model+planning_model"` to create a composed agent with any vision-enabled LiteLLM-compatible model.
  9 | 
 10 | ## How Composed Agents Work
 11 | 
 12 | 1. **Planning Phase**: The planning model (LLM) analyzes the task and decides what actions to take (e.g., `click("find the login button")`, `type("username")`)
 13 | 2. **Grounding Phase**: The grounding model converts element descriptions to precise coordinates
 14 | 3. **Execution**: Actions are performed using the predicted coordinates
 15 | 
 16 | ## Supported Grounding Models
 17 | 
 18 | Any model that supports `predict_click()` can be used as the grounding component. See the full list on [Grounding Models](./grounding-models).
 19 | 
 20 | - OpenCUA: `huggingface-local/xlangai/OpenCUA-{7B,32B}`
 21 | - GTA1 family: `huggingface-local/HelloKKMe/GTA1-{7B,32B,72B}`
 22 | - Holo 1.5 family: `huggingface-local/Hcompany/Holo1.5-{3B,7B,72B}`
 23 | - InternVL 3.5 family: `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}`
 24 | - UI‑TARS 1.5: `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B` (also supports full CU)
 25 | - OmniParser (OCR): `omniparser` (requires combination with a LiteLLM vision model)
 26 | - Moondream3: `moondream3` (requires combination with a LiteLLM vision/text model)
 27 | 
 28 | ## Supported Planning Models
 29 | 
 30 | Any vision-enabled LiteLLM-compatible model can be used as the planning component:
 31 | 
 32 | - Any All‑in‑one CUA (planning-capable). See [All‑in‑one CUAs](./computer-use-agents).
 33 | - Any VLM via LiteLLM providers: `anthropic/*`, `openai/*`, `openrouter/*`, `gemini/*`, `vertex_ai/*`, `huggingface-local/*`, `mlx/*`, etc.
 34 | - Examples:
 35 |   - **Anthropic**: `anthropic/claude-sonnet-4-5-20250929`, `anthropic/claude-opus-4-1-20250805`
 36 |   - **OpenAI**: `openai/gpt-5`, `openai/o3`, `openai/gpt-4o`
 37 |   - **Google**: `gemini/gemini-1.5-pro`, `vertex_ai/gemini-pro-vision`
 38 |   - **Local models**: Any Hugging Face vision-language model
 39 | 
 40 | ## Usage Examples
 41 | 
 42 | ### GTA1 + GPT-5
 43 | 
 44 | Use OpenAI's GPT-5 for planning with specialized grounding:
 45 | 
 46 | ```python
 47 | agent = ComputerAgent(
 48 |     "huggingface-local/HelloKKMe/GTA1-7B+openai/gpt-5",
 49 |     tools=[computer]
 50 | )
 51 | 
 52 | async for _ in agent.run("Take a screenshot, analyze the UI, and click on the most prominent button"):
 53 |     pass
 54 | ```
 55 | 
 56 | ### GTA1 + Claude Sonnet 4.5
 57 | 
 58 | Combine state-of-the-art grounding with powerful reasoning:
 59 | 
 60 | ```python
 61 | agent = ComputerAgent(
 62 |     "huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-sonnet-4-5-20250929",
 63 |     tools=[computer]
 64 | )
 65 | 
 66 | async for _ in agent.run("Open Firefox, navigate to github.com, and search for 'computer-use'"):
 67 |     pass
 68 | # Success! 🎉
 69 | # - Claude Sonnet 4.5 plans the sequence of actions
 70 | # - GTA1-7B provides precise click coordinates for each UI element
 71 | ```
 72 | 
 73 | ### UI-TARS + GPT-4o
 74 | 
 75 | Combine two different vision models for enhanced capabilities:
 76 | 
 77 | ```python
 78 | agent = ComputerAgent(
 79 |     "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B+openai/gpt-4o",
 80 |     tools=[computer]
 81 | )
 82 | 
 83 | async for _ in agent.run("Help me fill out this form with my personal information"):
 84 |     pass
 85 | ```
 86 | 
 87 | ### Moondream3 + GPT-4o
 88 | 
 89 | Use the built-in Moondream3 grounding with any planning model. Moondream3 will detect UI elements on the latest screenshot, label them, and provide a user message listing detected element names.
 90 | 
 91 | ```python
 92 | from agent import ComputerAgent
 93 | from computer import computer
 94 | 
 95 | agent = ComputerAgent(
 96 |     "moondream3+openai/gpt-4o",
 97 |     tools=[computer]
 98 | )
 99 | 
100 | async for _ in agent.run("Close the settings window, then open the Downloads folder"):
101 |     pass
102 | ```
103 | 
104 | ## Benefits of Composed Agents
105 | 
106 | - **Specialized Grounding**: Use models optimized for click prediction accuracy
107 | - **Flexible Planning**: Choose any LLM for task reasoning and planning
108 | - **Cost Optimization**: Use smaller grounding models with larger planning models only when needed
109 | - **Performance**: Leverage the strengths of different model architectures
110 | 
111 | ## Capabilities
112 | 
113 | Composed agents support both capabilities:
114 | 
115 | ```python
116 | agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B+anthropic/claude-sonnet-4-5-20250929")
117 | 
118 | # Full computer-use agent capabilities
119 | async for _ in agent.run("Complete this online form"):
120 |     pass
121 | 
122 | # Direct click prediction (uses grounding model only)
123 | coords = agent.predict_click("find the submit button")
124 | ```
125 | 
126 | ---
127 | 
128 | For more information on individual model capabilities, see [Computer-Use Agents](./computer-use-agents) and [Grounding Models](./grounding-models).
129 | 
```

--------------------------------------------------------------------------------
/blog/composite-agents.md:
--------------------------------------------------------------------------------

```markdown
 1 | # Announcing Cua Agent framework 0.4 and Composite Agents
 2 | 
 3 | _Published on August 26, 2025 by Dillon DuPont_
 4 | 
 5 | <img src="./assets/composite-agents.png" alt="Composite Agents">
 6 | 
 7 | So you want to build an agent that can use a computer. Great! You've probably discovered that there are now dozens of different AI models that claim they can click GUI buttons and fill out forms. Less great: actually getting them to work together is like trying to coordinate a group project where everyone speaks a different language and has invented seventeen different ways to say "click here".
 8 | 
 9 | Here's the thing about new GUI models: they're all special snowflakes. One model wants you to feed it images and expects coordinates back as percentages from 0 to 1. Another wants absolute pixel coordinates. A third model has invented its own numeral system with `<|loc095|><|loc821|>` tokens inside tool calls. Some models output Python code that calls `pyautogui.click(x, y)`. Others will start hallucinating coordinates if you forget to format all previous messages within a very specific GUI system prompt.
10 | 
11 | This is the kind of problem that makes you wonder if we're building the future of computing or just recreating the Tower of Babel with more GPUs.
12 | 
13 | ## What we fixed
14 | 
15 | Agent framework 0.4 solves this by doing something radical: making all these different models speak the same language.
16 | 
17 | Instead of writing separate code for each model's peculiarities, you now just pick a model with a string like `"anthropic/claude-sonnet-4-5-20250929"` or `"huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"`, and everything else Just Works™. Behind the scenes, we handle all the coordinate normalization, token parsing, and image preprocessing so you don't have to.
18 | 
19 | ```python
20 | # This works the same whether you're using Anthropic, OpenAI, or that new model you found on Hugging Face
21 | agent = ComputerAgent(
22 |     model="anthropic/claude-sonnet-4-5-20250929",  # or any other supported model
23 |     tools=[computer]
24 | )
25 | ```
26 | 
27 | The output format is consistent across all providers (OpenAI, Anthropic, Vertex, Hugging Face, OpenRouter, etc.). No more writing different parsers for each model's creative interpretation of how to represent a mouse click.
28 | 
29 | ## Composite Agents: Two Brains Are Better Than One
30 | 
31 | Here's where it gets interesting. We realized that you don't actually need one model to be good at everything. Some models are excellent at understanding what's on the screen—they can reliably identify buttons and text fields and figure out where to click. Other models are great at planning and reasoning but might be a bit fuzzy on the exact pixel coordinates.
32 | 
33 | So we let you combine them with a `+` sign:
34 | 
35 | ```python
36 | agent = ComputerAgent(
37 |     # specify the grounding model first, then the planning model
38 |     model="huggingface-local/HelloKKMe/GTA1-7B+huggingface-local/OpenGVLab/InternVL3_5-8B",
39 |     tools=[computer]
40 | )
41 | ```
42 | 
43 | This creates a composite agent where one model (the "grounding" model) handles the visual understanding and precise UI interactions, while the other (the "planning" model) handles the high-level reasoning and task orchestration. It's like having a pilot and a navigator, except they're both AI models and they're trying to help you star a GitHub repository.
44 | 
45 | You can even take a model that was never designed for computer use—like GPT-4o—and give it GUI capabilities by pairing it with a specialized vision model:
46 | 
47 | ```python
48 | agent = ComputerAgent(
49 |     model="huggingface-local/HelloKKMe/GTA1-7B+openai/gpt-4o",
50 |     tools=[computer]
51 | )
52 | ```
53 | 
54 | ## Example notebook
55 | 
56 | For a full, ready-to-run demo (install deps, local computer using Docker, and a composed agent example), see the notebook:
57 | 
58 | - https://github.com/trycua/cua/blob/models/opencua/notebooks/composite_agents_docker_nb.ipynb
59 | 
60 | ## What's next
61 | 
62 | We're building integration with HUD evals, allowing us to curate and benchmark model combinations. This will help us identify which composite agent pairs work best for different types of tasks, and provide you with tested recommendations rather than just throwing model names at the wall to see what sticks.
63 | 
64 | If you try out version 0.4.x, we'd love to hear how it goes. Join us on Discord to share your results and let us know what model combinations work best for your projects.
65 | 
66 | ---
67 | 
68 | ## Links
69 | 
70 | - **Composite Agent Docs:** [https://cua.ai/docs/agent-sdk/supported-agents/composed-agents](https://cua.ai/docs/agent-sdk/supported-agents/composed-agents)
71 | - **Discord:** [https://discord.gg/cua-ai](https://discord.gg/cua-ai)
72 | 
73 | Questions or weird edge cases? Ping us on Discord—we’re curious to see what you build.
74 | 
```

--------------------------------------------------------------------------------
/blog/cloud-windows-ga-macos-preview.md:
--------------------------------------------------------------------------------

```markdown
  1 | # Cloud Windows Sandboxes GA + macOS Preview
  2 | 
  3 | If you've been building with our `cua` libraries, you might've hit a limitation with local computer-use sandboxes: to run agents on Windows or macOS, you need to be on that OS - Windows Sandbox for Windows, Apple Virtualization for macOS. The only cross-platform option is Linux on Docker, which limits you to virtualizing Linux environments ([see all local options here](https://cua.ai/docs/computer-sdk/computers)).
  4 | 
  5 | Today the story changes - we're announcing general availability of **Cloud Windows Sandboxes** and opening early preview access for **Cloud macOS Sandboxes**.
  6 | 
  7 | ## Cloud Windows Sandboxes: Now GA
  8 | 
  9 | ![Cloud Windows Sandboxes](https://github.com/user-attachments/assets/db15f4c4-70a4-425a-a264-82e629074de7)
 10 | 
 11 | Cloud Windows Sandboxes are now generally available. You get a full Windows 11 desktop in your browser with Edge and Python pre-installed, working seamlessly with all our [Computer-Use libraries](https://github.com/trycua/cua) for RPA, UI automation, code execution, and agent development.
 12 | 
 13 | **What's new with this release:**
 14 | 
 15 | - Hot-start under 1 second
 16 | - Direct noVNC over HTTPS under our sandbox.cua.ai domain
 17 | - 3 sandbox sizes available:
 18 | 
 19 | | Size   | CPU     | RAM   | Storage    |
 20 | | ------ | ------- | ----- | ---------- |
 21 | | Small  | 2 cores | 8 GB  | 128 GB SSD |
 22 | | Medium | 4 cores | 16 GB | 128 GB SSD |
 23 | | Large  | 8 cores | 32 GB | 256 GB SSD |
 24 | 
 25 | <div align="center">
 26 |   <video src="https://github.com/user-attachments/assets/8ab07646-6018-4128-87ce-53180cfea696" width="600" controls></video>
 27 | </div>
 28 | 
 29 | **Pricing:** Windows Sandboxes start at 8 credits/hour (Small), 15 credits/hour (Medium), or 31 credits/hour (Large).
 30 | 
 31 | ## Cloud macOS Sandboxes: Now in Preview
 32 | 
 33 | Running macOS locally comes with challenges: 30GB golden images, a maximum of 2 sandboxes per host, and unpredictable compatibility issues. With Cloud macOS Sandboxes, we provision bare-metal macOS hosts (M1, M2, M4) on-demand—giving you full desktop access without the overhead of managing local sandboxes.
 34 | 
 35 | ![macOS Preview Waitlist](https://github.com/user-attachments/assets/343c9a3f-59d8-4b1a-bba8-6af91e8a9cf0)
 36 | 
 37 | **Preview access:** Invite-only. [Join the waitlist](https://cua.ai/macos-waitlist) if you're building agents for macOS workflows.
 38 | 
 39 | ## Getting Started Today
 40 | 
 41 | Sign up at [cua.ai/signin](https://cua.ai/signin) and grab your API key from the dashboard. Then connect to a sandbox:
 42 | 
 43 | ```python
 44 | from computer import Computer
 45 | 
 46 | computer = Computer(
 47 |     os_type="windows",      # or "macos"
 48 |     provider_type="cloud",
 49 |     name="my-sandbox",
 50 |     api_key="your-api-key"
 51 | )
 52 | 
 53 | await computer.run()
 54 | ```
 55 | 
 56 | Manage existing sandboxes:
 57 | 
 58 | ```python
 59 | from computer.providers.cloud.provider import CloudProvider
 60 | 
 61 | provider = CloudProvider(api_key="your-api-key")
 62 | async with provider:
 63 |     sandboxes = await provider.list_vms()
 64 |     await provider.run_vm("my-sandbox")
 65 |     await provider.stop_vm("my-sandbox")
 66 | ```
 67 | 
 68 | Run an agent on Windows to automate a workflow:
 69 | 
 70 | ```python
 71 | from agent import ComputerAgent
 72 | 
 73 | agent = ComputerAgent(
 74 |     model="anthropic/claude-sonnet-4-5-20250929",
 75 |     tools=[computer],
 76 |     max_trajectory_budget=5.0
 77 | )
 78 | 
 79 | task = "Open Excel, create a sales report with this month's data, and save it to the desktop"
 80 | async for _ in agent.run(task):
 81 |     pass
 82 | ```
 83 | 
 84 | ## FAQs
 85 | 
 86 | <details>
 87 | <summary><strong>Why not just use local Windows Sandbox?</strong></summary>
 88 | 
 89 | Local Windows Sandbox resets on every restart. No persistence, no hot-start, and you need Windows Pro. Our sandboxes persist state, hot-start in under a second, and work from any OS.
 90 | 
 91 | </details>
 92 | 
 93 | <details>
 94 | <summary><strong>What happens to my work when I stop a sandbox?</strong></summary>
 95 | 
 96 | Everything persists. Files, installed software, browser profiles—it's all there when you restart. Only pay for runtime, not storage.
 97 | 
 98 | </details>
 99 | 
100 | <details>
101 | <summary><strong>How's the latency for UI automation?</strong></summary>
102 | 
103 | We run in 4 regions so you can pick what's closest. The noVNC connection is optimized for automation, not video streaming. Your agent sees crisp screenshots, not compressed video.
104 | 
105 | </details>
106 | 
107 | <details>
108 | <summary><strong>Are there software restrictions?</strong></summary>
109 | 
110 | No. Full admin access on both platforms. Install whatever you need—Visual Studio, Photoshop, custom enterprise software. It's your sandbox.
111 | 
112 | </details>
113 | 
114 | ## Need help?
115 | 
116 | If you hit issues getting either platform working, reach out in [Discord](https://discord.gg/cua-ai). We respond fast and fix based on what people actually use.
117 | 
118 | ---
119 | 
120 | Get started at [cua.ai](https://cua.ai) or [join the macOS waitlist](https://cua.ai/macos-waitlist).
121 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/base.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Base callback handler interface for ComputerAgent preprocessing and postprocessing hooks.
  3 | """
  4 | 
  5 | from abc import ABC, abstractmethod
  6 | from typing import Any, Dict, List, Optional, Union
  7 | 
  8 | 
  9 | class AsyncCallbackHandler(ABC):
 10 |     """
 11 |     Base class for async callback handlers that can preprocess messages before
 12 |     the agent loop and postprocess output after the agent loop.
 13 |     """
 14 | 
 15 |     async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
 16 |         """Called at the start of an agent run loop."""
 17 |         pass
 18 | 
 19 |     async def on_run_end(
 20 |         self,
 21 |         kwargs: Dict[str, Any],
 22 |         old_items: List[Dict[str, Any]],
 23 |         new_items: List[Dict[str, Any]],
 24 |     ) -> None:
 25 |         """Called at the end of an agent run loop."""
 26 |         pass
 27 | 
 28 |     async def on_run_continue(
 29 |         self,
 30 |         kwargs: Dict[str, Any],
 31 |         old_items: List[Dict[str, Any]],
 32 |         new_items: List[Dict[str, Any]],
 33 |     ) -> bool:
 34 |         """Called during agent run loop to determine if execution should continue.
 35 | 
 36 |         Args:
 37 |             kwargs: Run arguments
 38 |             old_items: Original messages
 39 |             new_items: New messages generated during run
 40 | 
 41 |         Returns:
 42 |             True to continue execution, False to stop
 43 |         """
 44 |         return True
 45 | 
 46 |     async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 47 |         """
 48 |         Called before messages are sent to the agent loop.
 49 | 
 50 |         Args:
 51 |             messages: List of message dictionaries to preprocess
 52 | 
 53 |         Returns:
 54 |             List of preprocessed message dictionaries
 55 |         """
 56 |         return messages
 57 | 
 58 |     async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 59 |         """
 60 |         Called after the agent loop returns output.
 61 | 
 62 |         Args:
 63 |             output: List of output message dictionaries to postprocess
 64 | 
 65 |         Returns:
 66 |             List of postprocessed output dictionaries
 67 |         """
 68 |         return output
 69 | 
 70 |     async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
 71 |         """
 72 |         Called when a computer call is about to start.
 73 | 
 74 |         Args:
 75 |             item: The computer call item dictionary
 76 |         """
 77 |         pass
 78 | 
 79 |     async def on_computer_call_end(
 80 |         self, item: Dict[str, Any], result: List[Dict[str, Any]]
 81 |     ) -> None:
 82 |         """
 83 |         Called when a computer call has completed.
 84 | 
 85 |         Args:
 86 |             item: The computer call item dictionary
 87 |             result: The result of the computer call
 88 |         """
 89 |         pass
 90 | 
 91 |     async def on_function_call_start(self, item: Dict[str, Any]) -> None:
 92 |         """
 93 |         Called when a function call is about to start.
 94 | 
 95 |         Args:
 96 |             item: The function call item dictionary
 97 |         """
 98 |         pass
 99 | 
100 |     async def on_function_call_end(
101 |         self, item: Dict[str, Any], result: List[Dict[str, Any]]
102 |     ) -> None:
103 |         """
104 |         Called when a function call has completed.
105 | 
106 |         Args:
107 |             item: The function call item dictionary
108 |             result: The result of the function call
109 |         """
110 |         pass
111 | 
112 |     async def on_text(self, item: Dict[str, Any]) -> None:
113 |         """
114 |         Called when a text message is encountered.
115 | 
116 |         Args:
117 |             item: The message item dictionary
118 |         """
119 |         pass
120 | 
121 |     async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
122 |         """
123 |         Called when an API call is about to start.
124 | 
125 |         Args:
126 |             kwargs: The kwargs being passed to the API call
127 |         """
128 |         pass
129 | 
130 |     async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
131 |         """
132 |         Called when an API call has completed.
133 | 
134 |         Args:
135 |             kwargs: The kwargs that were passed to the API call
136 |             result: The result of the API call
137 |         """
138 |         pass
139 | 
140 |     async def on_usage(self, usage: Dict[str, Any]) -> None:
141 |         """
142 |         Called when usage information is received.
143 | 
144 |         Args:
145 |             usage: The usage information
146 |         """
147 |         pass
148 | 
149 |     async def on_screenshot(self, screenshot: Union[str, bytes], name: str = "screenshot") -> None:
150 |         """
151 |         Called when a screenshot is taken.
152 | 
153 |         Args:
154 |             screenshot: The screenshot image
155 |             name: The name of the screenshot
156 |         """
157 |         pass
158 | 
159 |     async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
160 |         """
161 |         Called when responses are received.
162 | 
163 |         Args:
164 |             kwargs: The kwargs being passed to the agent loop
165 |             responses: The responses received
166 |         """
167 |         pass
168 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/computer-sdk/computers.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Computer Types
  3 | description: Understanding Cua computer types and connection methods
  4 | ---
  5 | 
  6 | {/* prettier-ignore */}
  7 | <Callout>A corresponding <a href="https://github.com/trycua/cua/blob/main/notebooks/computer_nb.ipynb" target="_blank">Jupyter Notebook</a> and <a href="https://github.com/trycua/cua/tree/main/examples/computer-example-ts" target="_blank">NodeJS project</a> are available for this documentation.</Callout>
  8 | 
  9 | Before we can automate apps using AI, we need to first connect to a Computer Server to give the AI a safe environment to execute workflows in.
 10 | 
 11 | Cua Computers are preconfigured sandboxes running the Computer Server. They can run macOS, Linux, or Windows, and are found either in a cloud-native sandbox or on your host desktop.
 12 | 
 13 | ## Cloud Sandbox
 14 | 
 15 | **Easiest & safest way to get started - works on any host OS**
 16 | 
 17 | This is a Cloud Sandbox running the Computer Server. Get a sandbox at [cua.ai](https://cua.ai/).
 18 | 
 19 | <Tabs items={['Python', 'TypeScript']}>
 20 |   <Tab value="Python">
 21 |     ```python
 22 |     from computer import Computer
 23 | 
 24 |     computer = Computer(
 25 |         os_type="linux",
 26 |         provider_type="cloud",
 27 |         name="your-sandbox-name",
 28 |         api_key="your-api-key"
 29 |     )
 30 | 
 31 |     await computer.run() # Connect to the sandbox
 32 |     ```
 33 | 
 34 |   </Tab>
 35 |   <Tab value="TypeScript">
 36 |     ```typescript
 37 |     import { Computer, OSType } from '@trycua/computer';
 38 | 
 39 |     const computer = new Computer({
 40 |       osType: OSType.LINUX,
 41 |       name: "your-sandbox-name",
 42 |       apiKey: "your-api-key"
 43 |     });
 44 | 
 45 |     await computer.run(); // Connect to the sandbox
 46 |     ```
 47 | 
 48 |   </Tab>
 49 | </Tabs>
 50 | 
 51 | ## Linux on Docker
 52 | 
 53 | **Run Linux desktop locally on macOS, Windows, or Linux hosts**
 54 | 
 55 | Cua provides two Docker images for running Linux desktops:
 56 | 
 57 | <Tabs items={['XFCE (Lightweight)', 'KASM (Full-Featured)']}>
 58 |   <Tab value="XFCE (Lightweight)">
 59 | 
 60 |     **Recommended for most use cases** - lightweight XFCE desktop with Firefox
 61 | 
 62 |     1. Install Docker Desktop or Docker Engine
 63 | 
 64 |     2. Pull the CUA XFCE image
 65 | 
 66 |     ```bash
 67 |     docker pull --platform=linux/amd64 trycua/cua-xfce:latest
 68 |     ```
 69 | 
 70 |     3. Connect with Computer
 71 | 
 72 |     ```python
 73 |     from computer import Computer
 74 | 
 75 |     computer = Computer(
 76 |         os_type="linux",
 77 |         provider_type="docker",
 78 |         image="trycua/cua-xfce:latest",
 79 |         name="my-xfce-sandbox"
 80 |     )
 81 | 
 82 |     await computer.run() # Launch & connect to Docker sandbox
 83 |     ```
 84 | 
 85 |   </Tab>
 86 |   <Tab value="KASM (Full-Featured)">
 87 | 
 88 |     **Full-featured Ubuntu desktop** with additional applications
 89 | 
 90 |     1. Install Docker Desktop or Docker Engine
 91 | 
 92 |     2. Build or pull the CUA KASM image
 93 | 
 94 |     ```bash
 95 |     # Option 1: Pull from Docker Hub
 96 |     docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest
 97 | 
 98 |     # Option 2: Build locally
 99 |     cd libs/kasm
100 |     docker build -t cua-ubuntu:latest .
101 |     ```
102 | 
103 |     3. Connect with Computer
104 | 
105 |     ```python
106 |     from computer import Computer
107 | 
108 |     computer = Computer(
109 |         os_type="linux",
110 |         provider_type="docker",
111 |         image="trycua/cua-ubuntu:latest",
112 |         name="my-kasm-sandbox"
113 |     )
114 | 
115 |     await computer.run() # Launch & connect to Docker sandbox
116 |     ```
117 | 
118 |   </Tab>
119 | </Tabs>
120 | 
121 | ## Windows Sandbox
122 | 
123 | **Windows hosts only - requires Windows 10 Pro/Enterprise or Windows 11**
124 | 
125 | 1. Enable Windows Sandbox
126 | 2. Install pywinsandbox dependency
127 | 
128 | ```bash
129 | pip install -U git+https://github.com/karkason/pywinsandbox.git
130 | ```
131 | 
132 | 3. Connect with Computer
133 | 
134 | ```python
135 | from computer import Computer
136 | 
137 | computer = Computer(
138 |     os_type="windows",
139 |     provider_type="winsandbox",
140 |     ephemeral=True # Windows Sandbox is always ephemeral
141 | )
142 | 
143 | await computer.run() # Launch & connect to Windows Sandbox
144 | ```
145 | 
146 | ## macOS Sandbox
147 | 
148 | **macOS hosts only - requires Lume CLI**
149 | 
150 | 1. Install lume cli
151 | 
152 | ```bash
153 | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
154 | ```
155 | 
156 | 2. Start a local Cua macOS sandbox
157 | 
158 | ```bash
159 | lume run macos-sequoia-cua:latest
160 | ```
161 | 
162 | 3. Connect with Computer
163 | 
164 | ```python
165 | from computer import Computer
166 | 
167 | computer = Computer(
168 |     os_type="macos",
169 |     provider_type="lume",
170 |     name="macos-sequoia-cua:latest"
171 | )
172 | 
173 | await computer.run() # Launch & connect to the sandbox
174 | ```
175 | 
176 | ## Your host desktop
177 | 
178 | You can also have agents control your desktop directly by running Computer Server without any containerization layer. Beware that AI models may perform risky actions.
179 | 
180 | ```bash
181 | pip install cua-computer-server
182 | python -m computer_server
183 | ```
184 | 
185 | Connect with:
186 | 
187 | <Tabs items={['Python']}>
188 |   <Tab value="Python">
189 |     ```python
190 |     from computer import Computer
191 | 
192 |     computer = Computer(use_host_computer_server=True)
193 |     await computer.run() # Connect to the host desktop
194 |     ```
195 | 
196 |   </Tab>
197 | </Tabs>
198 | 
```

--------------------------------------------------------------------------------
/libs/lumier/src/bin/entry.sh:
--------------------------------------------------------------------------------

```bash
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Configure SSH to prevent known hosts warnings
  4 | export SSHPASS_PROMPT=
  5 | export SSH_ASKPASS=/bin/echo
  6 | # Quiet SSH options that skip host-key checking, exported as SSHPASS_OPTS
  7 | export SSHPASS_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -q"
  8 | 
  9 | # We'll enable strict error checking AFTER initialization
 10 | # to prevent premature exits
 11 | 
 12 | # Source configuration files
 13 | CONFIG_DIR="/run/config"
 14 | LIB_DIR="/run/lib"
 15 | 
 16 | # Source constants if available
 17 | if [ -f "${CONFIG_DIR}/constants.sh" ]; then
 18 |   source "${CONFIG_DIR}/constants.sh"
 19 | fi
 20 | 
 21 | # Import utilities
 22 | for lib in "${LIB_DIR}"/*.sh; do
 23 |   if [ -f "$lib" ]; then
 24 |     source "$lib"
 25 |   fi
 26 | done
 27 | 
 28 | # Set VM_NAME to env or fallback to container name (from --name)
 29 | if [ -z "${VM_NAME:-}" ]; then
 30 |     VM_NAME="$(cat /etc/hostname)"
 31 |     export VM_NAME
 32 | fi
 33 | 
 34 | # Set HOST_STORAGE_PATH to a lume ephemeral storage if not set
 35 | if [ -z "${HOST_STORAGE_PATH:-}" ]; then
 36 |     HOST_STORAGE_PATH="ephemeral"
 37 |     
 38 |     # Tell user that ephemeral storage is being used
 39 |     echo "Using ephemeral storage. VM state will be lost when macOS cleans up temporary files."
 40 |     
 41 |     export HOST_STORAGE_PATH
 42 | fi
 43 | 
 44 | # Only check and report mountpoints in debug mode
 45 | if [ "${LUMIER_DEBUG:-0}" == "1" ]; then
 46 |     if mountpoint -q /storage; then
 47 |         echo "/storage is mounted"
 48 |     fi
 49 |     if mountpoint -q /shared; then
 50 |         echo "/shared is mounted"
 51 |     fi
 52 |     # if mountpoint -q /data; then
 53 |     #     echo "/data is mounted"
 54 |     # fi
 55 | fi
 56 | 
 57 | # Check if we're running as PID 1 (important for Docker signal handling)
 58 | if [ $$ -ne 1 ]; then
 59 |     echo "Warning: This script is not running as PID 1 (current PID: $$)."
 60 |     echo "Docker signal handling may not work properly when stopped from Docker Desktop."
 61 | fi
 62 | 
 63 | # Log startup info
 64 | echo "Lumier VM is starting..."
 65 | 
 66 | # Signal handler: stops the VM (and deletes it when storage is ephemeral) on container stop
 67 | # SIGNAL_COUNT tracks how many termination signals have been received so far
 68 | SIGNAL_COUNT=0
 69 | 
 70 | cleanup() {
 71 |   local signal_name=$1
 72 |   set +e  # Don't exit on error in cleanup
 73 |   
 74 |   # Increment signal counter
 75 |   SIGNAL_COUNT=$((SIGNAL_COUNT + 1))
 76 |   
 77 |   # If this is the first signal, try graceful shutdown
 78 |   if [ $SIGNAL_COUNT -eq 1 ]; then
 79 |     echo "[cleanup] Caught $signal_name signal, shutting down..."
 80 |     
 81 |     # Skip the VM stop during an image pull (":-" keeps 'set -u' from aborting when the flag is unset)
 82 |     if [[ "${PULL_IN_PROGRESS:-}" == "1" ]]; then
 83 |       echo "[cleanup] Interrupted during image pull, skipping VM stop."
 84 |     else
 85 |       echo "[cleanup] Stopping VM..."
 86 |       stop_vm true
 87 |     fi
 88 |     
 89 |     # When ephemeral storage is in use, also try to delete the VM itself
 90 |     if [[ "$HOST_STORAGE_PATH" == "ephemeral" ]]; then
 91 |       # First check if VM actually exists
 92 |       VM_INFO=$(lume_get "$VM_NAME" "$HOST_STORAGE_PATH" "json" "false")
 93 |       
 94 |       # Only try VM deletion if VM exists and not in the middle of a pull
 95 |       if [[ "${PULL_IN_PROGRESS:-}" != "1" && $VM_INFO != *"Virtual machine not found"* ]]; then
 96 |         echo "[cleanup] Cleaning up VM..."
 97 |         lume_delete "$VM_NAME" "$HOST_STORAGE_PATH" > /dev/null 2>&1
 98 |       fi
 99 |     fi
100 |   else
101 |     # Repeated signal: only log here; the forced exit happens below once the count reaches 3
102 |     echo "got $SIGNAL_COUNT SIGTERM/SIGINTs, forcefully exiting"
103 |   fi
104 |   
105 |   # Three or more signals: exit immediately with a failure status
106 |   if [ $SIGNAL_COUNT -ge 3 ]; then
107 |     exit 1
108 |   fi
109 |   
110 |   # Exit with success for the first signal
111 |   if [ $SIGNAL_COUNT -eq 1 ]; then
112 |     exit 0
113 |   fi
114 | }
115 | # Ensure we catch all typical container termination signals
116 | trap 'cleanup SIGTERM' SIGTERM
117 | trap 'cleanup SIGINT' SIGINT
118 | trap 'cleanup SIGHUP' SIGHUP
119 | 
120 | # Now enable strict error handling after initialization
121 | set -euo pipefail
122 | 
123 | # Start the VM with error handling
124 | if ! start_vm; then
125 |     echo "ERROR: Failed to start VM!" >&2
126 |     exit 1
127 | fi
128 | 
129 | # Start the noVNC proxy in the background, but only when both VNC_PORT and VNC_PASSWORD are set
130 | NOVNC_PID=""
131 | if [ -n "${VNC_PORT:-}" ] && [ -n "${VNC_PASSWORD:-}" ]; then
132 |   # Only show this in debug mode
133 |   if [ "${LUMIER_DEBUG:-0}" == "1" ]; then
134 |     echo "Starting noVNC proxy with optimized color settings..."
135 |   fi
136 |   "${NOVNC_PATH}/utils/novnc_proxy" --vnc "host.docker.internal:${VNC_PORT}" --listen 8006 --web "${NOVNC_PATH}" > /dev/null 2>&1 &
137 |   NOVNC_PID=$!
138 |   disown "$NOVNC_PID"
139 |   echo "noVNC interface available at: http://localhost:8006/vnc.html?password=${VNC_PASSWORD}&autoconnect=true (replace 8006 in the URL with the host port you forwarded to 8006)"
140 | fi
141 | 
142 | echo "Lumier is running. Press Ctrl+C to stop."
143 | 
144 | # Instead of tail -f /dev/null, use a wait loop that can be interrupted by signals
145 | while true; do
146 |   # Sleep in small increments to make signal handling more responsive
147 |   sleep 1 &
148 |   wait $!
149 |   # Break the loop if we've received a signal
150 |   if [ $SIGNAL_COUNT -gt 0 ]; then
151 |     break
152 |   fi
153 | done
```

--------------------------------------------------------------------------------
/libs/lume/src/Server/Requests.swift:
--------------------------------------------------------------------------------

```swift
  1 | import ArgumentParser
  2 | import Foundation
  3 | import Virtualization
  4 | 
  5 | struct RunVMRequest: Codable {
  6 |     let noDisplay: Bool?
  7 |     let sharedDirectories: [SharedDirectoryRequest]?
  8 |     let recoveryMode: Bool?
  9 |     let storage: String?
 10 | 
 11 |     struct SharedDirectoryRequest: Codable {
 12 |         let hostPath: String
 13 |         let readOnly: Bool?
 14 |     }
 15 | 
 16 |     func parse() throws -> [SharedDirectory] {
 17 |         guard let sharedDirectories = sharedDirectories else { return [] }
 18 | 
 19 |         return try sharedDirectories.map { dir -> SharedDirectory in
 20 |             // Validate that the host path exists and is a directory
 21 |             var isDirectory: ObjCBool = false
 22 |             guard FileManager.default.fileExists(atPath: dir.hostPath, isDirectory: &isDirectory),
 23 |                 isDirectory.boolValue
 24 |             else {
 25 |                 throw ValidationError(
 26 |                     "Host path does not exist or is not a directory: \(dir.hostPath)")
 27 |             }
 28 | 
 29 |             return SharedDirectory(
 30 |                 hostPath: dir.hostPath,
 31 |                 tag: VZVirtioFileSystemDeviceConfiguration.macOSGuestAutomountTag,
 32 |                 readOnly: dir.readOnly ?? false
 33 |             )
 34 |         }
 35 |     }
 36 | }
 37 | 
 38 | struct PullRequest: Codable {
 39 |     let image: String
 40 |     let name: String?
 41 |     var registry: String
 42 |     var organization: String
 43 |     let storage: String?
 44 | 
 45 |     enum CodingKeys: String, CodingKey {
 46 |         case image, name, registry, organization, storage
 47 |     }
 48 | 
 49 |     init(from decoder: Decoder) throws {
 50 |         let container = try decoder.container(keyedBy: CodingKeys.self)
 51 |         image = try container.decode(String.self, forKey: .image)
 52 |         name = try container.decodeIfPresent(String.self, forKey: .name)
 53 |         registry = try container.decodeIfPresent(String.self, forKey: .registry) ?? "ghcr.io"
 54 |         organization = try container.decodeIfPresent(String.self, forKey: .organization) ?? "trycua"
 55 |         storage = try container.decodeIfPresent(String.self, forKey: .storage)
 56 |     }
 57 | }
 58 | 
 59 | struct CreateVMRequest: Codable {
 60 |     let name: String
 61 |     let os: String
 62 |     let cpu: Int
 63 |     let memory: String
 64 |     let diskSize: String
 65 |     let display: String
 66 |     let ipsw: String?
 67 |     let storage: String?
 68 | 
 69 |     func parse() throws -> (memory: UInt64, diskSize: UInt64) {
 70 |         return (
 71 |             memory: try parseSize(memory),
 72 |             diskSize: try parseSize(diskSize)
 73 |         )
 74 |     }
 75 | }
 76 | 
 77 | struct SetVMRequest: Codable {
 78 |     let cpu: Int?
 79 |     let memory: String?
 80 |     let diskSize: String?
 81 |     let display: String?
 82 |     let storage: String?
 83 | 
 84 |     func parse() throws -> (memory: UInt64?, diskSize: UInt64?, display: VMDisplayResolution?) {
 85 |         return (
 86 |             memory: try memory.map { try parseSize($0) },
 87 |             diskSize: try diskSize.map { try parseSize($0) },
 88 |             display: try display.map {
 89 |                 guard let resolution = VMDisplayResolution(string: $0) else {
 90 |                     throw ValidationError(
 91 |                         "Invalid display resolution format: \($0). Expected format: WIDTHxHEIGHT")
 92 |                 }
 93 |                 return resolution
 94 |             }
 95 |         )
 96 |     }
 97 | }
 98 | 
 99 | struct CloneRequest: Codable {
100 |     let name: String
101 |     let newName: String
102 |     let sourceLocation: String?
103 |     let destLocation: String?
104 | }
105 | 
106 | struct PushRequest: Codable {
107 |     let name: String // Name of the local VM
108 |     let imageName: String // Base name for the image in the registry
109 |     let tags: [String] // List of tags to push
110 |     var registry: String // Registry URL
111 |     var organization: String // Organization/user in the registry
112 |     let storage: String? // Optional VM storage location or direct path
113 |     var chunkSizeMb: Int // Chunk size
114 |     // dryRun and reassemble are less common for API, default to false?
115 |     // verbose is usually handled by server logging
116 | 
117 |     enum CodingKeys: String, CodingKey {
118 |         case name, imageName, tags, registry, organization, storage, chunkSizeMb
119 |     }
120 | 
121 |     // Provide default values for optional fields during decoding
122 |     init(from decoder: Decoder) throws {
123 |         let container = try decoder.container(keyedBy: CodingKeys.self)
124 |         name = try container.decode(String.self, forKey: .name)
125 |         imageName = try container.decode(String.self, forKey: .imageName)
126 |         tags = try container.decode([String].self, forKey: .tags)
127 |         registry = try container.decodeIfPresent(String.self, forKey: .registry) ?? "ghcr.io"
128 |         organization = try container.decodeIfPresent(String.self, forKey: .organization) ?? "trycua"
129 |         storage = try container.decodeIfPresent(String.self, forKey: .storage)
130 |         chunkSizeMb = try container.decodeIfPresent(Int.self, forKey: .chunkSizeMb) ?? 512
131 |     }
132 | }
133 | 
```

--------------------------------------------------------------------------------
/libs/lume/src/FileSystem/VMConfig.swift:
--------------------------------------------------------------------------------

```swift
  1 | import ArgumentParser
  2 | import Foundation
  3 | import Virtualization
  4 | 
  5 | /// Represents a shared directory configuration
  6 | struct SharedDirectory: Codable {
  7 |     let hostPath: String
  8 |     let tag: String
  9 |     let readOnly: Bool
 10 | 
 11 |     var string: String {
 12 |         return "\(hostPath):\(tag):\(readOnly ? "ro" : "rw")"
 13 |     }
 14 | }
 15 | 
 16 | // MARK: - VMConfig
 17 | struct VMConfig: Codable {
 18 |     
 19 |     // MARK: - Properties
 20 |     let os: String
 21 |     private var _cpuCount: Int?
 22 |     private var _memorySize: UInt64?
 23 |     private var _diskSize: UInt64?
 24 |     private var _macAddress: String?
 25 |     private var _display: VMDisplayResolution
 26 |     private var _hardwareModel: Data?
 27 |     private var _machineIdentifier: Data?
 28 |     
 29 |     // MARK: - Initialization
 30 |     init(
 31 |         os: String,
 32 |         cpuCount: Int? = nil,
 33 |         memorySize: UInt64? = nil,
 34 |         diskSize: UInt64? = nil,
 35 |         macAddress: String? = nil,
 36 |         display: String,
 37 |         hardwareModel: Data? = nil,
 38 |         machineIdentifier: Data? = nil
 39 |     ) throws {
 40 |         self.os = os
 41 |         self._cpuCount = cpuCount
 42 |         self._memorySize = memorySize
 43 |         self._diskSize = diskSize
 44 |         self._macAddress = macAddress
 45 |         self._display = VMDisplayResolution(string: display) ?? VMDisplayResolution(string: "1024x768")!
 46 |         self._hardwareModel = hardwareModel
 47 |         self._machineIdentifier = machineIdentifier
 48 |     }
 49 |     
 50 |     var display: VMDisplayResolution {
 51 |         get { _display }
 52 |         set { _display = newValue }
 53 |     }
 54 |     
 55 |     var cpuCount: Int? {
 56 |         get { _cpuCount }
 57 |         set { _cpuCount = newValue }
 58 |     }
 59 |     
 60 |     var memorySize: UInt64? {
 61 |         get { _memorySize }
 62 |         set { _memorySize = newValue }
 63 |     }
 64 |     
 65 |     var diskSize: UInt64? {
 66 |         get { _diskSize }
 67 |         set { _diskSize = newValue }
 68 |     }
 69 | 
 70 |     var hardwareModel: Data? {
 71 |         get { _hardwareModel }
 72 |         set { _hardwareModel = newValue }
 73 |     }
 74 | 
 75 |     var machineIdentifier: Data? {
 76 |         get { _machineIdentifier }
 77 |         set { _machineIdentifier = newValue }
 78 |     }
 79 | 
 80 |     var macAddress: String? {
 81 |         get { _macAddress }
 82 |         set { _macAddress = newValue }
 83 |     }
 84 |     
 85 |     mutating func setCpuCount(_ count: Int) {
 86 |         _cpuCount = count
 87 |     }
 88 |     
 89 |     mutating func setMemorySize(_ size: UInt64) {
 90 |         _memorySize = size
 91 |     }
 92 |     
 93 |     mutating func setDiskSize(_ size: UInt64) {
 94 |         _diskSize = size
 95 |     }
 96 | 
 97 |     mutating func setHardwareModel(_ hardwareModel: Data) {
 98 |         _hardwareModel = hardwareModel
 99 |     }
100 | 
101 |     mutating func setMachineIdentifier(_ machineIdentifier: Data) {
102 |         _machineIdentifier = machineIdentifier
103 |     }
104 | 
105 |     mutating func setMacAddress(_ newMacAddress: String) {
106 |         self._macAddress = newMacAddress
107 |     }
108 | 
109 |     mutating func setDisplay(_ newDisplay: VMDisplayResolution) {
110 |         self._display = newDisplay
111 |     }
112 | 
113 |     // MARK: - Codable
114 |     enum CodingKeys: String, CodingKey {
115 |         case _cpuCount = "cpuCount"
116 |         case _memorySize = "memorySize"
117 |         case _diskSize = "diskSize"
118 |         case macAddress
119 |         case display
120 |         case _hardwareModel = "hardwareModel"
121 |         case _machineIdentifier = "machineIdentifier"
122 |         case os
123 |     }
124 |     
125 |     init(from decoder: Decoder) throws {
126 |         let container = try decoder.container(keyedBy: CodingKeys.self)
127 |         // An unparsable display string falls back to 1024x768, as in init(os:...), instead of trapping.
128 |         os = try container.decode(String.self, forKey: .os)
129 |         _cpuCount = try container.decodeIfPresent(Int.self, forKey: ._cpuCount)
130 |         _memorySize = try container.decodeIfPresent(UInt64.self, forKey: ._memorySize)
131 |         _diskSize = try container.decodeIfPresent(UInt64.self, forKey: ._diskSize)
132 |         _macAddress = try container.decodeIfPresent(String.self, forKey: .macAddress)
133 |         _display = try VMDisplayResolution(string: container.decode(String.self, forKey: .display)) ?? VMDisplayResolution(string: "1024x768")!
134 |         _hardwareModel = try container.decodeIfPresent(Data.self, forKey: ._hardwareModel)
135 |         _machineIdentifier = try container.decodeIfPresent(Data.self, forKey: ._machineIdentifier)
136 |     }
137 |     
138 |     func encode(to encoder: Encoder) throws {
139 |         var container = encoder.container(keyedBy: CodingKeys.self)
140 |         
141 |         try container.encodeIfPresent(os, forKey: .os)
142 |         try container.encodeIfPresent(_cpuCount, forKey: ._cpuCount)
143 |         try container.encodeIfPresent(_memorySize, forKey: ._memorySize)
144 |         try container.encodeIfPresent(_diskSize, forKey: ._diskSize)
145 |         try container.encodeIfPresent(_macAddress, forKey: .macAddress)
146 |         try container.encode(display.string, forKey: .display)
147 |         try container.encodeIfPresent(_hardwareModel, forKey: ._hardwareModel)
148 |         try container.encodeIfPresent(_machineIdentifier, forKey: ._machineIdentifier)
149 |     }
150 | }
151 | 
```

--------------------------------------------------------------------------------
/libs/python/computer-server/computer_server/cli.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Command-line interface for the Computer API server.
  3 | """
  4 | 
  5 | import argparse
  6 | import asyncio
  7 | import logging
  8 | import os
  9 | import sys
 10 | import threading
 11 | from typing import List, Optional
 12 | 
 13 | from .server import Server
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
 19 |     """Parse command-line arguments."""
 20 |     parser = argparse.ArgumentParser(description="Start the Computer API server")
 21 |     parser.add_argument(
 22 |         "--host", default="0.0.0.0", help="Host to bind the server to (default: 0.0.0.0)"
 23 |     )
 24 |     parser.add_argument(
 25 |         "--port", type=int, default=8000, help="Port to bind the server to (default: 8000)"
 26 |     )
 27 |     parser.add_argument(
 28 |         "--log-level",
 29 |         choices=["debug", "info", "warning", "error", "critical"],
 30 |         default="info",
 31 |         help="Logging level (default: info)",
 32 |     )
 33 |     parser.add_argument(
 34 |         "--ssl-keyfile",
 35 |         type=str,
 36 |         help="Path to SSL private key file (enables HTTPS)",
 37 |     )
 38 |     parser.add_argument(
 39 |         "--ssl-certfile",
 40 |         type=str,
 41 |         help="Path to SSL certificate file (enables HTTPS)",
 42 |     )
 43 |     parser.add_argument(
 44 |         "--watchdog",
 45 |         action="store_true",
 46 |         help="Enable watchdog monitoring (automatically enabled if CONTAINER_NAME env var is set)",
 47 |     )
 48 |     parser.add_argument(
 49 |         "--watchdog-interval",
 50 |         type=int,
 51 |         default=30,
 52 |         help="Watchdog ping interval in seconds (default: 30)",
 53 |     )
 54 |     parser.add_argument(
 55 |         "--no-restart",
 56 |         action="store_true",
 57 |         help="Disable automatic server restart in watchdog",
 58 |     )
 59 | 
 60 |     return parser.parse_args(args)
 61 | 
 62 | 
 63 | def main() -> None:
 64 |     """Main entry point for the CLI."""
 65 |     args = parse_args()
 66 | 
 67 |     # Configure logging
 68 |     logging.basicConfig(
 69 |         level=getattr(logging, args.log_level.upper()),
 70 |         format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
 71 |     )
 72 | 
 73 |     # Check if watchdog should be enabled
 74 |     container_name = os.environ.get("CONTAINER_NAME")
 75 |     enable_watchdog = (args.watchdog or bool(container_name)) and not sys.platform.startswith("win")
 76 | 
 77 |     if container_name:
 78 |         logger.info(
 79 |             f"Container environment detected (CONTAINER_NAME={container_name}), enabling watchdog"
 80 |         )
 81 |     elif args.watchdog:
 82 |         logger.info("Watchdog explicitly enabled via --watchdog flag")
 83 | 
 84 |     # Start watchdog if enabled
 85 |     if enable_watchdog:
 86 |         logger.info(f"Starting watchdog monitoring with {args.watchdog_interval}s interval")
 87 | 
 88 |         def run_watchdog_thread():
 89 |             """Run watchdog in a separate thread."""
 90 |             loop = asyncio.new_event_loop()
 91 |             asyncio.set_event_loop(loop)
 92 |             try:
 93 |                 # Create CLI args dict for watchdog
 94 |                 cli_args = {
 95 |                     "host": args.host,
 96 |                     "port": args.port,
 97 |                     "log_level": args.log_level,
 98 |                     "ssl_keyfile": args.ssl_keyfile,
 99 |                     "ssl_certfile": args.ssl_certfile,
100 |                 }
101 | 
102 |                 # Create watchdog with restart settings
103 |                 from .watchdog import Watchdog
104 | 
105 |                 watchdog = Watchdog(cli_args=cli_args, ping_interval=args.watchdog_interval)
106 |                 watchdog.restart_enabled = not args.no_restart
107 | 
108 |                 loop.run_until_complete(watchdog.start_monitoring())
109 |             except Exception as e:
110 |                 logger.error(f"Watchdog error: {e}")
111 |             finally:
112 |                 loop.close()
113 | 
114 |         # Start watchdog in background thread
115 |         watchdog_thread = threading.Thread(target=run_watchdog_thread, daemon=True, name="watchdog")
116 |         watchdog_thread.start()
117 | 
118 |     # Create and start the server
119 |     logger.info(f"Starting CUA Computer API server on {args.host}:{args.port}...")
120 | 
121 |     # Handle SSL configuration
122 |     ssl_args = {}
123 |     if args.ssl_keyfile and args.ssl_certfile:
124 |         ssl_args = {
125 |             "ssl_keyfile": args.ssl_keyfile,
126 |             "ssl_certfile": args.ssl_certfile,
127 |         }
128 |         logger.info("HTTPS mode enabled with SSL certificates")
129 |     elif args.ssl_keyfile or args.ssl_certfile:
130 |         logger.warning(
131 |             "Both --ssl-keyfile and --ssl-certfile are required for HTTPS. Running in HTTP mode."
132 |         )
133 |     else:
134 |         logger.info("HTTP mode (no SSL certificates provided)")
135 | 
136 |     server = Server(host=args.host, port=args.port, log_level=args.log_level, **ssl_args)
137 | 
138 |     try:
139 |         server.start()
140 |     except KeyboardInterrupt:
141 |         logger.info("Server stopped by user")
142 |         sys.exit(0)
143 |     except Exception as e:
144 |         logger.error(f"Error starting server: {e}")
145 |         sys.exit(1)
146 | 
147 | 
148 | if __name__ == "__main__":
149 |     main()
150 | 
```

--------------------------------------------------------------------------------
/libs/lume/src/Virtualization/DarwinImageLoader.swift:
--------------------------------------------------------------------------------

```swift
  1 | import Foundation
  2 | import Virtualization
  3 | 
  4 | /// Handles loading and validation of macOS restore images (IPSW files).
  5 | /// Provides functionality to:
  6 | /// - Fetch the latest supported macOS restore image URL
  7 | /// - Load and validate image requirements for VM creation
  8 | /// - Extract hardware model and auxiliary storage configuration
  9 | protocol ImageLoader: Sendable {
 10 |     typealias ImageRequirements = DarwinImageLoader.ImageRequirements
 11 |     func fetchLatestSupportedURL() async throws -> URL
 12 |     func loadImageRequirements(from url: URL) async throws -> ImageRequirements
 13 |     func downloadLatestImage() async throws -> Path
 14 | }
 15 | 
 16 | final class DarwinImageLoader: NSObject, ImageLoader, @unchecked Sendable, URLSessionDownloadDelegate {
 17 |     struct ImageRequirements: Sendable {
 18 |         let hardwareModel: Data
 19 |         let minimumSupportedCPUCount: Int
 20 |         let minimumSupportedMemorySize: UInt64
 21 |     }
 22 |     
 23 |     enum ImageError: Error {
 24 |         case invalidImage
 25 |         case unsupportedConfiguration
 26 |         case downloadFailed
 27 |     }
 28 |     
 29 |     private var lastLoggedProgress: Double = 0.0
 30 |     private var progressLogger = ProgressLogger()
 31 |     private var completionHandler: ((URL?, Error?) -> Void)?
 32 |     
 33 |     func fetchLatestSupportedURL() async throws -> URL {
 34 |         try await withCheckedThrowingContinuation { continuation in
 35 |             VZMacOSRestoreImage.fetchLatestSupported { result in
 36 |                 switch result {
 37 |                 case .success(let image):
 38 |                     continuation.resume(returning: image.url)
 39 |                 case .failure(let error):
 40 |                     continuation.resume(throwing: error)
 41 |                 }
 42 |             }
 43 |         }
 44 |     }
 45 |     
 46 |     func loadImageRequirements(from url: URL) async throws -> ImageRequirements {
 47 |         let image = try await VZMacOSRestoreImage.image(from: url)
 48 |         guard let requirements = image.mostFeaturefulSupportedConfiguration else {
 49 |             throw ImageError.unsupportedConfiguration
 50 |         }
 51 |         
 52 |         return ImageRequirements(
 53 |             hardwareModel: requirements.hardwareModel.dataRepresentation,
 54 |             minimumSupportedCPUCount: requirements.minimumSupportedCPUCount,
 55 |             minimumSupportedMemorySize: requirements.minimumSupportedMemorySize
 56 |         )
 57 |     }
 58 |     
 59 |     func downloadLatestImage() async throws -> Path {
 60 |         let url = try await fetchLatestSupportedURL()
 61 |         let tempDir = FileManager.default.temporaryDirectory
 62 |         let downloadPath = tempDir.appendingPathComponent("latest.ipsw")
 63 |         
 64 |         // Reset progress logger state
 65 |         progressLogger = ProgressLogger(threshold: 0.01)
 66 |         
 67 |         // Create a continuation to wait for download completion
 68 |         return try await withCheckedThrowingContinuation { continuation in
 69 |             let session = URLSession(configuration: .default, delegate: self, delegateQueue: nil)
 70 |             let task = session.downloadTask(with: url)
 71 |             
 72 |             // Use the delegate method to handle completion
 73 |             self.completionHandler = { location, error in
 74 |                 if let error = error {
 75 |                     continuation.resume(throwing: error)
 76 |                     return
 77 |                 }
 78 |                 
 79 |                 do {
 80 |                     // Remove existing file if it exists
 81 |                     if FileManager.default.fileExists(atPath: downloadPath.path) {
 82 |                         try FileManager.default.removeItem(at: downloadPath)
 83 |                     }
 84 |                     
 85 |                     try FileManager.default.moveItem(at: location!, to: downloadPath)
 86 |                     Logger.info("Download completed and moved to: \(downloadPath.path)")
 87 |                     continuation.resume(returning: Path(downloadPath.path))
 88 |                 } catch {
 89 |                     continuation.resume(throwing: error)
 90 |                 }
 91 |             }
 92 |             
 93 |             task.resume()
 94 |         }
 95 |     }
 96 |     
 97 |     func urlSession(_ session: URLSession, downloadTask: URLSessionDownloadTask, didWriteData bytesWritten: Int64, totalBytesWritten: Int64, totalBytesExpectedToWrite: Int64) {
 98 |         let progress = Double(totalBytesWritten) / Double(totalBytesExpectedToWrite)
 99 |         progressLogger.logProgress(current: progress, context: "Downloading IPSW")
100 |     }
101 |     
102 |     func urlSession(_ session: URLSession, downloadTask: URLSessionDownloadTask, didFinishDownloadingTo location: URL) {
103 |         // Call the stored completion handler
104 |         completionHandler?(location, nil)
105 |     }
106 |     
107 |     func urlSession(_ session: URLSession, task: URLSessionTask, didCompleteWithError error: Error?) {
108 |         // Call the stored completion handler with an error if it occurred
109 |         if let error = error {
110 |             completionHandler?(nil, error)
111 |         }
112 |     }
113 | }
```

--------------------------------------------------------------------------------
/examples/agent_examples.py:
--------------------------------------------------------------------------------

```python
  1 | """Example demonstrating the ComputerAgent capabilities with the Omni provider."""
  2 | 
  3 | import asyncio
  4 | import logging
  5 | import signal
  6 | import traceback
  7 | 
  8 | # Import the unified agent class and types
  9 | from agent import ComputerAgent
 10 | from computer import Computer, VMProviderType
 11 | 
 12 | # Import utility functions
 13 | from utils import handle_sigint, load_dotenv_files
 14 | 
 15 | # Set up logging
 16 | logging.basicConfig(level=logging.INFO)
 17 | logger = logging.getLogger(__name__)
 18 | 
 19 | 
 20 | async def run_agent_example():
 21 |     """Run example of using the ComputerAgent with different models."""
 22 |     print("\n=== Example: ComputerAgent with different models ===")
 23 | 
 24 |     try:
 25 |         # Create a local macOS computer
 26 |         computer = Computer(
 27 |             os_type="macos",
 28 |             verbosity=logging.DEBUG,
 29 |         )
 30 | 
 31 |         # Create a remote Linux computer with Cua
 32 |         # computer = Computer(
 33 |         #     os_type="linux",
 34 |         #     api_key=os.getenv("CUA_API_KEY"),
 35 |         #     name=os.getenv("CUA_CONTAINER_NAME"),
 36 |         #     provider_type=VMProviderType.CLOUD,
 37 |         # )
 38 | 
 39 |         # Create ComputerAgent with new API
 40 |         agent = ComputerAgent(
 41 |             # Supported models:
 42 |             # == OpenAI CUA (computer-use-preview) ==
 43 |             model="openai/computer-use-preview",
 44 |             # == Anthropic CUA (Claude > 3.5) ==
 45 |             # model="anthropic/claude-opus-4-20250514",
 46 |             # model="anthropic/claude-sonnet-4-20250514",
 47 |             # model="anthropic/claude-3-7-sonnet-20250219",
 48 |             # model="anthropic/claude-sonnet-4-5-20250929",
 49 |             # == UI-TARS ==
 50 |             # model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
 51 |             # model="mlx/mlx-community/UI-TARS-1.5-7B-6bit",
 52 |             # model="ollama_chat/0000/ui-tars-1.5-7b",
 53 |             # == Omniparser + Any LLM ==
 54 |             # model="omniparser+anthropic/claude-opus-4-20250514",
 55 |             # model="omniparser+ollama_chat/gemma3:12b-it-q4_K_M",
 56 |             # == Omniparser + Vertex AI Gemini 3 (with thinking_level) ==
 57 |             # model="omni+vertex_ai/gemini-3-flash",
 58 |             # thinking_level="high",  # or "low"
 59 |             # media_resolution="medium",  # or "low" or "high"
 60 |             tools=[computer],
 61 |             only_n_most_recent_images=3,
 62 |             verbosity=logging.DEBUG,
 63 |             trajectory_dir="trajectories",
 64 |             use_prompt_caching=True,
 65 |             max_trajectory_budget=1.0,
 66 |         )
 67 | 
 68 |         # Example tasks to demonstrate the agent
 69 |         tasks = [
 70 |             "Look for a repository named trycua/cua on GitHub.",
 71 |             "Check the open issues, open the most recent one and read it.",
 72 |             "Clone the repository in users/lume/projects if it doesn't exist yet.",
 73 |             "Open the repository with an app named Cursor (on the dock, black background and white cube icon).",
 74 |             "From Cursor, open Composer if not already open.",
 75 |             "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.",
 76 |         ]
 77 | 
 78 |         # Use message-based conversation history
 79 |         history = []
 80 | 
 81 |         for i, task in enumerate(tasks):
 82 |             print(f"\nExecuting task {i+1}/{len(tasks)}: {task}")
 83 | 
 84 |             # Add user message to history
 85 |             history.append({"role": "user", "content": task})
 86 | 
 87 |             # Run agent with conversation history
 88 |             async for result in agent.run(history, stream=False):
 89 |                 # Add agent outputs to history
 90 |                 history += result.get("output", [])
 91 | 
 92 |                 # Print output for debugging
 93 |                 for item in result.get("output", []):
 94 |                     if item.get("type") == "message":
 95 |                         content = item.get("content", [])
 96 |                         for content_part in content:
 97 |                             if content_part.get("text"):
 98 |                                 print(f"Agent: {content_part.get('text')}")
 99 |                     elif item.get("type") == "computer_call":
100 |                         action = item.get("action", {})
101 |                         action_type = action.get("type", "")
102 |                         print(f"Computer Action: {action_type}({action})")
103 |                     elif item.get("type") == "computer_call_output":
104 |                         print("Computer Output: [Screenshot/Result]")
105 | 
106 |             print(f"✅ Task {i+1}/{len(tasks)} completed: {task}")
107 | 
108 |     except Exception as e:
109 |         logger.error(f"Error in run_agent_example: {e}")
110 |         traceback.print_exc()
111 |         raise
112 | 
113 | 
def main():
    """Entry point: run the ComputerAgent example.

    Calls ``load_dotenv_files()``, registers ``handle_sigint`` for SIGINT,
    and runs ``run_agent_example()`` on a fresh event loop. An exception
    from any of these steps is printed with its traceback and not re-raised.
    """
    try:
        load_dotenv_files()

        # SIGINT (Ctrl+C) is routed to the shared handler for a graceful exit
        signal.signal(signal.SIGINT, handle_sigint)

        asyncio.run(run_agent_example())
    except Exception as exc:
        print(f"Error running example: {exc}")
        traceback.print_exc()


if __name__ == "__main__":
    main()
130 | 
```

--------------------------------------------------------------------------------
/examples/computer_examples_windows.py:
--------------------------------------------------------------------------------

```python
  1 | import asyncio
  2 | import os
  3 | import sys
  4 | import traceback
  5 | from pathlib import Path
  6 | 
  7 | # Load environment variables from .env file
  8 | project_root = Path(__file__).parent.parent
  9 | env_file = project_root / ".env"
 10 | print(f"Loading environment from: {env_file}")
 11 | from computer.helpers import sandboxed
 12 | from dotenv import load_dotenv
 13 | 
 14 | load_dotenv(env_file)
 15 | 
 16 | # Add paths to sys.path if needed
 17 | pythonpath = os.environ.get("PYTHONPATH", "")
 18 | for path in pythonpath.split(":"):
 19 |     if path and path not in sys.path:
 20 |         sys.path.insert(0, path)  # Insert at beginning to prioritize
 21 |         print(f"Added to sys.path: {path}")
 22 | 
 23 | from computer.computer import Computer
 24 | from computer.logger import LogLevel
 25 | from computer.providers.base import VMProviderType
 26 | 
 27 | # ANSI color codes
 28 | RED = "\033[91m"
 29 | RESET = "\033[0m"
 30 | 
 31 | 
 32 | async def main():
 33 |     try:
 34 |         print("\n=== Using direct initialization ===")
 35 | 
 36 |         # Create a remote Windows computer with Cua
 37 |         computer = Computer(
 38 |             os_type="windows",
 39 |             api_key=os.getenv("CUA_API_KEY"),
 40 |             name=os.getenv("CONTAINER_NAME") or "",
 41 |             provider_type=VMProviderType.CLOUD,
 42 |         )
 43 | 
 44 |         try:
 45 |             # Run the computer with default parameters
 46 |             await computer.run()
 47 | 
 48 |             # Create output directory if it doesn't exist
 49 |             output_dir = Path("./output")
 50 |             output_dir.mkdir(exist_ok=True)
 51 | 
 52 |             # Keyboard Actions Examples
 53 |             print("\n=== Keyboard Actions ===")
 54 |             await computer.interface.type_text("Hello, World!")
 55 |             await computer.interface.press_key("enter")
 56 | 
 57 |             # Mouse Actions Examples
 58 |             print("\n=== Mouse Actions ===")
 59 |             await computer.interface.move_cursor(100, 100)
 60 |             await computer.interface.left_click()
 61 |             await computer.interface.double_click(400, 400)
 62 |             await computer.interface.right_click(300, 300)
 63 | 
 64 |             print("\n=== RPC ===")
 65 |             await computer.venv_install("demo_venv", ["mss"])
 66 | 
 67 |             @sandboxed("demo_venv")
 68 |             def greet_and_print(name):
 69 |                 import os
 70 | 
 71 |                 from mss import mss
 72 | 
 73 |                 # get username
 74 |                 username = os.getlogin()
 75 |                 print(f"Hello from inside the container, {name}!")
 76 |                 print("Username:", username)
 77 |                 print("Screens:", mss().monitors)
 78 | 
 79 |                 # take a screenshot
 80 |                 with mss() as sct:
 81 |                     filename = sct.shot(mon=-1, output="C:/Users/azureuser/Desktop/fullscreen.png")
 82 |                     print(filename)
 83 | 
 84 |                 return {"greeted": name, "username": username}
 85 | 
 86 |             # Call with args and kwargs
 87 |             result = await greet_and_print("John Doe")
 88 |             print("Result from sandboxed function:", result)
 89 | 
 90 |             # Command Actions Examples
 91 |             print("\n=== Command Actions ===")
 92 |             result = await computer.interface.run_command("notepad")
 93 |             print("Result from command:", result)
 94 | 
 95 |             screenshot = await computer.interface.screenshot()
 96 |             screenshot_path = output_dir / "screenshot.png"
 97 |             with open(screenshot_path, "wb") as f:
 98 |                 f.write(screenshot)
 99 |             print(f"Screenshot saved to: {screenshot_path.absolute()}")
100 | 
101 |             # Clipboard Actions Examples
102 |             print("\n=== Clipboard Actions ===")
103 |             await computer.interface.set_clipboard("Test clipboard")
104 |             content = await computer.interface.copy_to_clipboard()
105 |             print(f"Clipboard content: {content}")
106 | 
107 |             # Simple REPL Loop
108 |             print("\n=== Command REPL ===")
109 |             print("Enter commands to run on the remote computer.")
110 |             print("Type 'exit' or 'quit' to leave the REPL.\n")
111 | 
112 |             while True:
113 |                 try:
114 |                     # Get command from user
115 |                     command = input("command> ").strip()
116 | 
117 |                     # Check for exit commands
118 |                     if command.lower() in ["exit", "quit", ""]:
119 |                         if command.lower() in ["exit", "quit"]:
120 |                             print("Exiting REPL...")
121 |                         break
122 | 
123 |                     # Run the command
124 |                     result = await computer.interface.run_command(command)
125 | 
126 |                     print(result.stdout)
127 |                     if result.stderr:
128 |                         print(f"{RED}{result.stderr}{RESET}")
129 |                 except KeyboardInterrupt:
130 |                     print("\nExiting REPL...")
131 |                     break
132 |                 except Exception as e:
133 |                     print(f"{RED}Error running command: {e}{RESET}")
134 | 
135 |         finally:
136 |             # Important to clean up resources
137 |             # await computer.stop()
138 |             pass
139 |     except Exception as e:
140 |         print(f"Error in main: {e}")
141 |         traceback.print_exc()
142 | 
143 | 
144 | if __name__ == "__main__":
145 |     asyncio.run(main())
146 | 
```
Page 6/28FirstPrevNextLast