#
tokens: 48100/50000 13/513 files (page 9/21)
lines: on (toggle) GitHub
raw markdown copy reset
This is page 9 of 21. Use http://codebase.md/trycua/cua?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .all-contributorsrc
├── .cursorignore
├── .devcontainer
│   ├── devcontainer.json
│   ├── post-install.sh
│   └── README.md
├── .dockerignore
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── scripts
│   │   ├── get_pyproject_version.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── README.md
│   │       └── test_get_pyproject_version.py
│   └── workflows
│       ├── ci-lume.yml
│       ├── docker-publish-kasm.yml
│       ├── docker-publish-xfce.yml
│       ├── docker-reusable-publish.yml
│       ├── npm-publish-computer.yml
│       ├── npm-publish-core.yml
│       ├── publish-lume.yml
│       ├── pypi-publish-agent.yml
│       ├── pypi-publish-computer-server.yml
│       ├── pypi-publish-computer.yml
│       ├── pypi-publish-core.yml
│       ├── pypi-publish-mcp-server.yml
│       ├── pypi-publish-pylume.yml
│       ├── pypi-publish-som.yml
│       ├── pypi-reusable-publish.yml
│       └── test-validation-script.yml
├── .gitignore
├── .vscode
│   ├── docs.code-workspace
│   ├── launch.json
│   ├── libs-ts.code-workspace
│   ├── lume.code-workspace
│   ├── lumier.code-workspace
│   ├── py.code-workspace
│   └── settings.json
├── blog
│   ├── app-use.md
│   ├── assets
│   │   ├── composite-agents.png
│   │   ├── docker-ubuntu-support.png
│   │   ├── hack-booth.png
│   │   ├── hack-closing-ceremony.jpg
│   │   ├── hack-cua-ollama-hud.jpeg
│   │   ├── hack-leaderboard.png
│   │   ├── hack-the-north.png
│   │   ├── hack-winners.jpeg
│   │   ├── hack-workshop.jpeg
│   │   ├── hud-agent-evals.png
│   │   └── trajectory-viewer.jpeg
│   ├── bringing-computer-use-to-the-web.md
│   ├── build-your-own-operator-on-macos-1.md
│   ├── build-your-own-operator-on-macos-2.md
│   ├── composite-agents.md
│   ├── cua-hackathon.md
│   ├── hack-the-north.md
│   ├── hud-agent-evals.md
│   ├── human-in-the-loop.md
│   ├── introducing-cua-cloud-containers.md
│   ├── lume-to-containerization.md
│   ├── sandboxed-python-execution.md
│   ├── training-computer-use-models-trajectories-1.md
│   ├── trajectory-viewer.md
│   ├── ubuntu-docker-support.md
│   └── windows-sandbox.md
├── CONTRIBUTING.md
├── Development.md
├── Dockerfile
├── docs
│   ├── .gitignore
│   ├── .prettierrc
│   ├── content
│   │   └── docs
│   │       ├── agent-sdk
│   │       │   ├── agent-loops.mdx
│   │       │   ├── benchmarks
│   │       │   │   ├── index.mdx
│   │       │   │   ├── interactive.mdx
│   │       │   │   ├── introduction.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── osworld-verified.mdx
│   │       │   │   ├── screenspot-pro.mdx
│   │       │   │   └── screenspot-v2.mdx
│   │       │   ├── callbacks
│   │       │   │   ├── agent-lifecycle.mdx
│   │       │   │   ├── cost-saving.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── logging.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── pii-anonymization.mdx
│   │       │   │   └── trajectories.mdx
│   │       │   ├── chat-history.mdx
│   │       │   ├── custom-computer-handlers.mdx
│   │       │   ├── custom-tools.mdx
│   │       │   ├── customizing-computeragent.mdx
│   │       │   ├── integrations
│   │       │   │   ├── hud.mdx
│   │       │   │   └── meta.json
│   │       │   ├── message-format.mdx
│   │       │   ├── meta.json
│   │       │   ├── migration-guide.mdx
│   │       │   ├── prompt-caching.mdx
│   │       │   ├── supported-agents
│   │       │   │   ├── composed-agents.mdx
│   │       │   │   ├── computer-use-agents.mdx
│   │       │   │   ├── grounding-models.mdx
│   │       │   │   ├── human-in-the-loop.mdx
│   │       │   │   └── meta.json
│   │       │   ├── supported-model-providers
│   │       │   │   ├── index.mdx
│   │       │   │   └── local-models.mdx
│   │       │   └── usage-tracking.mdx
│   │       ├── computer-sdk
│   │       │   ├── cloud-vm-management.mdx
│   │       │   ├── commands.mdx
│   │       │   ├── computer-ui.mdx
│   │       │   ├── computers.mdx
│   │       │   ├── meta.json
│   │       │   └── sandboxed-python.mdx
│   │       ├── index.mdx
│   │       ├── libraries
│   │       │   ├── agent
│   │       │   │   └── index.mdx
│   │       │   ├── computer
│   │       │   │   └── index.mdx
│   │       │   ├── computer-server
│   │       │   │   ├── Commands.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── REST-API.mdx
│   │       │   │   └── WebSocket-API.mdx
│   │       │   ├── core
│   │       │   │   └── index.mdx
│   │       │   ├── lume
│   │       │   │   ├── cli-reference.mdx
│   │       │   │   ├── faq.md
│   │       │   │   ├── http-api.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── prebuilt-images.mdx
│   │       │   ├── lumier
│   │       │   │   ├── building-lumier.mdx
│   │       │   │   ├── docker-compose.mdx
│   │       │   │   ├── docker.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   └── meta.json
│   │       │   ├── mcp-server
│   │       │   │   ├── client-integrations.mdx
│   │       │   │   ├── configuration.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── llm-integrations.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── tools.mdx
│   │       │   │   └── usage.mdx
│   │       │   └── som
│   │       │       ├── configuration.mdx
│   │       │       └── index.mdx
│   │       ├── meta.json
│   │       ├── quickstart-cli.mdx
│   │       ├── quickstart-devs.mdx
│   │       └── telemetry.mdx
│   ├── next.config.mjs
│   ├── package-lock.json
│   ├── package.json
│   ├── pnpm-lock.yaml
│   ├── postcss.config.mjs
│   ├── public
│   │   └── img
│   │       ├── agent_gradio_ui.png
│   │       ├── agent.png
│   │       ├── cli.png
│   │       ├── computer.png
│   │       ├── som_box_threshold.png
│   │       └── som_iou_threshold.png
│   ├── README.md
│   ├── source.config.ts
│   ├── src
│   │   ├── app
│   │   │   ├── (home)
│   │   │   │   ├── [[...slug]]
│   │   │   │   │   └── page.tsx
│   │   │   │   └── layout.tsx
│   │   │   ├── api
│   │   │   │   └── search
│   │   │   │       └── route.ts
│   │   │   ├── favicon.ico
│   │   │   ├── global.css
│   │   │   ├── layout.config.tsx
│   │   │   ├── layout.tsx
│   │   │   ├── llms.mdx
│   │   │   │   └── [[...slug]]
│   │   │   │       └── route.ts
│   │   │   └── llms.txt
│   │   │       └── route.ts
│   │   ├── assets
│   │   │   ├── discord-black.svg
│   │   │   ├── discord-white.svg
│   │   │   ├── logo-black.svg
│   │   │   └── logo-white.svg
│   │   ├── components
│   │   │   ├── iou.tsx
│   │   │   └── mermaid.tsx
│   │   ├── lib
│   │   │   ├── llms.ts
│   │   │   └── source.ts
│   │   └── mdx-components.tsx
│   └── tsconfig.json
├── examples
│   ├── agent_examples.py
│   ├── agent_ui_examples.py
│   ├── cloud_api_examples.py
│   ├── computer_examples_windows.py
│   ├── computer_examples.py
│   ├── computer_ui_examples.py
│   ├── computer-example-ts
│   │   ├── .env.example
│   │   ├── .gitignore
│   │   ├── .prettierrc
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── README.md
│   │   ├── src
│   │   │   ├── helpers.ts
│   │   │   └── index.ts
│   │   └── tsconfig.json
│   ├── docker_examples.py
│   ├── evals
│   │   ├── hud_eval_examples.py
│   │   └── wikipedia_most_linked.txt
│   ├── pylume_examples.py
│   ├── sandboxed_functions_examples.py
│   ├── som_examples.py
│   ├── utils.py
│   └── winsandbox_example.py
├── img
│   ├── agent_gradio_ui.png
│   ├── agent.png
│   ├── cli.png
│   ├── computer.png
│   ├── logo_black.png
│   └── logo_white.png
├── libs
│   ├── kasm
│   │   ├── Dockerfile
│   │   ├── LICENSE
│   │   ├── README.md
│   │   └── src
│   │       └── ubuntu
│   │           └── install
│   │               └── firefox
│   │                   ├── custom_startup.sh
│   │                   ├── firefox.desktop
│   │                   └── install_firefox.sh
│   ├── lume
│   │   ├── .cursorignore
│   │   ├── CONTRIBUTING.md
│   │   ├── Development.md
│   │   ├── img
│   │   │   └── cli.png
│   │   ├── Package.resolved
│   │   ├── Package.swift
│   │   ├── README.md
│   │   ├── resources
│   │   │   └── lume.entitlements
│   │   ├── scripts
│   │   │   ├── build
│   │   │   │   ├── build-debug.sh
│   │   │   │   ├── build-release-notarized.sh
│   │   │   │   └── build-release.sh
│   │   │   └── install.sh
│   │   ├── src
│   │   │   ├── Commands
│   │   │   │   ├── Clone.swift
│   │   │   │   ├── Config.swift
│   │   │   │   ├── Create.swift
│   │   │   │   ├── Delete.swift
│   │   │   │   ├── Get.swift
│   │   │   │   ├── Images.swift
│   │   │   │   ├── IPSW.swift
│   │   │   │   ├── List.swift
│   │   │   │   ├── Logs.swift
│   │   │   │   ├── Options
│   │   │   │   │   └── FormatOption.swift
│   │   │   │   ├── Prune.swift
│   │   │   │   ├── Pull.swift
│   │   │   │   ├── Push.swift
│   │   │   │   ├── Run.swift
│   │   │   │   ├── Serve.swift
│   │   │   │   ├── Set.swift
│   │   │   │   └── Stop.swift
│   │   │   ├── ContainerRegistry
│   │   │   │   ├── ImageContainerRegistry.swift
│   │   │   │   ├── ImageList.swift
│   │   │   │   └── ImagesPrinter.swift
│   │   │   ├── Errors
│   │   │   │   └── Errors.swift
│   │   │   ├── FileSystem
│   │   │   │   ├── Home.swift
│   │   │   │   ├── Settings.swift
│   │   │   │   ├── VMConfig.swift
│   │   │   │   ├── VMDirectory.swift
│   │   │   │   └── VMLocation.swift
│   │   │   ├── LumeController.swift
│   │   │   ├── Main.swift
│   │   │   ├── Server
│   │   │   │   ├── Handlers.swift
│   │   │   │   ├── HTTP.swift
│   │   │   │   ├── Requests.swift
│   │   │   │   ├── Responses.swift
│   │   │   │   └── Server.swift
│   │   │   ├── Utils
│   │   │   │   ├── CommandRegistry.swift
│   │   │   │   ├── CommandUtils.swift
│   │   │   │   ├── Logger.swift
│   │   │   │   ├── NetworkUtils.swift
│   │   │   │   ├── Path.swift
│   │   │   │   ├── ProcessRunner.swift
│   │   │   │   ├── ProgressLogger.swift
│   │   │   │   ├── String.swift
│   │   │   │   └── Utils.swift
│   │   │   ├── Virtualization
│   │   │   │   ├── DarwinImageLoader.swift
│   │   │   │   ├── DHCPLeaseParser.swift
│   │   │   │   ├── ImageLoaderFactory.swift
│   │   │   │   └── VMVirtualizationService.swift
│   │   │   ├── VM
│   │   │   │   ├── DarwinVM.swift
│   │   │   │   ├── LinuxVM.swift
│   │   │   │   ├── VM.swift
│   │   │   │   ├── VMDetails.swift
│   │   │   │   ├── VMDetailsPrinter.swift
│   │   │   │   ├── VMDisplayResolution.swift
│   │   │   │   └── VMFactory.swift
│   │   │   └── VNC
│   │   │       ├── PassphraseGenerator.swift
│   │   │       └── VNCService.swift
│   │   └── tests
│   │       ├── Mocks
│   │       │   ├── MockVM.swift
│   │       │   ├── MockVMVirtualizationService.swift
│   │       │   └── MockVNCService.swift
│   │       ├── VM
│   │       │   └── VMDetailsPrinterTests.swift
│   │       ├── VMTests.swift
│   │       ├── VMVirtualizationServiceTests.swift
│   │       └── VNCServiceTests.swift
│   ├── lumier
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   └── src
│   │       ├── bin
│   │       │   └── entry.sh
│   │       ├── config
│   │       │   └── constants.sh
│   │       ├── hooks
│   │       │   └── on-logon.sh
│   │       └── lib
│   │           ├── utils.sh
│   │           └── vm.sh
│   ├── python
│   │   ├── agent
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── agent
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── adapters
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── huggingfacelocal_adapter.py
│   │   │   │   │   ├── human_adapter.py
│   │   │   │   │   ├── mlxvlm_adapter.py
│   │   │   │   │   └── models
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── generic.py
│   │   │   │   │       ├── internvl.py
│   │   │   │   │       ├── opencua.py
│   │   │   │   │       └── qwen2_5_vl.py
│   │   │   │   ├── agent.py
│   │   │   │   ├── callbacks
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── budget_manager.py
│   │   │   │   │   ├── image_retention.py
│   │   │   │   │   ├── logging.py
│   │   │   │   │   ├── operator_validator.py
│   │   │   │   │   ├── pii_anonymization.py
│   │   │   │   │   ├── prompt_instructions.py
│   │   │   │   │   ├── telemetry.py
│   │   │   │   │   └── trajectory_saver.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── computers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cua.py
│   │   │   │   │   └── custom.py
│   │   │   │   ├── decorators.py
│   │   │   │   ├── human_tool
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   ├── server.py
│   │   │   │   │   └── ui.py
│   │   │   │   ├── integrations
│   │   │   │   │   └── hud
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── agent.py
│   │   │   │   │       └── proxy.py
│   │   │   │   ├── loops
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── anthropic.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── composed_grounded.py
│   │   │   │   │   ├── gemini.py
│   │   │   │   │   ├── glm45v.py
│   │   │   │   │   ├── gta1.py
│   │   │   │   │   ├── holo.py
│   │   │   │   │   ├── internvl.py
│   │   │   │   │   ├── model_types.csv
│   │   │   │   │   ├── moondream3.py
│   │   │   │   │   ├── omniparser.py
│   │   │   │   │   ├── openai.py
│   │   │   │   │   ├── opencua.py
│   │   │   │   │   └── uitars.py
│   │   │   │   ├── proxy
│   │   │   │   │   ├── examples.py
│   │   │   │   │   └── handlers.py
│   │   │   │   ├── responses.py
│   │   │   │   ├── types.py
│   │   │   │   └── ui
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __main__.py
│   │   │   │       └── gradio
│   │   │   │           ├── __init__.py
│   │   │   │           ├── app.py
│   │   │   │           └── ui_components.py
│   │   │   ├── benchmarks
│   │   │   │   ├── .gitignore
│   │   │   │   ├── contrib.md
│   │   │   │   ├── interactive.py
│   │   │   │   ├── models
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   └── gta1.py
│   │   │   │   ├── README.md
│   │   │   │   ├── ss-pro.py
│   │   │   │   ├── ss-v2.py
│   │   │   │   └── utils.py
│   │   │   ├── example.py
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── computer
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer
│   │   │   │   ├── __init__.py
│   │   │   │   ├── computer.py
│   │   │   │   ├── diorama_computer.py
│   │   │   │   ├── helpers.py
│   │   │   │   ├── interface
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── logger.py
│   │   │   │   ├── models.py
│   │   │   │   ├── providers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cloud
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── docker
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── lume
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── lume_api.py
│   │   │   │   │   ├── lumier
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── types.py
│   │   │   │   │   └── winsandbox
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── provider.py
│   │   │   │   │       └── setup_script.ps1
│   │   │   │   ├── ui
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   └── gradio
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── app.py
│   │   │   │   └── utils.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── computer-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── diorama
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── diorama_computer.py
│   │   │   │   │   ├── diorama.py
│   │   │   │   │   ├── draw.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── safezone.py
│   │   │   │   ├── handlers
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── main.py
│   │   │   │   ├── server.py
│   │   │   │   └── watchdog.py
│   │   │   ├── examples
│   │   │   │   ├── __init__.py
│   │   │   │   └── usage_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   ├── run_server.py
│   │   │   └── test_connection.py
│   │   ├── core
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── core
│   │   │   │   ├── __init__.py
│   │   │   │   └── telemetry
│   │   │   │       ├── __init__.py
│   │   │   │       └── posthog.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   ├── mcp-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── CONCURRENT_SESSIONS.md
│   │   │   ├── mcp_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── server.py
│   │   │   │   └── session_manager.py
│   │   │   ├── pdm.lock
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── scripts
│   │   │       ├── install_mcp_server.sh
│   │   │       └── start_mcp_server.sh
│   │   ├── pylume
│   │   │   ├── __init__.py
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── pylume
│   │   │   │   ├── __init__.py
│   │   │   │   ├── client.py
│   │   │   │   ├── exceptions.py
│   │   │   │   ├── lume
│   │   │   │   ├── models.py
│   │   │   │   ├── pylume.py
│   │   │   │   └── server.py
│   │   │   ├── pyproject.toml
│   │   │   └── README.md
│   │   └── som
│   │       ├── .bumpversion.cfg
│   │       ├── LICENSE
│   │       ├── poetry.toml
│   │       ├── pyproject.toml
│   │       ├── README.md
│   │       ├── som
│   │       │   ├── __init__.py
│   │       │   ├── detect.py
│   │       │   ├── detection.py
│   │       │   ├── models.py
│   │       │   ├── ocr.py
│   │       │   ├── util
│   │       │   │   └── utils.py
│   │       │   └── visualization.py
│   │       └── tests
│   │           └── test_omniparser.py
│   ├── typescript
│   │   ├── .gitignore
│   │   ├── .nvmrc
│   │   ├── agent
│   │   │   ├── examples
│   │   │   │   ├── playground-example.html
│   │   │   │   └── README.md
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── client.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   └── client.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── biome.json
│   │   ├── computer
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── computer
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── providers
│   │   │   │   │   │   ├── base.ts
│   │   │   │   │   │   ├── cloud.ts
│   │   │   │   │   │   └── index.ts
│   │   │   │   │   └── types.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── base.ts
│   │   │   │   │   ├── factory.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── linux.ts
│   │   │   │   │   ├── macos.ts
│   │   │   │   │   └── windows.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   ├── computer
│   │   │   │   │   └── cloud.test.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── factory.test.ts
│   │   │   │   │   ├── index.test.ts
│   │   │   │   │   ├── linux.test.ts
│   │   │   │   │   ├── macos.test.ts
│   │   │   │   │   └── windows.test.ts
│   │   │   │   └── setup.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── core
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── index.ts
│   │   │   │   └── telemetry
│   │   │   │       ├── clients
│   │   │   │       │   ├── index.ts
│   │   │   │       │   └── posthog.ts
│   │   │   │       └── index.ts
│   │   │   ├── tests
│   │   │   │   └── telemetry.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── pnpm-workspace.yaml
│   │   └── README.md
│   └── xfce
│       ├── .dockerignore
│       ├── .gitignore
│       ├── Dockerfile
│       ├── README.md
│       └── src
│           ├── scripts
│           │   ├── resize-display.sh
│           │   ├── start-computer-server.sh
│           │   ├── start-novnc.sh
│           │   ├── start-vnc.sh
│           │   └── xstartup.sh
│           ├── supervisor
│           │   └── supervisord.conf
│           └── xfce-config
│               ├── helpers.rc
│               ├── xfce4-power-manager.xml
│               └── xfce4-session.xml
├── LICENSE.md
├── Makefile
├── notebooks
│   ├── agent_nb.ipynb
│   ├── blog
│   │   ├── build-your-own-operator-on-macos-1.ipynb
│   │   └── build-your-own-operator-on-macos-2.ipynb
│   ├── composite_agents_docker_nb.ipynb
│   ├── computer_nb.ipynb
│   ├── computer_server_nb.ipynb
│   ├── customizing_computeragent.ipynb
│   ├── eval_osworld.ipynb
│   ├── ollama_nb.ipynb
│   ├── pylume_nb.ipynb
│   ├── README.md
│   ├── sota_hackathon_cloud.ipynb
│   └── sota_hackathon.ipynb
├── pdm.lock
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── samples
│   └── community
│       ├── global-online
│       │   └── README.md
│       └── hack-the-north
│           └── README.md
├── scripts
│   ├── build-uv.sh
│   ├── build.ps1
│   ├── build.sh
│   ├── cleanup.sh
│   ├── playground-docker.sh
│   ├── playground.sh
│   └── run-docker-dev.sh
└── tests
    ├── pytest.ini
    ├── shell_cmd.py
    ├── test_files.py
    ├── test_mcp_server_session_management.py
    ├── test_mcp_server_streaming.py
    ├── test_shell_bash.py
    ├── test_telemetry.py
    ├── test_venv.py
    └── test_watchdog.py
```

# Files

--------------------------------------------------------------------------------
/libs/kasm/src/ubuntu/install/firefox/install_firefox.sh:
--------------------------------------------------------------------------------

```bash
  1 | #!/usr/bin/env bash
  2 | set -xe
  3 | 
  4 | # Add icon
  5 | if [ -f /dockerstartup/install/ubuntu/install/firefox/firefox.desktop ]; then
  6 |   mv /dockerstartup/install/ubuntu/install/firefox/firefox.desktop $HOME/Desktop/
  7 | fi
  8 | 
  9 | ARCH=$(arch | sed 's/aarch64/arm64/g' | sed 's/x86_64/amd64/g')
 10 | 
 11 | set_desktop_icon() {  # Rewrite the Icon= entry of the Firefox launcher in $HOME/Desktop
 12 |   sed -i -e 's!Icon=.\+!Icon=/usr/share/icons/hicolor/48x48/apps/firefox.png!' "$HOME/Desktop/firefox.desktop"  # in-place edit; '!' is the sed delimiter so the path needs no escaping
 13 | }
 14 | 
 15 | echo "Install Firefox"
 16 | if [[ "${DISTRO}" == @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|fedora39|fedora40) ]]; then
 17 |   dnf install -y firefox p11-kit
 18 | elif [ "${DISTRO}" == "opensuse" ]; then
 19 |   zypper install -yn p11-kit-tools MozillaFirefox
 20 | elif grep -q Jammy /etc/os-release || grep -q Noble /etc/os-release; then
 21 |   if [ ! -f '/etc/apt/preferences.d/mozilla-firefox' ]; then
 22 |     add-apt-repository -y ppa:mozillateam/ppa
 23 |     echo '
 24 | Package: *
 25 | Pin: release o=LP-PPA-mozillateam
 26 | Pin-Priority: 1001
 27 | ' > /etc/apt/preferences.d/mozilla-firefox
 28 |   fi
 29 |   apt-get install -y firefox p11-kit-modules
 30 | elif grep -q "ID=kali" /etc/os-release; then
 31 |   apt-get update
 32 |   apt-get install -y firefox-esr p11-kit-modules
 33 |   rm -f $HOME/Desktop/firefox.desktop
 34 |   cp \
 35 |     /usr/share/applications/firefox-esr.desktop \
 36 |     $HOME/Desktop/
 37 |   chmod +x $HOME/Desktop/firefox-esr.desktop
 38 | elif grep -q "ID=debian" /etc/os-release || grep -q "ID=parrot" /etc/os-release; then
 39 |   if [ "${ARCH}" == "amd64" ]; then
 40 |     install -d -m 0755 /etc/apt/keyrings
 41 |     wget -q https://packages.mozilla.org/apt/repo-signing-key.gpg -O- > /etc/apt/keyrings/packages.mozilla.org.asc
 42 |     echo "deb [signed-by=/etc/apt/keyrings/packages.mozilla.org.asc] https://packages.mozilla.org/apt mozilla main" > /etc/apt/sources.list.d/mozilla.list
 43 | echo '
 44 | Package: *
 45 | Pin: origin packages.mozilla.org
 46 | Pin-Priority: 1000
 47 | ' > /etc/apt/preferences.d/mozilla
 48 |     apt-get update
 49 |     apt-get install -y firefox p11-kit-modules
 50 |   else
 51 |     apt-get update
 52 |     apt-get install -y firefox-esr p11-kit-modules
 53 |     rm -f $HOME/Desktop/firefox.desktop
 54 |     cp \
 55 |       /usr/share/applications/firefox-esr.desktop \
 56 |       $HOME/Desktop/
 57 |     chmod +x $HOME/Desktop/firefox-esr.desktop
 58 |   fi
 59 | else
 60 |   apt-mark unhold firefox || :  # best effort: ignore failure if no hold exists
 61 |   apt-get remove -y firefox  # -y: removals always prompt, and an unanswered prompt aborts the build under 'set -e'
 62 |   apt-get update
 63 |   apt-get install -y firefox p11-kit-modules
 64 | fi
 65 | 
 66 | # Add Langpacks
 67 | FIREFOX_VERSION=$(curl -sI https://download.mozilla.org/?product=firefox-latest | awk -F '(releases/|/win32)' '/Location/ {print $2}')
 68 | RELEASE_URL="https://releases.mozilla.org/pub/firefox/releases/${FIREFOX_VERSION}/win64/xpi/"
 69 | LANGS=$(curl -Ls ${RELEASE_URL} | awk -F '(xpi">|</a>)' '/href.*xpi/ {print $2}' | tr '\n' ' ')
 70 | EXTENSION_DIR=/usr/lib/firefox-addons/distribution/extensions/
 71 | mkdir -p ${EXTENSION_DIR}
 72 | for LANG in ${LANGS}; do
 73 |   LANGCODE=$(echo ${LANG} | sed 's/\.xpi//g')
 74 |   echo "Downloading ${LANG} Language pack"
 75 |   curl -o \
 76 |     ${EXTENSION_DIR}langpack-${LANGCODE}@firefox.mozilla.org.xpi -Ls \
 77 |     ${RELEASE_URL}${LANG}
 78 | done
 79 | 
 80 | # Clean up package caches and install Flash if supported
 81 | if [[ "${DISTRO}" == @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|fedora39|fedora40) ]]; then
 82 |   if [ -z ${SKIP_CLEAN+x} ]; then
 83 |     dnf clean all
 84 |   fi
 85 | elif [ "${DISTRO}" == "opensuse" ]; then
 86 |   if [ -z ${SKIP_CLEAN+x} ]; then
 87 |     zypper clean --all
 88 |   fi
 89 | else
 90 |   if [ "$ARCH" == "arm64" ] && [ "$(lsb_release -cs)" == "focal" ] ; then
 91 |     echo "Firefox flash player not supported on arm64 Ubuntu Focal Skipping"
 92 |   elif grep -q "ID=debian" /etc/os-release || grep -q "ID=kali" /etc/os-release || grep -q "ID=parrot" /etc/os-release; then
 93 |     echo "Firefox flash player not supported on Debian"
 94 |   elif grep -q Focal /etc/os-release; then
 95 |     # Plugin to support running flash videos for sites like vimeo 
 96 |     apt-get update
 97 |     apt-get install -y browser-plugin-freshplayer-pepperflash
 98 |     apt-mark hold firefox
 99 |     if [ -z ${SKIP_CLEAN+x} ]; then
100 |       apt-get autoclean
101 |       rm -rf \
102 |         /var/lib/apt/lists/* \
103 |         /var/tmp/*
104 |     fi
105 |   fi
106 | fi
107 | 
108 | if [[ "${DISTRO}" != @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|opensuse|fedora39|fedora40) ]]; then
109 |   # Update firefox to utilize the system certificate store instead of the one that ships with firefox
110 |   if grep -q "ID=debian" /etc/os-release || grep -q "ID=kali" /etc/os-release || grep -q "ID=parrot" /etc/os-release && [ "${ARCH}" == "arm64" ]; then
111 |     rm -f /usr/lib/firefox-esr/libnssckbi.so
112 |     ln /usr/lib/$(arch)-linux-gnu/pkcs11/p11-kit-trust.so /usr/lib/firefox-esr/libnssckbi.so
113 |   elif grep -q "ID=kali" /etc/os-release  && [ "${ARCH}" == "amd64" ]; then
114 |     rm -f /usr/lib/firefox-esr/libnssckbi.so
115 |     ln /usr/lib/$(arch)-linux-gnu/pkcs11/p11-kit-trust.so /usr/lib/firefox-esr/libnssckbi.so
116 |   else
117 |     rm -f /usr/lib/firefox/libnssckbi.so
118 |     ln /usr/lib/$(arch)-linux-gnu/pkcs11/p11-kit-trust.so /usr/lib/firefox/libnssckbi.so
119 |   fi
120 | fi
121 | 
122 | if [[ "${DISTRO}" == @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|fedora39|fedora40) ]]; then
123 |   if [[ "${DISTRO}" == @(fedora39|fedora40) ]]; then
124 |     preferences_file=/usr/lib64/firefox/browser/defaults/preferences/firefox-redhat-default-prefs.js
125 |   else
126 |     preferences_file=/usr/lib64/firefox/browser/defaults/preferences/all-redhat.js
127 |   fi
128 |   sed -i -e '/homepage/d' "$preferences_file"
129 | elif [ "${DISTRO}" == "opensuse" ]; then
130 |   preferences_file=/usr/lib64/firefox/browser/defaults/preferences/firefox.js
131 | elif grep -q "ID=kali" /etc/os-release; then
132 |   preferences_file=/usr/lib/firefox-esr/defaults/pref/firefox.js
133 | elif grep -q "ID=debian" /etc/os-release || grep -q "ID=parrot" /etc/os-release; then
134 |   if [ "${ARCH}" == "amd64" ]; then
135 |     preferences_file=/usr/lib/firefox/defaults/pref/firefox.js
136 |   else
137 |     preferences_file=/usr/lib/firefox-esr/defaults/pref/firefox.js
138 |   fi
139 | else
140 |   preferences_file=/usr/lib/firefox/browser/defaults/preferences/firefox.js
141 | fi
142 | 
143 | # Disable the default first-run URL for Debian-based images
144 | if [[ "${DISTRO}" != @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|opensuse|fedora39|fedora40) ]]; then
145 | cat >"$preferences_file" <<EOF
146 | pref("datareporting.policy.firstRunURL", "");
147 | pref("datareporting.policy.dataSubmissionEnabled", false);
148 | pref("datareporting.healthreport.service.enabled", false);
149 | pref("datareporting.healthreport.uploadEnabled", false);
150 | pref("trailhead.firstrun.branches", "nofirstrun-empty");
151 | pref("browser.aboutwelcome.enabled", false);
152 | EOF
153 | fi
154 | 
155 | if [[ "${DISTRO}" == @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|opensuse|fedora39|fedora40) ]]; then
156 |   # Creating a default profile
157 |   chown -R root:root $HOME
158 |   firefox -headless -CreateProfile "kasm $HOME/.mozilla/firefox/kasm"
159 |   # Generate a certdb to be detected on squid start
160 |   HOME=/root firefox --headless &
161 |   mkdir -p /root/.mozilla
162 |   CERTDB=$(find  /root/.mozilla* -name "cert9.db")
163 |   while [ -z "${CERTDB}" ] ; do
164 |     sleep 1
165 |     echo "waiting for certdb"
166 |     CERTDB=$(find  /root/.mozilla* -name "cert9.db")
167 |   done
168 |   sleep 2
169 |   kill $(pgrep firefox)
170 |   CERTDIR=$(dirname ${CERTDB})
171 |   mv ${CERTDB} $HOME/.mozilla/firefox/kasm/
172 |   rm -Rf /root/.mozilla
173 | else
174 |   # Creating Default Profile
175 |   chown -R 0:0 $HOME
176 |   firefox -headless -CreateProfile "kasm $HOME/.mozilla/firefox/kasm"
177 | fi
178 | 
179 | # Silence Firefox security nag "Some of Firefox's features may offer less protection on your current operating system".
180 | echo 'user_pref("security.sandbox.warn_unprivileged_namespaces", false);' > $HOME/.mozilla/firefox/kasm/user.js
181 | chown 1000:1000 $HOME/.mozilla/firefox/kasm/user.js
182 | 
183 | if [[ "${DISTRO}" == @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|opensuse|fedora39|fedora40) ]]; then
184 |   set_desktop_icon
185 | fi
186 | 
187 | # Starting with version 67, Firefox creates a unique profile mapping per installation, keyed by a hash generated
188 | #   from the installation path. Because that path is static for our deployments, we can assume the hash
189 | #   and thus assign our profile as the default for the installation.
190 | if grep -q "ID=kali" /etc/os-release; then
191 | cat >>$HOME/.mozilla/firefox/profiles.ini <<EOL
192 | [Install3B6073811A6ABF12]
193 | Default=kasm
194 | Locked=1
195 | EOL
196 | elif grep -q "ID=debian" /etc/os-release || grep -q "ID=parrot" /etc/os-release; then
197 |   if [ "${ARCH}" != "amd64" ]; then
198 |     cat >>$HOME/.mozilla/firefox/profiles.ini <<EOL
199 | [Install3B6073811A6ABF12]
200 | Default=kasm
201 | Locked=1
202 | EOL
203 |   else
204 |     cat >>$HOME/.mozilla/firefox/profiles.ini <<EOL
205 |   [Install4F96D1932A9F858E]
206 |   Default=kasm
207 |   Locked=1
208 | EOL
209 |   fi
210 | elif [[ "${DISTRO}" != @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|opensuse|fedora39|fedora40) ]]; then
211 | cat >>$HOME/.mozilla/firefox/profiles.ini <<EOL
212 | [Install4F96D1932A9F858E]
213 | Default=kasm
214 | Locked=1
215 | EOL
216 | elif [[ "${DISTRO}" == @(oracle8|rockylinux9|rockylinux8|oracle9|rhel9|almalinux9|almalinux8|opensuse|fedora39|fedora40) ]]; then
217 | cat >>$HOME/.mozilla/firefox/profiles.ini <<EOL
218 | [Install11457493C5A56847]
219 | Default=kasm
220 | Locked=1
221 | EOL
222 | fi
223 | 
224 | # Desktop Icon Fixes
225 | if [[ "${DISTRO}" == @(rockylinux9|oracle9|rhel9|almalinux9|fedora39|fedora40) ]]; then
226 |   sed -i 's#Icon=/usr/lib/firefox#Icon=/usr/lib64/firefox#g' $HOME/Desktop/firefox.desktop
227 | fi
228 | 
229 | # Cleanup for app layer
230 | chown -R 1000:0 $HOME
231 | find /usr/share/ -name "icon-theme.cache" -exec rm -f {} \;
232 | if [ -f $HOME/Desktop/firefox.desktop ]; then
233 |   chmod +x $HOME/Desktop/firefox.desktop
234 | fi
235 | chown -R 1000:1000 $HOME/.mozilla
236 | 
237 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/quickstart-devs.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Quickstart
  3 | description: Get started with Cua in three steps
  4 | icon: Rocket
  5 | ---
  6 | 
  7 | import { Step, Steps } from 'fumadocs-ui/components/steps';
  8 | import { Tab, Tabs } from 'fumadocs-ui/components/tabs';
  9 | 
 10 | This quickstart guides you through setting up your [computer environment](#set-up-your-computer-environment), programmatic control with a [Cua computer](#using-computer), and task automation with a [Cua agent](#using-agent):
 11 | 
 12 | <Steps>
 13 | 
 14 | <Step>
 15 | 
 16 | ## Set Up Your Computer Environment
 17 | 
 18 | Choose how you want to run your Cua computer. This will be the environment where your automated tasks will execute.
 19 | 
 20 | You can run your Cua computer in the cloud (recommended for easiest setup), locally on macOS with Lume, locally on Windows with a Windows Sandbox, or in a Docker container on any platform. Choose the option that matches your system and needs.
 21 | 
 22 | <Tabs items={['☁️ Cloud', '🐳 Docker', '🍎 Lume', '🪟 Windows Sandbox']}>
 23 |   <Tab value="☁️ Cloud">
 24 | 
 25 |     Cua Cloud Sandbox provides virtual machines that run Ubuntu.
 26 | 
 27 |     1. Go to [trycua.com/signin](https://www.trycua.com/signin)
 28 |     2. Navigate to **Dashboard > Containers > Create Instance**
 29 |     3. Create a **Medium, Ubuntu 22** sandbox
 30 |     4. Note your sandbox name and API key
 31 | 
 32 |     Your Cloud Sandbox will be automatically configured and ready to use.
 33 | 
 34 |   </Tab>
 35 |   <Tab value="🍎 Lume">
 36 | 
 37 |     Lume containers are macOS virtual machines that run on a macOS host machine.
 38 |     
 39 |     1. Install the Lume CLI:
 40 | 
 41 |     ```bash
 42 |     /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
 43 |     ```
 44 | 
 45 |     2. Start a local Cua sandbox:
 46 | 
 47 |     ```bash
 48 |     lume run macos-sequoia-cua:latest
 49 |     ```
 50 | 
 51 |   </Tab>
 52 |   <Tab value="🪟 Windows Sandbox">
 53 | 
 54 |   Windows Sandbox provides Windows virtual environments that run on a Windows host machine.
 55 |     
 56 |     1. Enable [Windows Sandbox](https://learn.microsoft.com/en-us/windows/security/application-security/application-isolation/windows-sandbox/windows-sandbox-install) (requires Windows 10 Pro/Enterprise or Windows 11)
 57 |     2. Install the `pywinsandbox` dependency:
 58 | 
 59 |     ```bash
 60 |     pip install -U git+https://github.com/karkason/pywinsandbox.git
 61 |     ```
 62 | 
 63 |     3. Windows Sandbox will be automatically configured when you run the CLI
 64 | 
 65 |   </Tab>
 66 |   <Tab value="🐳 Docker">
 67 | 
 68 |   Docker provides a way to run Ubuntu containers on any host machine.
 69 |     
 70 |     1. Install [Docker Desktop](https://docs.docker.com/desktop/) or [Docker Engine](https://docs.docker.com/engine/install/).
 71 | 
 72 |     2. Pull the CUA Ubuntu sandbox:
 73 | 
 74 |     ```bash
 75 |     docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest
 76 |     ```
 77 | 
 78 |   </Tab>
 79 | </Tabs>
 80 | 
 81 | </Step>
 82 | 
 83 | <Step>
 84 | 
 85 | ## Using Computer
 86 | 
 87 | Connect to your Cua computer and perform basic interactions, such as taking screenshots or simulating user input.
 88 | 
 89 | <Tabs items={['Python', 'TypeScript']}>
 90 |   <Tab value="Python">
 91 |     Install the Cua computer Python SDK:
 92 |     ```bash
 93 |     pip install cua-computer
 94 |     ```
 95 | 
 96 |     Then, connect to your desired computer environment:
 97 | 
 98 |     <Tabs items={['☁️ Cloud', '🐳 Docker', '🍎 Lume', '🪟 Windows Sandbox', '🖥️ Host Desktop']}>
 99 |       <Tab value="☁️ Cloud">
100 |         ```python
101 |         from computer import Computer
102 | 
103 |         computer = Computer(
104 |             os_type="linux",
105 |             provider_type="cloud",
106 |             name="your-sandbox-name",
107 |             api_key="your-api-key"
108 |         )
109 |         await computer.run() # Connect to the sandbox
110 |         ```
111 |       </Tab>
112 |       <Tab value="🍎 Lume">
113 |         ```python
114 |         from computer import Computer
115 | 
116 |         computer = Computer(
117 |             os_type="macos",
118 |             provider_type="lume",
119 |             name="macos-sequoia-cua:latest"
120 |         )
121 |         await computer.run() # Launch & connect to the container
122 |         ```
123 |       </Tab>
124 |       <Tab value="🪟 Windows Sandbox">
125 |         ```python
126 |         from computer import Computer
127 | 
128 |         computer = Computer(
129 |             os_type="windows",
130 |             provider_type="windows_sandbox"
131 |         )
132 |         await computer.run() # Launch & connect to the container
133 |         ```
134 |       </Tab>
135 |       <Tab value="🐳 Docker">
136 |         ```python
137 |         from computer import Computer
138 | 
139 |         computer = Computer(
140 |             os_type="linux",
141 |             provider_type="docker",
142 |             name="trycua/cua-ubuntu:latest"
143 |         )
144 |         await computer.run() # Launch & connect to the container
145 |         ```
146 |       </Tab>
147 |       <Tab value="🖥️ Host Desktop">
148 |         Install and run `cua-computer-server`:
149 |         ```bash
150 |         pip install cua-computer-server
151 |         python -m computer_server
152 |         ```
153 | 
154 |         Then, use the `Computer` object to connect:
155 |         ```python
156 |         from computer import Computer
157 | 
158 |         computer = Computer(use_host_computer_server=True)
159 |         await computer.run() # Connect to the host desktop
160 |         ```
161 |       </Tab>
162 |     </Tabs>
163 | 
164 |     Once connected, you can perform interactions:
165 |     ```python
166 |     try:
167 |         # Take a screenshot of the computer's current display
168 |         screenshot = await computer.interface.screenshot()
169 |         # Simulate a left-click at coordinates (100, 100)
170 |         await computer.interface.left_click(100, 100)
171 |         # Type "Hello!" into the active application
172 |         await computer.interface.type("Hello!")
173 |     finally:
174 |         await computer.close()
175 |     ```
176 |   </Tab>
177 |   <Tab value="TypeScript">
178 |     Install the Cua computer TypeScript SDK:
179 |     ```bash
180 |     npm install @trycua/computer
181 |     ```
182 | 
183 |     Then, connect to your desired computer environment:
184 | 
185 |     <Tabs items={['☁️ Cloud','🐳 Docker', '🍎 Lume', '🪟 Windows Sandbox', '🖥️ Host Desktop']}>
186 |       <Tab value="☁️ Cloud">
187 |         ```typescript
188 |         import { Computer, OSType } from '@trycua/computer';
189 | 
190 |         const computer = new Computer({
191 |           osType: OSType.LINUX,
192 |           name: "your-sandbox-name",
193 |           apiKey: "your-api-key"
194 |         });
195 |         await computer.run(); // Connect to the sandbox
196 |         ```
197 |       </Tab>
198 |       <Tab value="🍎 Lume">
199 |         ```typescript
200 |         import { Computer, OSType, ProviderType } from '@trycua/computer';
201 | 
202 |         const computer = new Computer({
203 |           osType: OSType.MACOS,
204 |           providerType: ProviderType.LUME,
205 |           name: "macos-sequoia-cua:latest"
206 |         });
207 |         await computer.run(); // Launch & connect to the container
208 |         ```
209 |       </Tab>
210 |       <Tab value="🪟 Windows Sandbox">
211 |         ```typescript
212 |         import { Computer, OSType, ProviderType } from '@trycua/computer';
213 | 
214 |         const computer = new Computer({
215 |           osType: OSType.WINDOWS,
216 |           providerType: ProviderType.WINDOWS_SANDBOX
217 |         });
218 |         await computer.run(); // Launch & connect to the container
219 |         ```
220 |       </Tab>
221 |       <Tab value="🐳 Docker">
222 |         ```typescript
223 |         import { Computer, OSType, ProviderType } from '@trycua/computer';
224 | 
225 |         const computer = new Computer({
226 |           osType: OSType.LINUX,
227 |           providerType: ProviderType.DOCKER,
228 |           name: "trycua/cua-ubuntu:latest"
229 |         });
230 |         await computer.run(); // Launch & connect to the container
231 |         ```
232 |       </Tab>
233 |       <Tab value="🖥️ Host Desktop">
234 |         First, install and run `cua-computer-server`:
235 |         ```bash
236 |         pip install cua-computer-server
237 |         python -m computer_server
238 |         ```
239 | 
240 |         Then, use the `Computer` object to connect:
241 |         ```typescript
242 |         import { Computer } from '@trycua/computer';
243 | 
244 |         const computer = new Computer({ useHostComputerServer: true });
245 |         await computer.run(); // Connect to the host desktop
246 |         ```
247 |       </Tab>
248 |     </Tabs>
249 | 
250 |     Once connected, you can perform interactions:
251 |     ```typescript
252 |     try {
253 |       // Take a screenshot of the computer's current display
254 |       const screenshot = await computer.interface.screenshot();
255 |       // Simulate a left-click at coordinates (100, 100)
256 |       await computer.interface.leftClick(100, 100);
257 |       // Type "Hello!" into the active application
258 |       await computer.interface.typeText("Hello!");
259 |     } finally {
260 |       await computer.close();
261 |     }
262 |     ```
263 |   </Tab>
264 | </Tabs>
265 | 
266 | Learn more about computers in the [Cua computers documentation](/computer-sdk/computers). You will see how to automate computers with agents in the next step.
267 | 
268 | </Step>
269 | 
270 | <Step>
271 | 
272 | ## Using Agent
273 | 
274 | Utilize an Agent to automate complex tasks by providing it with a goal and allowing it to interact with the computer environment.
275 | 
276 | Install the Cua agent Python SDK:
277 | ```bash
278 | pip install "cua-agent[all]"
279 | ```
280 | 
281 | Then, use the `ComputerAgent` object:
282 | ```python
283 | from agent import ComputerAgent
284 | 
285 | agent = ComputerAgent(
286 |     model="anthropic/claude-3-5-sonnet-20241022",
287 |     tools=[computer],
288 |     max_trajectory_budget=5.0
289 | )
290 | 
291 | messages = [{"role": "user", "content": "Take a screenshot and tell me what you see"}]
292 | 
293 | async for result in agent.run(messages):
294 |     for item in result["output"]:
295 |         if item["type"] == "message":
296 |             print(item["content"][0]["text"])
297 | ```
298 | 
299 | Learn more about agents in [Agent Loops](/agent-sdk/agent-loops) and available models in [Supported Models](/agent-sdk/supported-model-providers/).
300 | 
301 | </Step>
302 | </Steps>
303 | 
304 | ## Next Steps
305 | 
306 | - Learn more about [Cua computers](/computer-sdk/computers) and [computer commands](/computer-sdk/commands)
307 | - Read about [Agent loops](/agent-sdk/agent-loops), [tools](/agent-sdk/custom-tools), and [supported model providers](/agent-sdk/supported-model-providers/)
308 | - Join our [Discord community](https://discord.com/invite/mVnXXpdE85) for help
309 | 
```

--------------------------------------------------------------------------------
/libs/python/computer-server/test_connection.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python
  2 | """
  3 | Connection test script for Computer Server.
  4 | 
  5 | This script tests both WebSocket (/ws) and REST (/cmd) connections to the Computer Server
  6 | and keeps it alive, allowing you to verify the server is running correctly.
  7 | """
  8 | 
  9 | import asyncio
 10 | import json
 11 | import websockets
 12 | import argparse
 13 | import sys
 14 | import aiohttp
 15 | import os
 16 | 
 17 | import dotenv
 18 | dotenv.load_dotenv()
 19 | 
 20 | async def test_websocket_connection(host="localhost", port=8000, keep_alive=False, container_name=None, api_key=None):
 21 |     """Test WebSocket connection to the Computer Server."""
 22 |     if container_name:
 23 |         # Container mode: use WSS with container domain and port 8443
 24 |         uri = f"wss://{container_name}.containers.cloud.trycua.com:8443/ws"
 25 |         print(f"Connecting to container {container_name} at {uri}...")
 26 |     else:
 27 |         # Local mode: use WS with specified host and port
 28 |         uri = f"ws://{host}:{port}/ws"
 29 |         print(f"Connecting to local server at {uri}...")
 30 | 
 31 |     try:
 32 |         async with websockets.connect(uri) as websocket:
 33 |             print("WebSocket connection established!")
 34 | 
 35 |             # If container connection, send authentication first
 36 |             if container_name:
 37 |                 if not api_key:
 38 |                     print("Error: API key required for container connections")
 39 |                     return False
 40 |                 
 41 |                 print("Sending authentication...")
 42 |                 auth_message = {
 43 |                     "command": "authenticate",
 44 |                     "params": {
 45 |                         "api_key": api_key,
 46 |                         "container_name": container_name
 47 |                     }
 48 |                 }
 49 |                 await websocket.send(json.dumps(auth_message))
 50 |                 auth_response = await websocket.recv()
 51 |                 print(f"Authentication response: {auth_response}")
 52 |                 
 53 |                 # Check if authentication was successful
 54 |                 auth_data = json.loads(auth_response)
 55 |                 if not auth_data.get("success", False):
 56 |                     print("Authentication failed!")
 57 |                     return False
 58 |                 print("Authentication successful!")
 59 | 
 60 |             # Send a test command to get version
 61 |             await websocket.send(json.dumps({"command": "version", "params": {}}))
 62 |             response = await websocket.recv()
 63 |             print(f"Version response: {response}")
 64 | 
 65 |             # Send a test command to get screen size
 66 |             await websocket.send(json.dumps({"command": "get_screen_size", "params": {}}))
 67 |             response = await websocket.recv()
 68 |             print(f"Screen size response: {response}")
 69 | 
 70 |             if keep_alive:
 71 |                 print("\nKeeping WebSocket connection alive. Press Ctrl+C to exit...")
 72 |                 while True:
 73 |                     # Send a command every 5 seconds to keep the connection alive
 74 |                     await asyncio.sleep(5)
 75 |                     await websocket.send(
 76 |                         json.dumps({"command": "get_cursor_position", "params": {}})
 77 |                     )
 78 |                     response = await websocket.recv()
 79 |                     print(f"Cursor position: {response}")
 80 |     except websockets.exceptions.ConnectionClosed as e:
 81 |         print(f"WebSocket connection closed: {e}")
 82 |         return False
 83 |     except ConnectionRefusedError:
 84 |         print(f"Connection refused. Is the server running at {host}:{port}?")
 85 |         return False
 86 |     except Exception as e:
 87 |         print(f"WebSocket error: {e}")
 88 |         return False
 89 | 
 90 |     return True
 91 | 
 92 | 
 93 | async def test_rest_connection(host="localhost", port=8000, keep_alive=False, container_name=None, api_key=None):
 94 |     """Test REST connection to the Computer Server."""
 95 |     if container_name:
 96 |         # Container mode: use HTTPS with container domain and port 8443
 97 |         base_url = f"https://{container_name}.containers.cloud.trycua.com:8443"
 98 |         print(f"Connecting to container {container_name} at {base_url}...")
 99 |     else:
100 |         # Local mode: use HTTP with specified host and port
101 |         base_url = f"http://{host}:{port}"
102 |         print(f"Connecting to local server at {base_url}...")
103 | 
104 |     try:
105 |         async with aiohttp.ClientSession() as session:
106 |             print("REST connection established!")
107 | 
108 |             # Prepare headers for container authentication
109 |             headers = {}
110 |             if container_name:
111 |                 if not api_key:
112 |                     print("Error: API key required for container connections")
113 |                     return False
114 |                 headers["X-Container-Name"] = container_name
115 |                 headers["X-API-Key"] = api_key
116 |                 print(f"Using container authentication headers")
117 | 
118 |             # Test screenshot endpoint
119 |             async with session.post(
120 |                 f"{base_url}/cmd",
121 |                 json={"command": "screenshot", "params": {}},
122 |                 headers=headers
123 |             ) as response:
124 |                 if response.status == 200:
125 |                     text = await response.text()
126 |                     print(f"Screenshot response: {text}")
127 |                 else:
128 |                     print(f"Screenshot request failed with status: {response.status}")
129 |                     print(await response.text())
130 |                     return False
131 | 
132 |             # Test screen size endpoint
133 |             async with session.post(
134 |                 f"{base_url}/cmd",
135 |                 json={"command": "get_screen_size", "params": {}},
136 |                 headers=headers
137 |             ) as response:
138 |                 if response.status == 200:
139 |                     text = await response.text()
140 |                     print(f"Screen size response: {text}")
141 |                 else:
142 |                     print(f"Screen size request failed with status: {response.status}")
143 |                     print(await response.text())
144 |                     return False
145 | 
146 |             if keep_alive:
147 |                 print("\nKeeping REST connection alive. Press Ctrl+C to exit...")
148 |                 while True:
149 |                     # Send a command every 5 seconds to keep testing
150 |                     await asyncio.sleep(5)
151 |                     async with session.post(
152 |                         f"{base_url}/cmd",
153 |                         json={"command": "get_cursor_position", "params": {}},
154 |                         headers=headers
155 |                     ) as response:
156 |                         if response.status == 200:
157 |                             text = await response.text()
158 |                             print(f"Cursor position: {text}")
159 |                         else:
160 |                             print(f"Cursor position request failed with status: {response.status}")
161 |                             print(await response.text())
162 |                             return False
163 | 
164 |     except aiohttp.ClientError as e:
165 |         print(f"REST connection error: {e}")
166 |         return False
167 |     except Exception as e:
168 |         print(f"REST error: {e}")
169 |         return False
170 | 
171 |     return True
172 | 
173 | 
async def test_connection(host="localhost", port=8000, keep_alive=False, container_name=None, use_rest=False, api_key=None):
    """Dispatch to the REST or WebSocket connection test.

    ``use_rest`` selects the REST (/cmd) test; otherwise the WebSocket (/ws)
    test runs. All remaining arguments are forwarded unchanged and the chosen
    test's result is returned.
    """
    runner = test_rest_connection if use_rest else test_websocket_connection
    return await runner(host, port, keep_alive, container_name, api_key)
180 | 
181 | 
def parse_args():
    """Build the command-line parser and parse the process arguments.

    Returns:
        The ``argparse.Namespace`` with ``host``, ``port``, ``container_name``,
        ``api_key``, ``keep_alive`` and ``rest`` attributes.
    """
    cli = argparse.ArgumentParser(description="Test connection to Computer Server")
    # (flags, keyword settings) for each supported option, in help order.
    option_table = (
        (("--host",), {"default": "localhost", "help": "Host address (default: localhost)"}),
        (("-p", "--port"), {"type": int, "default": 8000, "help": "Port number (default: 8000)"}),
        (("-c", "--container-name"), {"help": "Container name for cloud connection (uses WSS/HTTPS and port 8443)"}),
        (("--api-key",), {"help": "API key for container authentication (can also use CUA_API_KEY env var)"}),
        (("--keep-alive",), {"action": "store_true", "help": "Keep connection alive"}),
        (("--rest",), {"action": "store_true", "help": "Use REST endpoint (/cmd) instead of WebSocket (/ws)"}),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
191 | 
192 | 
async def main():
    """Parse the CLI, validate credentials, and run the selected connection test.

    Returns:
        0 when the connection test succeeded, 1 when it failed or when a
        container name was given without an API key.
    """
    cli = parse_args()

    # argparse exposes --container-name / --api-key with underscores.
    target_container = getattr(cli, 'container_name', None)

    # Command-line key wins; otherwise fall back to the environment.
    key = getattr(cli, 'api_key', None) or os.environ.get('CUA_API_KEY')

    # Container connections cannot proceed without a key.
    if target_container and not key:
        print("Warning: Container name provided but no API key found.")
        print("Please provide --api-key argument or set CUA_API_KEY environment variable.")
        return 1

    transport = 'REST' if cli.rest else 'WebSocket'
    print(f"Testing {transport} connection...")
    if target_container:
        print(f"Container: {target_container}")
        # Show only the last four characters of the key.
        if key and len(key) > 4:
            shown_key = '***' + key[-4:]
        else:
            shown_key = 'Not provided'
        print(f"API Key: {shown_key}")

    succeeded = await test_connection(
        host=cli.host,
        port=cli.port,
        keep_alive=cli.keep_alive,
        container_name=target_container,
        use_rest=cli.rest,
        api_key=key
    )
    if succeeded:
        return 0
    return 1
222 | 
223 | 
if __name__ == "__main__":
    # Run the async entry point and use its return value as the process exit code.
    try:
        sys.exit(asyncio.run(main()))
    except KeyboardInterrupt:
        # Ctrl+C is the documented way to leave keep-alive mode; exit cleanly.
        print("\nExiting...")
        sys.exit(0)
230 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/proxy/handlers.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Request handlers for the proxy endpoints.
  3 | """
  4 | 
  5 | import asyncio
  6 | import json
  7 | import logging
  8 | import os
  9 | from contextlib import contextmanager
 10 | from typing import Dict, Any, List, Union, Optional
 11 | 
 12 | from ..agent import ComputerAgent
 13 | from computer import Computer
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | class ResponsesHandler:
 19 |     """Handler for /responses endpoint that processes agent requests."""
 20 |     
 21 |     def __init__(self):
 22 |         self.computer = None
 23 |         self.agent = None
 24 |         # Simple in-memory caches
 25 |         self._computer_cache: Dict[str, Any] = {}
 26 |         self._agent_cache: Dict[str, Any] = {}
 27 |     
 28 |     async def setup_computer_agent(
 29 |         self,
 30 |         model: str,
 31 |         agent_kwargs: Optional[Dict[str, Any]] = None,
 32 |         computer_kwargs: Optional[Dict[str, Any]] = None,
 33 |     ):
 34 |         """Set up (and cache) computer and agent instances.
 35 | 
 36 |         Caching keys:
 37 |         - Computer cache key: computer_kwargs
 38 |         - Agent cache key: {"model": model, **agent_kwargs}
 39 |         """
 40 |         agent_kwargs = agent_kwargs or {}
 41 |         computer_kwargs = computer_kwargs or {}
 42 | 
 43 |         def _stable_key(obj: Dict[str, Any]) -> str:
 44 |             try:
 45 |                 return json.dumps(obj, sort_keys=True, separators=(",", ":"))
 46 |             except Exception:
 47 |                 # Fallback: stringify non-serializable values
 48 |                 safe_obj = {}
 49 |                 for k, v in obj.items():
 50 |                     try:
 51 |                         json.dumps(v)
 52 |                         safe_obj[k] = v
 53 |                     except Exception:
 54 |                         safe_obj[k] = str(v)
 55 |                 return json.dumps(safe_obj, sort_keys=True, separators=(",", ":"))
 56 | 
 57 |         # Determine if custom tools are supplied; if so, skip computer setup entirely
 58 |         has_custom_tools = bool(agent_kwargs.get("tools"))
 59 | 
 60 |         computer = None
 61 |         if not has_custom_tools:
 62 |             # ---------- Computer setup (with cache) ----------
 63 |             comp_key = _stable_key(computer_kwargs)
 64 | 
 65 |             computer = self._computer_cache.get(comp_key)
 66 |             if computer is None:
 67 |                 # Default computer configuration
 68 |                 default_c_config = {
 69 |                     "os_type": "linux",
 70 |                     "provider_type": "cloud",
 71 |                     "name": os.getenv("CUA_CONTAINER_NAME"),
 72 |                     "api_key": os.getenv("CUA_API_KEY"),
 73 |                 }
 74 |                 default_c_config.update(computer_kwargs)
 75 |                 computer = Computer(**default_c_config)
 76 |                 await computer.__aenter__()
 77 |                 self._computer_cache[comp_key] = computer
 78 |                 logger.info(f"Computer created and cached with key={comp_key} config={default_c_config}")
 79 |             else:
 80 |                 logger.info(f"Reusing cached computer for key={comp_key}")
 81 | 
 82 |         # Bind current computer reference (None if custom tools supplied)
 83 |         self.computer = computer
 84 | 
 85 |         # ---------- Agent setup (with cache) ----------
 86 |         # Build agent cache key from {model} + agent_kwargs (excluding tools unless explicitly passed)
 87 |         agent_kwargs_for_key = dict(agent_kwargs)
 88 |         agent_key_payload = {"model": model, **agent_kwargs_for_key}
 89 |         agent_key = _stable_key(agent_key_payload)
 90 | 
 91 |         agent = self._agent_cache.get(agent_key)
 92 |         if agent is None:
 93 |             # Default agent configuration
 94 |             default_a_config: Dict[str, Any] = {"model": model}
 95 |             if not has_custom_tools:
 96 |                 default_a_config["tools"] = [computer]
 97 |             # Apply user overrides, but keep tools unless user explicitly sets
 98 |             if agent_kwargs:
 99 |                 if not has_custom_tools:
100 |                     agent_kwargs.setdefault("tools", [computer])
101 |                 default_a_config.update(agent_kwargs)
102 |             # JSON-derived kwargs may have loose types; ignore static arg typing here
103 |             agent = ComputerAgent(**default_a_config)  # type: ignore[arg-type]
104 |             self._agent_cache[agent_key] = agent
105 |             logger.info(f"Agent created and cached with key={agent_key} model={model}")
106 |         else:
107 |             # Ensure cached agent uses the current computer tool (in case object differs)
108 |             # Only update if tools not explicitly provided in agent_kwargs
109 |             if not has_custom_tools:
110 |                 try:
111 |                     agent.tools = [computer]
112 |                 except Exception:
113 |                     pass
114 |             logger.info(f"Reusing cached agent for key={agent_key}")
115 | 
116 |         # Bind current agent reference
117 |         self.agent = agent
118 |     
119 |     async def process_request(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
120 |         """
121 |         Process a /responses request and return the result.
122 |         
123 |         Args:
124 |             request_data: Dictionary containing model, input, and optional kwargs
125 |             
126 |         Returns:
127 |             Dictionary with the agent's response
128 |         """
129 |         try:
130 |             # Extract request parameters
131 |             model = request_data.get("model")
132 |             input_data = request_data.get("input")
133 |             agent_kwargs = request_data.get("agent_kwargs", {})
134 |             computer_kwargs = request_data.get("computer_kwargs", {})
135 |             env_overrides = request_data.get("env", {}) or {}
136 |             
137 |             if not model:
138 |                 raise ValueError("Model is required")
139 |             if not input_data:
140 |                 raise ValueError("Input is required")
141 |             
142 |             # Apply env overrides for the duration of this request
143 |             with self._env_overrides(env_overrides):
144 |                 # Set up (and possibly reuse) computer and agent via caches
145 |                 await self.setup_computer_agent(model, agent_kwargs, computer_kwargs)
146 | 
147 |                 # Defensive: ensure agent is initialized for type checkers
148 |                 agent = self.agent
149 |                 if agent is None:
150 |                     raise RuntimeError("Agent failed to initialize")
151 | 
152 |                 # Convert input to messages format
153 |                 messages = self._convert_input_to_messages(input_data)
154 | 
155 |                 # Run agent and get first result
156 |                 async for result in agent.run(messages):
157 |                     # Return the first result and break
158 |                     return {
159 |                         "success": True,
160 |                         "result": result,
161 |                         "model": model
162 |                     }
163 |                 
164 |             # If no results were yielded
165 |             return {
166 |                 "success": False,
167 |                 "error": "No results from agent",
168 |                 "model": model
169 |             }
170 |             
171 |         except Exception as e:
172 |             logger.error(f"Error processing request: {e}")
173 |             return {
174 |                 "success": False,
175 |                 "error": str(e),
176 |                 "model": request_data.get("model", "unknown")
177 |             }
178 |     
179 |     def _convert_input_to_messages(self, input_data: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
180 |         """Normalize request input into a list of messages: a string becomes one user message; in a list, `input_text`/`input_image` content parts are rewritten to `text`/`image_url`; any other input type raises ValueError."""
181 |         if isinstance(input_data, str):
182 |             # Plain string: wrap it as a single user message
183 |             return [{"role": "user", "content": input_data}]
184 |         elif isinstance(input_data, list):
185 |             # List of message dicts: forwarded, with list-valued content converted part by part
186 |             messages = []
187 |             for msg in input_data:
188 |                 # Only messages whose content is a list of parts need conversion
189 |                 if isinstance(msg.get("content"), list):
190 |                     content_parts = []
191 |                     for part in msg["content"]:
192 |                         if part.get("type") == "input_text":
193 |                             content_parts.append({"type": "text", "text": part["text"]})
194 |                         elif part.get("type") == "input_image":
195 |                             content_parts.append({
196 |                                 "type": "image_url",
197 |                                 "image_url": {"url": part["image_url"]}
198 |                             })
199 |                         else:
200 |                             content_parts.append(part)  # other part types are kept unchanged
201 |                     messages.append({  # the rebuilt message carries only role and content
202 |                         "role": msg["role"],
203 |                         "content": content_parts
204 |                     })
205 |                 else:
206 |                     messages.append(msg)  # non-list content: the message is forwarded as-is
207 |             return messages
208 |         else:
209 |             raise ValueError("Input must be string or list of messages")
210 |     
211 |     async def cleanup(self):
212 |         """Exit the computer's async context (if a computer is set) and drop the computer and agent references."""
213 |         if self.computer:
214 |             try:
215 |                 await self.computer.__aexit__(None, None, None)
216 |             except Exception as e:
217 |                 logger.error(f"Error cleaning up computer: {e}")  # best-effort: log and continue
218 |             finally:
219 |                 self.computer = None  # cleared even when __aexit__ raised
220 |         self.agent = None
221 | 
222 |     @staticmethod
223 |     @contextmanager
224 |     def _env_overrides(env: Dict[str, str]):
225 |         """Temporarily apply environment variable overrides for the current process.
226 |         Restores previous values after the context exits; variables that were unset before are removed again.
227 | 
228 |         Args:
229 |             env: Mapping of env var names to override for this request.
230 |         """
231 |         if not env:
232 |             # Nothing to override: yield once without touching os.environ
233 |             yield
234 |             return
235 |         # NOTE(review): os.environ is shared by the whole process, so overlapping requests would see each other's overrides — confirm callers do not run concurrently
236 |         original: Dict[str, Optional[str]] = {}
237 |         try:
238 |             for k, v in env.items():
239 |                 original[k] = os.environ.get(k)  # None marks "was not set"
240 |                 os.environ[k] = str(v)
241 |             yield
242 |         finally:
243 |             for k, old in original.items():  # only keys recorded above are restored
244 |                 if old is None:
245 |                     # Was not set before
246 |                     os.environ.pop(k, None)
247 |                 else:
248 |                     os.environ[k] = old
249 | 
```

--------------------------------------------------------------------------------
/.github/workflows/publish-lume.yml:
--------------------------------------------------------------------------------

```yaml
  1 | name: Publish Notarized Lume
  2 | 
  3 | on:
  4 |   push:
  5 |     tags:
  6 |       - "lume-v*"
  7 |   workflow_dispatch:
  8 |     inputs:
  9 |       version:
 10 |         description: "Version to notarize (without v prefix)"
 11 |         required: true
 12 |         default: "0.1.0"
 13 |   workflow_call:
 14 |     inputs:
 15 |       version:
 16 |         description: "Version to notarize"
 17 |         required: true
 18 |         type: string
 19 |     secrets:
 20 |       APPLICATION_CERT_BASE64:
 21 |         required: true
 22 |       INSTALLER_CERT_BASE64:
 23 |         required: true
 24 |       CERT_PASSWORD:
 25 |         required: true
 26 |       APPLE_ID:
 27 |         required: true
 28 |       TEAM_ID:
 29 |         required: true
 30 |       APP_SPECIFIC_PASSWORD:
 31 |         required: true
 32 |       DEVELOPER_NAME:
 33 |         required: true
 34 | 
 35 | permissions:
 36 |   contents: write
 37 | 
 38 | env:
 39 |   APPLICATION_CERT_BASE64: ${{ secrets.APPLICATION_CERT_BASE64 }}
 40 |   INSTALLER_CERT_BASE64: ${{ secrets.INSTALLER_CERT_BASE64 }}
 41 |   CERT_PASSWORD: ${{ secrets.CERT_PASSWORD }}
 42 |   APPLE_ID: ${{ secrets.APPLE_ID }}
 43 |   TEAM_ID: ${{ secrets.TEAM_ID }}
 44 |   APP_SPECIFIC_PASSWORD: ${{ secrets.APP_SPECIFIC_PASSWORD }}
 45 |   DEVELOPER_NAME: ${{ secrets.DEVELOPER_NAME }}
 46 | 
 47 | jobs:
 48 |   notarize:
 49 |     runs-on: macos-15
 50 |     outputs:
 51 |       sha256_checksums: ${{ steps.generate_checksums.outputs.checksums }}
 52 |       version: ${{ steps.set_version.outputs.version }}
 53 |     steps:
 54 |       - uses: actions/checkout@v4
 55 | 
 56 |       - name: Select Xcode 16
 57 |         run: |
 58 |           sudo xcode-select -s /Applications/Xcode_16.app
 59 |           xcodebuild -version
 60 | 
 61 |       - name: Install dependencies
 62 |         run: |
 63 |           brew install cpio
 64 | 
 65 |       - name: Create .release directory
 66 |         run: mkdir -p .release
 67 | 
 68 |       - name: Set version
 69 |         id: set_version
 70 |         env:
 71 |           # Passed through env instead of inline expression interpolation so the value cannot inject shell code
 72 |           INPUT_VERSION: ${{ inputs.version }}
 73 |         run: |
 74 |           # Determine version from tag or input (the inputs context covers workflow_dispatch and workflow_call)
 75 |           if [[ "$GITHUB_REF" == refs/tags/lume-v* ]]; then
 76 |             VERSION="${GITHUB_REF#refs/tags/lume-v}"
 77 |             echo "Using version from tag: $VERSION"
 78 |           elif [[ -n "$INPUT_VERSION" ]]; then
 79 |             VERSION="$INPUT_VERSION"
 80 |             echo "Using version from input: $VERSION"
 81 |           else
 82 |             echo "Error: No version found in tag or input"
 83 |             exit 1
 84 |           fi
 85 | 
 86 |           # Update version in Main.swift
 87 |           echo "Updating version in Main.swift to $VERSION"
 88 |           sed -i '' "s/static let current: String = \".*\"/static let current: String = \"$VERSION\"/" libs/lume/src/Main.swift
 89 | 
 90 |           # Set output for later steps
 91 |           echo "version=$VERSION" >> "$GITHUB_OUTPUT"
 92 | 
 93 |       - name: Import Certificates
 94 |         env:
 95 |           APPLICATION_CERT_BASE64: ${{ secrets.APPLICATION_CERT_BASE64 }}
 96 |           INSTALLER_CERT_BASE64: ${{ secrets.INSTALLER_CERT_BASE64 }}
 97 |           CERT_PASSWORD: ${{ secrets.CERT_PASSWORD }}
 98 |           KEYCHAIN_PASSWORD: "temp_password"
 99 |         run: |
100 |           # Create a temporary keychain
101 |           security create-keychain -p "$KEYCHAIN_PASSWORD" build.keychain
102 |           security default-keychain -s build.keychain
103 |           security unlock-keychain -p "$KEYCHAIN_PASSWORD" build.keychain
104 |           security set-keychain-settings -t 3600 -l build.keychain
105 | 
106 |           # Decode certificates (quoted so the base64 payload is not word-split or glob-expanded)
107 |           echo "$APPLICATION_CERT_BASE64" | base64 --decode > application.p12
108 |           echo "$INSTALLER_CERT_BASE64" | base64 --decode > installer.p12
109 | 
110 |           # Import certificates silently (minimize output)
111 |           security import application.p12 -k build.keychain -P "$CERT_PASSWORD" -T /usr/bin/codesign -T /usr/bin/pkgbuild > /dev/null 2>&1
112 |           security import installer.p12 -k build.keychain -P "$CERT_PASSWORD" -T /usr/bin/codesign -T /usr/bin/pkgbuild > /dev/null 2>&1
113 | 
114 |           # Allow codesign to access the certificates (minimal output)
115 |           security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k "$KEYCHAIN_PASSWORD" build.keychain > /dev/null 2>&1
116 | 
117 |           # Verify certificates were imported. grep -c already prints 0 (and exits 1) on no match, so only neutralize the exit status; appending another "0" would break the -eq tests below
118 |           echo "Verifying signing identities..."
119 |           CERT_COUNT=$(security find-identity -v -p codesigning build.keychain | grep -c "Developer ID Application" || true)
120 |           INSTALLER_COUNT=$(security find-identity -v build.keychain | grep -c "Developer ID Installer" || true)
121 | 
122 |           if [ "$CERT_COUNT" -eq 0 ]; then
123 |             echo "Error: No Developer ID Application certificate found"
124 |             security find-identity -v -p codesigning build.keychain
125 |             exit 1
126 |           fi
127 | 
128 |           if [ "$INSTALLER_COUNT" -eq 0 ]; then
129 |             echo "Error: No Developer ID Installer certificate found"
130 |             security find-identity -v build.keychain
131 |             exit 1
132 |           fi
133 | 
134 |           echo "Found $CERT_COUNT Developer ID Application certificate(s) and $INSTALLER_COUNT Developer ID Installer certificate(s)"
135 |           echo "All required certificates verified successfully"
136 | 
137 |           # Clean up certificate files
138 |           rm application.p12 installer.p12
139 | 
140 |       - name: Build and Notarize
141 |         id: build_notarize
142 |         env:
143 |           APPLE_ID: ${{ secrets.APPLE_ID }}
144 |           TEAM_ID: ${{ secrets.TEAM_ID }}
145 |           APP_SPECIFIC_PASSWORD: ${{ secrets.APP_SPECIFIC_PASSWORD }}
146 |           # These will now reference the imported certificates
147 |           CERT_APPLICATION_NAME: "Developer ID Application: ${{ secrets.DEVELOPER_NAME }} (${{ secrets.TEAM_ID }})"
148 |           CERT_INSTALLER_NAME: "Developer ID Installer: ${{ secrets.DEVELOPER_NAME }} (${{ secrets.TEAM_ID }})"
149 |           VERSION: ${{ steps.set_version.outputs.version }}
150 |         working-directory: ./libs/lume
151 |         run: |
152 |           # Minimal debug information
153 |           echo "Starting build process..."
154 |           echo "Swift version: $(swift --version | head -n 1)"
155 |           echo "Building version: $VERSION"
156 | 
157 |           # Ensure .release directory exists
158 |           mkdir -p .release
159 |           chmod 755 .release
160 | 
161 |           # Build the project first (redirect verbose output)
162 |           echo "Building project..."
163 |           swift build --configuration release > build.log 2>&1
164 |           echo "Build completed."
165 | 
166 |           # Run the notarization script with LOG_LEVEL env var
167 |           chmod +x scripts/build/build-release-notarized.sh
168 |           cd scripts/build
169 |           LOG_LEVEL=minimal ./build-release-notarized.sh
170 | 
171 |           # Return to the lume directory
172 |           cd ../..
173 | 
174 |           # Debug: List what files were actually created
175 |           echo "Files in .release directory:"
176 |           find .release -type f -name "*.tar.gz" -o -name "*.pkg.tar.gz"
177 | 
178 |           # Get architecture for output filename
179 |           ARCH=$(uname -m)
180 |           OS_IDENTIFIER="darwin-${ARCH}"
181 | 
182 |           # Output paths for later use
183 |           echo "tarball_path=.release/lume-${VERSION}-${OS_IDENTIFIER}.tar.gz" >> $GITHUB_OUTPUT
184 |           echo "pkg_path=.release/lume-${VERSION}-${OS_IDENTIFIER}.pkg.tar.gz" >> $GITHUB_OUTPUT
185 | 
186 |       - name: Generate SHA256 Checksums
187 |         id: generate_checksums
188 |         working-directory: ./libs/lume/.release
189 |         run: |
190 |           # Use existing checksums file if it exists, otherwise generate one
191 |           if [ -f "checksums.txt" ]; then
192 |             echo "Using existing checksums file"
193 |             cat checksums.txt
194 |           else
195 |             echo "## SHA256 Checksums" > checksums.txt
196 |             echo '```' >> checksums.txt
197 |             shasum -a 256 lume-*.tar.gz >> checksums.txt
198 |             echo '```' >> checksums.txt
199 |           fi
200 | 
201 |           checksums=$(cat checksums.txt)
202 |           echo "checksums<<EOF" >> $GITHUB_OUTPUT
203 |           echo "$checksums" >> $GITHUB_OUTPUT
204 |           echo "EOF" >> $GITHUB_OUTPUT
205 | 
206 |           # Debug: Show all files in the release directory
207 |           echo "All files in release directory:"
208 |           ls -la
209 | 
210 |       - name: Create Standard Version Releases
211 |         working-directory: ./libs/lume/.release
212 |         run: |
213 |           VERSION=${{ steps.set_version.outputs.version }}
214 |           ARCH=$(uname -m)
215 |           OS_IDENTIFIER="darwin-${ARCH}"
216 | 
217 |           # Create OS-tagged symlinks
218 |           ln -sf "lume-${VERSION}-${OS_IDENTIFIER}.tar.gz" "lume-darwin.tar.gz"
219 |           ln -sf "lume-${VERSION}-${OS_IDENTIFIER}.pkg.tar.gz" "lume-darwin.pkg.tar.gz"
220 | 
221 |           # Create simple symlinks
222 |           ln -sf "lume-${VERSION}-${OS_IDENTIFIER}.tar.gz" "lume.tar.gz"
223 |           ln -sf "lume-${VERSION}-${OS_IDENTIFIER}.pkg.tar.gz" "lume.pkg.tar.gz"
224 | 
225 |           # List all files (including symlinks)
226 |           echo "Files with symlinks in release directory:"
227 |           ls -la
228 | 
229 |       - name: Upload Notarized Package (Tarball)
230 |         uses: actions/upload-artifact@v4
231 |         with:
232 |           name: lume-notarized-tarball
233 |           path: ./libs/lume/${{ steps.build_notarize.outputs.tarball_path }}
234 |           if-no-files-found: error
235 | 
236 |       - name: Upload Notarized Package (Installer)
237 |         uses: actions/upload-artifact@v4
238 |         with:
239 |           name: lume-notarized-installer
240 |           path: ./libs/lume/${{ steps.build_notarize.outputs.pkg_path }}
241 |           if-no-files-found: error
242 | 
243 |       - name: Create Release
244 |         if: startsWith(github.ref, 'refs/tags/lume-v')
245 |         uses: softprops/action-gh-release@v1
246 |         with:
247 |           files: |
248 |             ./libs/lume/${{ steps.build_notarize.outputs.tarball_path }}
249 |             ./libs/lume/${{ steps.build_notarize.outputs.pkg_path }}
250 |             ./libs/lume/.release/lume-darwin.tar.gz
251 |             ./libs/lume/.release/lume-darwin.pkg.tar.gz
252 |             ./libs/lume/.release/lume.tar.gz
253 |             ./libs/lume/.release/lume.pkg.tar.gz
254 |           body: |
255 |             ${{ steps.generate_checksums.outputs.checksums }}
256 | 
257 |             ### Installation with script
258 |             ```bash
259 |             /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
260 |             ```
261 |           generate_release_notes: true
262 |           make_latest: true
263 | 
```

--------------------------------------------------------------------------------
/scripts/playground-docker.sh:
--------------------------------------------------------------------------------

```bash
  1 | #!/bin/bash
  2 | 
  3 | set -e
  4 | 
  5 | # Colors for output
  6 | GREEN='\033[0;32m'
  7 | BLUE='\033[0;34m'
  8 | RED='\033[0;31m'
  9 | YELLOW='\033[1;33m'
 10 | NC='\033[0m' # No Color
 11 | 
 12 | # Colored output helpers: each prints "==> <first argument>" in one color, then resets with NC
 13 | print_info() {  # blue
 14 |     echo -e "${BLUE}==> $1${NC}"
 15 | }
 16 | 
 17 | print_success() {  # green
 18 |     echo -e "${GREEN}==> $1${NC}"
 19 | }
 20 | 
 21 | print_error() {  # red
 22 |     echo -e "${RED}==> $1${NC}"
 23 | }
 24 | 
 25 | print_warning() {  # yellow
 26 |     echo -e "${YELLOW}==> $1${NC}"
 27 | }
 28 | 
 29 | echo "🚀 Launching Cua Computer-Use Agent UI..."
 30 | 
 31 | # Check if Docker is installed
 32 | if ! command -v docker &> /dev/null; then
 33 |     print_error "Docker is not installed!"
 34 |     echo ""
 35 |     echo "To use Cua with Docker containers, you need to install Docker first:"
 36 |     echo ""
 37 |     echo "📦 Install Docker:"
 38 |     echo "  • macOS: Download Docker Desktop from https://docker.com/products/docker-desktop"
 39 |     echo "  • Windows: Download Docker Desktop from https://docker.com/products/docker-desktop"
 40 |     echo "  • Linux: Follow instructions at https://docs.docker.com/engine/install/"
 41 |     echo ""
 42 |     echo "After installing Docker, run this script again."
 43 |     exit 1
 44 | fi
 45 | 
 46 | # Check if Docker daemon is running
 47 | if ! docker info &> /dev/null; then
 48 |     print_error "Docker is installed but not running!"
 49 |     echo ""
 50 |     echo "Please start Docker Desktop and try again."
 51 |     exit 1
 52 | fi
 53 | 
 54 | print_success "Docker is installed and running!"
 55 | 
 56 | # Save the original working directory
 57 | ORIGINAL_DIR="$(pwd)"
 58 | 
 59 | DEMO_DIR="$HOME/.cua"
 60 | mkdir -p "$DEMO_DIR"
 61 | 
 62 | 
 63 | # Check if we're already in the cua repository
 64 | # Look for the specific trycua identifier in pyproject.toml
 65 | if [[ -f "pyproject.toml" ]] && grep -q "gh@trycua.com" "pyproject.toml"; then
 66 |   print_success "Already in Cua repository - using current directory"
 67 |   REPO_DIR="$ORIGINAL_DIR"
 68 |   USE_EXISTING_REPO=true
 69 | else
 70 |   # Directories used by the script when not in repo
 71 |   REPO_DIR="$DEMO_DIR/cua"
 72 |   USE_EXISTING_REPO=false
 73 | fi
 74 | 
 75 | # Function to clean up on exit
 76 | cleanup() {
 77 |   cd "$ORIGINAL_DIR" 2>/dev/null || true
 78 | }
 79 | trap cleanup EXIT
 80 | 
 81 | echo ""
 82 | echo "Choose your Cua setup:"
 83 | echo "1) ☁️  Cua Cloud Sandbox (works on any system)"
 84 | echo "2) 🖥️  Local macOS VMs (requires Apple Silicon Mac + macOS 15+)"
 85 | echo "3) 🖥️  Local Windows VMs (requires Windows 10 / 11)"
 86 | echo ""
 87 | read -p "Enter your choice (1, 2, or 3): " CHOICE
 88 | 
 89 | if [[ "$CHOICE" == "1" ]]; then
 90 |   # Cua Cloud Sandbox setup
 91 |   echo ""
 92 |   print_info "Setting up Cua Cloud Sandbox..."
 93 |   echo ""
 94 |   
 95 |   # Check if existing .env.local already has CUA_API_KEY
 96 |   REPO_ENV_FILE="$REPO_DIR/.env.local"
 97 |   CURRENT_ENV_FILE="$ORIGINAL_DIR/.env.local"
 98 |   
 99 |   CUA_API_KEY=""
100 |   
101 |   # First check current directory
102 |   if [[ -f "$CURRENT_ENV_FILE" ]] && grep -q "CUA_API_KEY=" "$CURRENT_ENV_FILE"; then
103 |     EXISTING_CUA_KEY=$(grep "CUA_API_KEY=" "$CURRENT_ENV_FILE" | cut -d'=' -f2- | tr -d '"' | tr -d "'" | xargs)
104 |     if [[ -n "$EXISTING_CUA_KEY" && "$EXISTING_CUA_KEY" != "your_cua_api_key_here" && "$EXISTING_CUA_KEY" != "" ]]; then
105 |       CUA_API_KEY="$EXISTING_CUA_KEY"
106 |     fi
107 |   fi
108 |   
109 |   # Then check repo directory if not found in current dir
110 |   if [[ -z "$CUA_API_KEY" ]] && [[ -f "$REPO_ENV_FILE" ]] && grep -q "CUA_API_KEY=" "$REPO_ENV_FILE"; then
111 |     EXISTING_CUA_KEY=$(grep "CUA_API_KEY=" "$REPO_ENV_FILE" | cut -d'=' -f2- | tr -d '"' | tr -d "'" | xargs)
112 |     if [[ -n "$EXISTING_CUA_KEY" && "$EXISTING_CUA_KEY" != "your_cua_api_key_here" && "$EXISTING_CUA_KEY" != "" ]]; then
113 |       CUA_API_KEY="$EXISTING_CUA_KEY"
114 |     fi
115 |   fi
116 |   
117 |   # If no valid API key found, prompt for one
118 |   if [[ -z "$CUA_API_KEY" ]]; then
119 |     echo "To use Cua Cloud Sandbox, you need to:"
120 |     echo "1. Sign up at https://trycua.com"
121 |     echo "2. Create a Cloud Sandbox"
122 |     echo "3. Generate an Api Key"
123 |     echo ""
124 |     read -p "Enter your Cua Api Key: " CUA_API_KEY
125 |     
126 |     if [[ -z "$CUA_API_KEY" ]]; then
127 |       print_error "Cua Api Key is required for Cloud Sandbox."
128 |       exit 1
129 |     fi
130 |   else
131 |     print_success "Found existing CUA API key"
132 |   fi
133 |   
134 |   USE_CLOUD=true
135 |   COMPUTER_TYPE="cloud"
136 | 
137 | elif [[ "$CHOICE" == "2" ]]; then
138 |   # Local macOS VM setup
139 |   echo ""
140 |   print_info "Setting up local macOS VMs..."
141 |   
142 |   # Check for Apple Silicon Mac
143 |   if [[ $(uname -s) != "Darwin" || $(uname -m) != "arm64" ]]; then
144 |     print_error "Local macOS VMs require an Apple Silicon Mac (M1/M2/M3/M4)."
145 |     echo "💡 Consider using Cua Cloud Sandbox instead (option 1)."
146 |     exit 1
147 |   fi
148 | 
149 |   # Check for macOS 15 (Sequoia) or newer
150 |   OSVERSION=$(sw_vers -productVersion)
151 |   if [[ $(echo "$OSVERSION 15.0" | tr " " "\n" | sort -V | head -n 1) != "15.0" ]]; then
152 |     print_error "Local macOS VMs require macOS 15 (Sequoia) or newer. You have $OSVERSION."
153 |     echo "💡 Consider using Cua Cloud Sandbox instead (option 1)."
154 |     exit 1
155 |   fi
156 | 
157 |   USE_CLOUD=false
158 |   COMPUTER_TYPE="macos"
159 | 
160 | elif [[ "$CHOICE" == "3" ]]; then
161 |   # Local Windows VM setup
162 |   echo ""
163 |   print_info "Setting up local Windows VMs..."
164 |   
165 |   # Check if we're on Windows
166 |   if [[ $(uname -s) != MINGW* && $(uname -s) != CYGWIN* && $(uname -s) != MSYS* ]]; then
167 |     print_error "Local Windows VMs require Windows 10 or 11."
168 |     echo "💡 Consider using Cua Cloud Sandbox instead (option 1)."
169 |     echo ""
170 |     echo "🔗 If you are using WSL, refer to the blog post to get started: https://www.trycua.com/blog/windows-sandbox"
171 |     exit 1
172 |   fi
173 | 
174 |   USE_CLOUD=false
175 |   COMPUTER_TYPE="windows"
176 | 
177 | else
178 |   print_error "Invalid choice. Please run the script again and choose 1, 2, or 3."
179 |   exit 1
180 | fi
181 | 
182 | print_success "All checks passed! 🎉"
183 | 
184 | # Create demo directory and handle repository
185 | if [[ "$USE_EXISTING_REPO" == "true" ]]; then
186 |   print_info "Using existing repository in current directory"
187 |   cd "$REPO_DIR"
188 | else  
189 |   # Clone or update the repository
190 |   if [[ ! -d "$REPO_DIR" ]]; then
191 |     print_info "Cloning Cua repository..."
192 |     cd "$DEMO_DIR"
193 |     git clone https://github.com/trycua/cua.git
194 |   else
195 |     print_info "Updating Cua repository..."
196 |     cd "$REPO_DIR"
197 |     git pull origin main
198 |   fi
199 |   
200 |   cd "$REPO_DIR"
201 | fi
202 | 
203 | # Create .env.local file with API keys
204 | ENV_FILE="$REPO_DIR/.env.local"
205 | if [[ ! -f "$ENV_FILE" ]]; then
206 |   cat > "$ENV_FILE" << EOF
207 | # Uncomment and add your API keys here
208 | # OPENAI_API_KEY=your_openai_api_key_here
209 | # ANTHROPIC_API_KEY=your_anthropic_api_key_here
210 | CUA_API_KEY=your_cua_api_key_here
211 | EOF
212 |   print_success "Created .env.local file with API key placeholders"
213 | else
214 |   print_success "Found existing .env.local file - keeping your current settings"
215 | fi
216 | 
217 | if [[ "$USE_CLOUD" == "true" ]]; then
218 |   # Add CUA API key to .env.local if not already present
219 |   if ! grep -q "CUA_API_KEY" "$ENV_FILE"; then
220 |     echo "CUA_API_KEY=$CUA_API_KEY" >> "$ENV_FILE"
221 |     print_success "Added CUA_API_KEY to .env.local"
222 |   elif grep -q "CUA_API_KEY=your_cua_api_key_here" "$ENV_FILE"; then
223 |     ESCAPED_KEY=$(printf '%s' "$CUA_API_KEY" | sed 's/[&|\\]/\\&/g')  # escape sed replacement metacharacters (&, |, \) in the key
224 |     sed -i.bak "s|CUA_API_KEY=your_cua_api_key_here|CUA_API_KEY=$ESCAPED_KEY|" "$ENV_FILE"  # update placeholder with actual key
225 |     print_success "Updated CUA_API_KEY in .env.local"
226 |   fi
227 | fi
228 | 
229 | # Build the Docker image if it doesn't exist
230 | print_info "Checking Docker image..."
231 | if ! docker image inspect cua-dev-image &> /dev/null; then
232 |   print_info "Building Docker image (this may take a while)..."
233 |   ./scripts/run-docker-dev.sh build
234 | else
235 |   print_success "Docker image already exists"
236 | fi
237 | 
238 | # Install Lume if needed for local VMs
239 | if [[ "$USE_CLOUD" == "false" && "$COMPUTER_TYPE" == "macos" ]]; then
240 |   if ! command -v lume &> /dev/null; then
241 |     print_info "Installing Lume CLI..."
242 |     curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh | bash
243 |     
244 |     # Add lume to PATH for this session if it's not already there
245 |     if ! command -v lume &> /dev/null; then
246 |       export PATH="$PATH:$HOME/.local/bin"
247 |     fi
248 |   fi
249 | 
250 |   # Pull the macOS CUA image if not already present
251 |   if ! lume ls | grep -q "macos-sequoia-cua"; then
252 |     # Check available disk space
253 |     IMAGE_SIZE_GB=30
254 |     AVAILABLE_SPACE_KB=$(df -k $HOME | tail -1 | awk '{print $4}')
255 |     AVAILABLE_SPACE_GB=$(($AVAILABLE_SPACE_KB / 1024 / 1024))
256 |     
257 |     echo "📊 The macOS CUA image will use approximately ${IMAGE_SIZE_GB}GB of disk space."
258 |     echo "   You currently have ${AVAILABLE_SPACE_GB}GB available on your system."
259 |     
260 |     # Prompt for confirmation
261 |     read -p "   Continue? [y]/n: " CONTINUE
262 |     CONTINUE=${CONTINUE:-y}
263 |     
264 |     if [[ $CONTINUE =~ ^[Yy]$ ]]; then
265 |       print_info "Pulling macOS CUA image (this may take a while)..."
266 |       
267 |       # Use caffeinate on macOS to prevent system sleep during the pull
268 |       if command -v caffeinate &> /dev/null; then
269 |         print_info "Using caffeinate to prevent system sleep during download..."
270 |         caffeinate -i lume pull macos-sequoia-cua:latest
271 |       else
272 |         lume pull macos-sequoia-cua:latest
273 |       fi
274 |     else
275 |       print_error "Installation cancelled."
276 |       exit 1
277 |     fi
278 |   fi
279 | 
280 |   # Check if the VM is running
281 |   print_info "Checking if the macOS CUA VM is running..."
282 |   VM_RUNNING=$(lume ls | grep "macos-sequoia-cua" | grep "running" || echo "")
283 | 
284 |   if [ -z "$VM_RUNNING" ]; then
285 |     print_info "Starting the macOS CUA VM in the background..."
286 |     lume run macos-sequoia-cua:latest &
287 |     # Wait a moment for the VM to initialize
288 |     sleep 5
289 |     print_success "VM started successfully."
290 |   else
291 |     print_success "macOS CUA VM is already running."
292 |   fi
293 | fi
294 | 
295 | # Create a convenience script to run the demo
296 | cat > "$DEMO_DIR/start_ui.sh" << EOF
297 | #!/bin/bash
298 | cd "$REPO_DIR"
299 | ./scripts/run-docker-dev.sh run agent_ui_examples.py
300 | EOF
301 | chmod +x "$DEMO_DIR/start_ui.sh"
302 | 
303 | print_success "Setup complete!"
304 | 
305 | if [[ "$USE_CLOUD" == "true" ]]; then
306 |   echo "☁️  Cua Cloud Sandbox setup complete!"
307 | else
308 |   echo "🖥️  Cua Local VM setup complete!"
309 | fi
310 | 
311 | echo "📝 Edit $ENV_FILE to update your API keys"
312 | echo "🖥️  Start the playground by running: $DEMO_DIR/start_ui.sh"
313 | 
314 | # Start the demo automatically
315 | echo
316 | print_info "Starting the Cua Computer-Use Agent UI..."
317 | echo ""
318 | 
319 | print_success "Cua Computer-Use Agent UI is now running at http://localhost:7860/"
320 | echo
321 | echo "🌐 Open your browser and go to: http://localhost:7860/"
322 | echo
323 | "$DEMO_DIR/start_ui.sh"
324 | 
```

--------------------------------------------------------------------------------
/tests/test_mcp_server_streaming.py:
--------------------------------------------------------------------------------

```python
  1 | import asyncio
  2 | import importlib.util
  3 | import sys
  4 | import types
  5 | from pathlib import Path
  6 | 
  7 | import pytest
  8 | 
  9 | 
 10 | def _install_stub_module(name: str, module: types.ModuleType, registry: dict[str, types.ModuleType | None]) -> None:
 11 |     registry[name] = sys.modules.get(name)  # remember the previous entry (None if absent) so it can be restored later
 12 |     sys.modules[name] = module  # from here on, importing `name` resolves to the stub
 13 | 
 14 | 
 15 | @pytest.fixture
 16 | def server_module():
 17 |     stubbed_modules: dict[str, types.ModuleType | None] = {}
 18 | 
 19 |     # Stub MCP Context primitives
 20 |     mcp_module = types.ModuleType("mcp")
 21 |     mcp_module.__path__ = []  # mark as package
 22 | 
 23 |     mcp_server_module = types.ModuleType("mcp.server")
 24 |     mcp_server_module.__path__ = []
 25 | 
 26 |     fastmcp_module = types.ModuleType("mcp.server.fastmcp")
 27 | 
 28 |     class _StubContext:
 29 |         async def yield_message(self, *args, **kwargs):
 30 |             return None
 31 | 
 32 |         async def yield_tool_call(self, *args, **kwargs):
 33 |             return None
 34 | 
 35 |         async def yield_tool_output(self, *args, **kwargs):
 36 |             return None
 37 | 
 38 |         def report_progress(self, *_args, **_kwargs):
 39 |             return None
 40 | 
 41 |         def info(self, *_args, **_kwargs):
 42 |             return None
 43 | 
 44 |         def error(self, *_args, **_kwargs):
 45 |             return None
 46 | 
 47 |     class _StubImage:
 48 |         def __init__(self, format: str, data: bytes):
 49 |             self.format = format
 50 |             self.data = data
 51 | 
 52 |     class _StubFastMCP:
 53 |         def __init__(self, name: str):
 54 |             self.name = name
 55 |             self._tools: dict[str, types.FunctionType] = {}
 56 | 
 57 |         def tool(self, *args, **kwargs):
 58 |             def decorator(func):
 59 |                 self._tools[func.__name__] = func
 60 |                 return func
 61 | 
 62 |             return decorator
 63 | 
 64 |         def run(self):
 65 |             return None
 66 | 
 67 |     fastmcp_module.Context = _StubContext
 68 |     fastmcp_module.FastMCP = _StubFastMCP
 69 |     fastmcp_module.Image = _StubImage
 70 | 
 71 |     _install_stub_module("mcp", mcp_module, stubbed_modules)
 72 |     _install_stub_module("mcp.server", mcp_server_module, stubbed_modules)
 73 |     _install_stub_module("mcp.server.fastmcp", fastmcp_module, stubbed_modules)
 74 | 
 75 |     # Stub Computer module to avoid heavy dependencies
 76 |     computer_module = types.ModuleType("computer")
 77 | 
 78 |     class _StubInterface:
 79 |         async def screenshot(self) -> bytes:  # pragma: no cover - default stub
 80 |             return b""
 81 | 
 82 |     class _StubComputer:
 83 |         def __init__(self, *args, **kwargs):
 84 |             self.interface = _StubInterface()
 85 | 
 86 |         async def run(self):  # pragma: no cover - default stub
 87 |             return None
 88 | 
 89 |     class _StubVMProviderType:
 90 |         CLOUD = "cloud"
 91 |         LOCAL = "local"
 92 | 
 93 |     computer_module.Computer = _StubComputer
 94 |     computer_module.VMProviderType = _StubVMProviderType
 95 | 
 96 |     _install_stub_module("computer", computer_module, stubbed_modules)
 97 | 
 98 |     # Stub agent module so server can import ComputerAgent
 99 |     agent_module = types.ModuleType("agent")
100 | 
101 |     class _StubComputerAgent:
102 |         def __init__(self, *args, **kwargs):
103 |             pass
104 | 
105 |         async def run(self, *_args, **_kwargs):  # pragma: no cover - default stub
106 |             if False:  # pragma: no cover
107 |                 yield {}
108 |             return
109 | 
110 |     agent_module.ComputerAgent = _StubComputerAgent
111 | 
112 |     _install_stub_module("agent", agent_module, stubbed_modules)
113 | 
114 |     module_name = "mcp_server_server_under_test"
115 |     module_path = Path("libs/python/mcp-server/mcp_server/server.py").resolve()
116 |     spec = importlib.util.spec_from_file_location(module_name, module_path)
117 |     server_module = importlib.util.module_from_spec(spec)
118 |     assert spec and spec.loader
119 |     spec.loader.exec_module(server_module)
120 | 
121 |     server_instance = getattr(server_module, "server", None)
122 |     if server_instance is not None and hasattr(server_instance, "_tools"):
123 |         for name, func in server_instance._tools.items():
124 |             setattr(server_module, name, func)
125 | 
126 |     try:
127 |         yield server_module
128 |     finally:
129 |         sys.modules.pop(module_name, None)
130 |         for name, original in stubbed_modules.items():
131 |             if original is None:
132 |                 sys.modules.pop(name, None)
133 |             else:
134 |                 sys.modules[name] = original
135 | 
136 | 
137 | class FakeContext:
138 |     def __init__(self) -> None:
139 |         self.events: list[tuple] = []
140 |         self.progress_updates: list[float] = []
141 | 
142 |     def info(self, message: str) -> None:
143 |         self.events.append(("info", message))
144 | 
145 |     def error(self, message: str) -> None:
146 |         self.events.append(("error", message))
147 | 
148 |     def report_progress(self, value: float) -> None:
149 |         self.progress_updates.append(value)
150 | 
151 |     async def yield_message(self, *, role: str, content):
152 |         timestamp = asyncio.get_running_loop().time()
153 |         self.events.append(("message", role, content, timestamp))
154 | 
155 |     async def yield_tool_call(self, *, name: str | None, call_id: str, input):
156 |         timestamp = asyncio.get_running_loop().time()
157 |         self.events.append(("tool_call", name, call_id, input, timestamp))
158 | 
159 |     async def yield_tool_output(self, *, call_id: str, output, is_error: bool = False):
160 |         timestamp = asyncio.get_running_loop().time()
161 |         self.events.append(("tool_output", call_id, output, is_error, timestamp))
162 | 
163 | 
def test_run_cua_task_streams_partial_results(server_module):
    """run_cua_task must surface agent output to the context while still running.

    A scripted fake agent yields an assistant message immediately and then a
    tool call/output pair followed by a short pause. The test checks that the
    message event is visible before the task finishes, and that the final
    result contains both texts plus exactly one screenshot.
    """

    def _assistant_chunk(_messages):
        # First scripted step: a plain assistant text message.
        return {
            "output": [
                {
                    "type": "message",
                    "role": "assistant",
                    "content": [
                        {"type": "output_text", "text": "First chunk"}
                    ],
                }
            ]
        }

    def _tool_round_trip(_messages):
        # Second scripted step: a tool call together with its output.
        return {
            "output": [
                {
                    "type": "tool_use",
                    "id": "call_1",
                    "name": "computer",
                    "input": {"action": "click"},
                },
                {
                    "type": "computer_call_output",
                    "call_id": "call_1",
                    "output": [
                        {"type": "text", "text": "Tool completed"}
                    ],
                },
            ]
        }

    async def _scenario():
        class FakeAgent:
            # (factory, delay) pairs replayed by run(); filled in below.
            script = []

            def __init__(self, *args, **kwargs):
                pass

            async def run(self, messages):  # type: ignore[override]
                for build, pause in type(self).script:
                    yield build(messages)
                    if pause:
                        await asyncio.sleep(pause)

        # The 0.05s pause after the second step keeps the task alive long
        # enough for the mid-run assertions below.
        FakeAgent.script = [
            (_assistant_chunk, 0.0),
            (_tool_round_trip, 0.05),
        ]

        class FakeInterface:
            def __init__(self) -> None:
                self.calls = 0

            async def screenshot(self) -> bytes:
                self.calls += 1
                return b"final-image"

        fake_interface = FakeInterface()
        server_module.global_computer = types.SimpleNamespace(interface=fake_interface)
        server_module.ComputerAgent = FakeAgent  # type: ignore[assignment]

        ctx = FakeContext()

        def _events_of(kind):
            return [entry for entry in ctx.events if entry[0] == kind]

        pending = asyncio.create_task(server_module.run_cua_task(ctx, "open settings"))

        # Give the task a moment to process the first scripted step.
        await asyncio.sleep(0.01)
        assert not pending.done(), "Task should still be running to simulate long operation"
        assert _events_of("message"), "Expected message event before task completion"

        text_result, image = await pending

        assert "First chunk" in text_result
        assert "Tool completed" in text_result
        assert image.data == b"final-image"
        assert fake_interface.calls == 1

        calls = _events_of("tool_call")
        outputs = _events_of("tool_output")
        assert calls and outputs
        assert calls[0][2] == "call_1"
        assert outputs[0][1] == "call_1"

    asyncio.run(_scenario())
249 | 
250 | 
def test_run_multi_cua_tasks_reports_progress(server_module, monkeypatch):
    """run_multi_cua_tasks must return one result per task and report progress.

    The fake agent echoes the first message's content, so each result can be
    matched to its task. Progress is expected to start at 0.0, end at 1.0 and
    be reported six times for three tasks.
    """

    def _echo_first_message(messages):
        # Single scripted step: reply with text derived from the task message.
        return {
            "output": [
                {
                    "type": "message",
                    "role": "assistant",
                    "content": [
                        {
                            "type": "output_text",
                            "text": f"Result for {messages[0].get('content')}",
                        }
                    ],
                }
            ]
        }

    async def _scenario():
        class FakeAgent:
            # (factory, delay) pairs replayed by run(); filled in below.
            script = []

            def __init__(self, *args, **kwargs):
                pass

            async def run(self, messages):  # type: ignore[override]
                for build, pause in type(self).script:
                    yield build(messages)
                    if pause:
                        await asyncio.sleep(pause)

        FakeAgent.script = [(_echo_first_message, 0.0)]

        server_module.ComputerAgent = FakeAgent  # type: ignore[assignment]

        class FakeInterface:
            async def screenshot(self) -> bytes:
                return b"progress-image"

        server_module.global_computer = types.SimpleNamespace(interface=FakeInterface())

        ctx = FakeContext()
        task_names = ["a", "b", "c"]

        results = await server_module.run_multi_cua_tasks(ctx, task_names)

        assert len(results) == 3
        first_text = results[0][0]
        assert first_text == "Result for a"

        progress = ctx.progress_updates
        assert progress[0] == pytest.approx(0.0)
        assert progress[-1] == pytest.approx(1.0)
        assert len(progress) == 6

    asyncio.run(_scenario())
```

--------------------------------------------------------------------------------
/libs/python/computer/computer/providers/cloud/provider.py:
--------------------------------------------------------------------------------

```python
  1 | """Cloud VM provider implementation using CUA Public API.
  2 | 
  3 | Implements the following public API endpoints:
  4 | 
  5 | - GET /v1/vms
  6 | - POST /v1/vms/:name/start
  7 | - POST /v1/vms/:name/stop
  8 | - POST /v1/vms/:name/restart
  9 | """
 10 | 
 11 | import logging
 12 | from typing import Dict, List, Optional, Any
 13 | 
 14 | from ..base import BaseVMProvider, VMProviderType
 15 | from ..types import ListVMsResponse, MinimalVM
 16 | 
 17 | # Setup logging
 18 | logger = logging.getLogger(__name__)
 19 | 
 20 | import asyncio
 21 | import aiohttp
 22 | from urllib.parse import urlparse
 23 | import os
 24 | 
 25 | 
# Base URL of the CUA public API; read once at import time from the
# CUA_API_BASE environment variable, with the hosted endpoint as fallback.
DEFAULT_API_BASE = os.getenv("CUA_API_BASE", "https://api.cua.ai")
 27 | 
 28 | class CloudProvider(BaseVMProvider):
 29 |     """Cloud VM Provider implementation."""
 30 |     def __init__(
 31 |         self,
 32 |         api_key: str,
 33 |         verbose: bool = False,
 34 |         api_base: Optional[str] = None,
 35 |         **kwargs,
 36 |     ):
 37 |         """
 38 |         Args:
 39 |             api_key: API key for authentication
 40 |             name: Name of the VM
 41 |             verbose: Enable verbose logging
 42 |         """
 43 |         assert api_key, "api_key required for CloudProvider"
 44 |         self.api_key = api_key
 45 |         self.verbose = verbose
 46 |         self.api_base = (api_base or DEFAULT_API_BASE).rstrip("/")
 47 | 
 48 |     @property
 49 |     def provider_type(self) -> VMProviderType:
 50 |         return VMProviderType.CLOUD
 51 | 
 52 |     async def __aenter__(self):
 53 |         return self
 54 | 
 55 |     async def __aexit__(self, exc_type, exc_val, exc_tb):
 56 |         pass
 57 | 
 58 |     async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
 59 |         """Get VM information by querying the VM status endpoint.
 60 | 
 61 |         - Build hostname via get_ip(name) → "{name}.containers.cloud.trycua.com"
 62 |         - Probe https://{hostname}:8443/status with a short timeout
 63 |         - If JSON contains a "status" field, return it; otherwise infer
 64 |         - Fallback to DNS resolve check to distinguish unknown vs not_found
 65 |         """
 66 |         hostname = await self.get_ip(name=name)
 67 | 
 68 |         # Try HTTPS probe to the computer-server status endpoint (8443)
 69 |         try:
 70 |             timeout = aiohttp.ClientTimeout(total=3)
 71 |             async with aiohttp.ClientSession(timeout=timeout) as session:
 72 |                 url = f"https://{hostname}:8443/status"
 73 |                 async with session.get(url, allow_redirects=False) as resp:
 74 |                     status_code = resp.status
 75 |                     vm_status: str
 76 |                     vm_os_type: Optional[str] = None
 77 |                     if status_code == 200:
 78 |                         try:
 79 |                             data = await resp.json(content_type=None)
 80 |                             vm_status = str(data.get("status", "ok"))
 81 |                             vm_os_type = str(data.get("os_type"))
 82 |                         except Exception:
 83 |                             vm_status = "unknown"
 84 |                     elif status_code < 500:
 85 |                         vm_status = "unknown"
 86 |                     else:
 87 |                         vm_status = "unknown"
 88 |                     return {
 89 |                         "name": name,
 90 |                         "status": "running" if vm_status == "ok" else vm_status,
 91 |                         "api_url": f"https://{hostname}:8443",
 92 |                         "os_type": vm_os_type,
 93 |                     }
 94 |         except Exception:
 95 |             return {"name": name, "status": "not_found", "api_url": f"https://{hostname}:8443"}
 96 | 
 97 |     async def list_vms(self) -> ListVMsResponse:
 98 |         url = f"{self.api_base}/v1/vms"
 99 |         headers = {
100 |             "Authorization": f"Bearer {self.api_key}",
101 |             "Accept": "application/json",
102 |         }
103 |         async with aiohttp.ClientSession() as session:
104 |             async with session.get(url, headers=headers) as resp:
105 |                 if resp.status == 200:
106 |                     try:
107 |                         data = await resp.json(content_type=None)
108 |                     except Exception:
109 |                         text = await resp.text()
110 |                         logger.error(f"Failed to parse list_vms JSON: {text}")
111 |                         return []
112 |                     if isinstance(data, list):
113 |                         # Enrich with convenience URLs when possible.
114 |                         enriched: List[Dict[str, Any]] = []
115 |                         for item in data:
116 |                             vm = dict(item) if isinstance(item, dict) else {}
117 |                             name = vm.get("name")
118 |                             password = vm.get("password")
119 |                             if isinstance(name, str) and name:
120 |                                 host = f"{name}.containers.cloud.trycua.com"
121 |                                 # api_url: always set if missing
122 |                                 if not vm.get("api_url"):
123 |                                     vm["api_url"] = f"https://{host}:8443"
124 |                                 # vnc_url: only when password available
125 |                                 if not vm.get("vnc_url") and isinstance(password, str) and password:
126 |                                     vm[
127 |                                         "vnc_url"
128 |                                     ] = f"https://{host}/vnc.html?autoconnect=true&password={password}"
129 |                             enriched.append(vm)
130 |                         return enriched  # type: ignore[return-value]
131 |                     logger.warning("Unexpected response for list_vms; expected list")
132 |                     return []
133 |                 elif resp.status == 401:
134 |                     logger.error("Unauthorized: invalid CUA API key for list_vms")
135 |                     return []
136 |                 else:
137 |                     text = await resp.text()
138 |                     logger.error(f"list_vms failed: HTTP {resp.status} - {text}")
139 |                     return []
140 | 
141 |     async def run_vm(self, name: str, image: Optional[str] = None, run_opts: Optional[Dict[str, Any]] = None, storage: Optional[str] = None) -> Dict[str, Any]:
142 |         """Start a VM via public API. Returns a minimal status."""
143 |         url = f"{self.api_base}/v1/vms/{name}/start"
144 |         headers = {
145 |             "Authorization": f"Bearer {self.api_key}",
146 |             "Accept": "application/json",
147 |         }
148 |         async with aiohttp.ClientSession() as session:
149 |             async with session.post(url, headers=headers) as resp:
150 |                 if resp.status in (200, 201, 202, 204):
151 |                     return {"name": name, "status": "starting"}
152 |                 elif resp.status == 404:
153 |                     return {"name": name, "status": "not_found"}
154 |                 elif resp.status == 401:
155 |                     return {"name": name, "status": "unauthorized"}
156 |                 else:
157 |                     text = await resp.text()
158 |                     return {"name": name, "status": "error", "message": text}
159 | 
160 |     async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
161 |         """Stop a VM via public API."""
162 |         url = f"{self.api_base}/v1/vms/{name}/stop"
163 |         headers = {
164 |             "Authorization": f"Bearer {self.api_key}",
165 |             "Accept": "application/json",
166 |         }
167 |         async with aiohttp.ClientSession() as session:
168 |             async with session.post(url, headers=headers) as resp:
169 |                 if resp.status in (200, 202):
170 |                     # Spec says 202 with {"status":"stopping"}
171 |                     body_status: Optional[str] = None
172 |                     try:
173 |                         data = await resp.json(content_type=None)
174 |                         body_status = data.get("status") if isinstance(data, dict) else None
175 |                     except Exception:
176 |                         body_status = None
177 |                     return {"name": name, "status": body_status or "stopping"}
178 |                 elif resp.status == 404:
179 |                     return {"name": name, "status": "not_found"}
180 |                 elif resp.status == 401:
181 |                     return {"name": name, "status": "unauthorized"}
182 |                 else:
183 |                     text = await resp.text()
184 |                     return {"name": name, "status": "error", "message": text}
185 | 
186 |     async def restart_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
187 |         """Restart a VM via public API."""
188 |         url = f"{self.api_base}/v1/vms/{name}/restart"
189 |         headers = {
190 |             "Authorization": f"Bearer {self.api_key}",
191 |             "Accept": "application/json",
192 |         }
193 |         async with aiohttp.ClientSession() as session:
194 |             async with session.post(url, headers=headers) as resp:
195 |                 if resp.status in (200, 202):
196 |                     # Spec says 202 with {"status":"restarting"}
197 |                     body_status: Optional[str] = None
198 |                     try:
199 |                         data = await resp.json(content_type=None)
200 |                         body_status = data.get("status") if isinstance(data, dict) else None
201 |                     except Exception:
202 |                         body_status = None
203 |                     return {"name": name, "status": body_status or "restarting"}
204 |                 elif resp.status == 404:
205 |                     return {"name": name, "status": "not_found"}
206 |                 elif resp.status == 401:
207 |                     return {"name": name, "status": "unauthorized"}
208 |                 else:
209 |                     text = await resp.text()
210 |                     return {"name": name, "status": "error", "message": text}
211 | 
212 |     async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
213 |         logger.warning("CloudProvider.update_vm is not implemented via public API")
214 |         return {"name": name, "status": "unchanged", "message": "update_vm not supported by public API"}
215 | 
216 |     async def get_ip(self, name: Optional[str] = None, storage: Optional[str] = None, retry_delay: int = 2) -> str:
217 |         """
218 |         Return the VM's IP address as '{container_name}.containers.cloud.trycua.com'.
219 |         Uses the provided 'name' argument (the VM name requested by the caller),
220 |         falling back to self.name only if 'name' is None.
221 |         Retries up to 3 times with retry_delay seconds if hostname is not available.
222 |         """
223 |         if name is None:
224 |             raise ValueError("VM name is required for CloudProvider.get_ip")
225 |         return f"{name}.containers.cloud.trycua.com"
226 | 
```

--------------------------------------------------------------------------------
/libs/lume/scripts/install.sh:
--------------------------------------------------------------------------------

```bash
  1 | #!/bin/bash
  2 | set -e
  3 | 
  4 | # Lume Installer
  5 | # This script installs Lume to your system
  6 | 
  7 | # Define colors for output
  8 | BOLD=$(tput bold)
  9 | NORMAL=$(tput sgr0)
 10 | RED=$(tput setaf 1)
 11 | GREEN=$(tput setaf 2)
 12 | BLUE=$(tput setaf 4)
 13 | YELLOW=$(tput setaf 3)
 14 | 
 15 | # Check if running as root or with sudo
 16 | if [ "$(id -u)" -eq 0 ] || [ -n "$SUDO_USER" ]; then
 17 |   echo "${RED}Error: Do not run this script with sudo or as root.${NORMAL}"
 18 |   echo "If you need to install to a system directory, create it first with proper permissions:"
 19 |   echo "  sudo mkdir -p /desired/directory && sudo chown $(whoami) /desired/directory"
 20 |   echo "Then run the installer normally:"
 21 |   echo "  ./install.sh --install-dir=/desired/directory"
 22 |   exit 1
 23 | fi
 24 | 
 25 | # Default installation directory (user-specific, doesn't require sudo)
 26 | DEFAULT_INSTALL_DIR="$HOME/.local/bin"
 27 | INSTALL_DIR="${INSTALL_DIR:-$DEFAULT_INSTALL_DIR}"
 28 | 
 29 | # GitHub info
 30 | GITHUB_REPO="trycua/cua"
 31 | LATEST_RELEASE_URL="https://api.github.com/repos/$GITHUB_REPO/releases/latest"
 32 | 
 33 | # Option to skip background service setup (default: install it)
 34 | INSTALL_BACKGROUND_SERVICE=true
 35 | 
 36 | # Default port for lume serve (default: 7777)
 37 | LUME_PORT=7777
 38 | 
 39 | # Parse command line arguments
 40 | while [ "$#" -gt 0 ]; do
 41 |   case "$1" in
 42 |     --install-dir)
 43 |       INSTALL_DIR="$2"
 44 |       shift
 45 |       ;;
 46 |     --port)
 47 |       LUME_PORT="$2"
 48 |       shift
 49 |       ;;
 50 |     --no-background-service)
 51 |       INSTALL_BACKGROUND_SERVICE=false
 52 |       ;;
 53 |     --help)
 54 |       echo "${BOLD}${BLUE}Lume Installer${NORMAL}"
 55 |       echo "Usage: $0 [OPTIONS]"
 56 |       echo ""
 57 |       echo "Options:"
 58 |       echo "  --install-dir DIR         Install to the specified directory (default: $DEFAULT_INSTALL_DIR)"
 59 |       echo "  --port PORT              Specify the port for lume serve (default: 7777)"
 60 |       echo "  --no-background-service   Do not setup the Lume background service (LaunchAgent)"
 61 |       echo "  --help                    Display this help message"
 62 |       echo ""
 63 |       echo "Examples:"
 64 |       echo "  $0                                   # Install to $DEFAULT_INSTALL_DIR and setup background service"
 65 |       echo "  $0 --install-dir=/usr/local/bin      # Install to system directory (may require root privileges)"
 66 |       echo "  $0 --port 7778                       # Use port 7778 instead of the default 7777"
 67 |       echo "  $0 --no-background-service           # Install without setting up the background service"
 68 |       echo "  INSTALL_DIR=/opt/lume $0             # Install to /opt/lume (legacy env var support)"
 69 |       exit 0
 70 |       ;;
 71 |     *)
 72 |       echo "${RED}Unknown option: $1${NORMAL}"
 73 |       echo "Use --help for usage information"
 74 |       exit 1
 75 |       ;;
 76 |   esac
 77 |   shift
 78 | done
 79 | 
 80 | echo "${BOLD}${BLUE}Lume Installer${NORMAL}"
 81 | echo "This script will install Lume to your system."
 82 | 
 83 | # Check if we're running with appropriate permissions
 84 | check_permissions() {
 85 |   # System directories that typically require root privileges
 86 |   SYSTEM_DIRS=("/usr/local/bin" "/usr/bin" "/bin" "/opt")
 87 |   
 88 |   NEEDS_ROOT=false
 89 |   for DIR in "${SYSTEM_DIRS[@]}"; do
 90 |     if [[ "$INSTALL_DIR" == "$DIR"* ]] && [ ! -w "$INSTALL_DIR" ]; then
 91 |       NEEDS_ROOT=true
 92 |       break
 93 |     fi
 94 |   done
 95 |   
 96 |   if [ "$NEEDS_ROOT" = true ]; then
 97 |     echo "${YELLOW}Warning: Installing to $INSTALL_DIR may require root privileges.${NORMAL}"
 98 |     echo "Consider these alternatives:"
 99 |     echo "  • Install to a user-writable location: $0 --install-dir=$HOME/.local/bin"
100 |     echo "  • Create the directory with correct permissions first:"
101 |     echo "    sudo mkdir -p $INSTALL_DIR && sudo chown $(whoami) $INSTALL_DIR"
102 |     echo ""
103 |     
104 |     # Check if we already have write permission (might have been set up previously)
105 |     if [ ! -w "$INSTALL_DIR" ] && [ ! -w "$(dirname "$INSTALL_DIR")" ]; then
106 |       echo "${RED}Error: You don't have write permission to $INSTALL_DIR${NORMAL}"
107 |       echo "Please choose a different installation directory or ensure you have the proper permissions."
108 |       exit 1
109 |     fi
110 |   fi
111 | }
112 | 
# Detect OS and architecture; only macOS on Apple Silicon passes.
# Sets OS, ARCH and PLATFORM for the later steps.
detect_platform() {
  OS=$(uname -s | tr '[:upper:]' '[:lower:]')
  ARCH=$(uname -m)

  case "$OS" in
    darwin)
      ;;
    *)
      echo "${RED}Error: Currently only macOS is supported.${NORMAL}"
      exit 1
      ;;
  esac

  case "$ARCH" in
    arm64)
      ;;
    *)
      echo "${RED}Error: Lume only supports macOS on Apple Silicon (ARM64).${NORMAL}"
      exit 1
      ;;
  esac

  PLATFORM="darwin-arm64"
  echo "Detected platform: ${BOLD}$PLATFORM${NORMAL}"
}
131 | 
# Create the scratch directory used for the download and extraction.
# Sets TEMP_DIR and registers its removal for when the script exits.
create_temp_dir() {
  TEMP_DIR=$(mktemp -d)
  printf 'Using temporary directory: %s\n' "$TEMP_DIR"

  # Single quotes: $TEMP_DIR is expanded when the trap fires, not here.
  trap 'rm -rf "$TEMP_DIR"' EXIT
}
140 | 
# Download the latest release tarball into $TEMP_DIR/lume.tar.gz.
# Tries the generic asset name first and, if that is not a valid archive,
# the platform-specific asset name.
download_release() {
  local archive="$TEMP_DIR/lume.tar.gz"

  echo "Downloading latest Lume release..."

  # Use the direct download link with the non-versioned symlink
  DOWNLOAD_URL="https://github.com/$GITHUB_REPO/releases/latest/download/lume.tar.gz"
  echo "Downloading from: $DOWNLOAD_URL"

  # curl is the only supported downloader
  if ! command -v curl &> /dev/null; then
    echo "${RED}Error: curl is required but not installed.${NORMAL}"
    exit 1
  fi

  curl -L --progress-bar "$DOWNLOAD_URL" -o "$archive"

  # An absent or empty file means nothing was downloaded at all
  if [ ! -s "$archive" ]; then
    echo "${RED}Error: Failed to download Lume.${NORMAL}"
    echo "The download URL may be incorrect or the file may not exist."
    exit 1
  fi

  # A listable archive is good enough; nothing more to do
  if tar -tzf "$archive" > /dev/null 2>&1; then
    return 0
  fi

  echo "${RED}Error: The downloaded file is not a valid tar.gz archive.${NORMAL}"
  echo "Let's try the alternative URL..."

  # Try alternative URL
  ALT_DOWNLOAD_URL="https://github.com/$GITHUB_REPO/releases/latest/download/lume-$PLATFORM.tar.gz"
  echo "Downloading from alternative URL: $ALT_DOWNLOAD_URL"
  curl -L --progress-bar "$ALT_DOWNLOAD_URL" -o "$archive"

  # Check again
  if ! tar -tzf "$archive" > /dev/null 2>&1; then
    echo "${RED}Error: Could not download a valid Lume archive.${NORMAL}"
    echo "Please try installing Lume manually from: https://github.com/$GITHUB_REPO/releases/latest"
    exit 1
  fi
}
182 | 
# Extract the downloaded archive and install the `lume` binary into
# $INSTALL_DIR, then tell the user how to add that directory to PATH if it is
# not already listed there.
install_binary() {
  echo "Extracting archive..."
  tar -xzf "$TEMP_DIR/lume.tar.gz" -C "$TEMP_DIR"

  # Fail with a clear message instead of a bare `mv` error under `set -e`.
  if [ ! -f "$TEMP_DIR/lume" ]; then
    echo "${RED}Error: The archive did not contain a 'lume' binary.${NORMAL}"
    exit 1
  fi

  echo "Installing to $INSTALL_DIR..."

  # Create install directory if it doesn't exist
  mkdir -p "$INSTALL_DIR"

  # Move the binary to the installation directory
  mv "$TEMP_DIR/lume" "$INSTALL_DIR/"

  # Make the binary executable
  chmod +x "$INSTALL_DIR/lume"

  echo "${GREEN}Installation complete!${NORMAL}"
  echo "Lume has been installed to ${BOLD}$INSTALL_DIR/lume${NORMAL}"

  # Check if the installation directory is in PATH.
  # Compare whole, colon-delimited entries: a plain substring test would
  # treat "/opt" as present when PATH only contains "/opt/homebrew/bin", and
  # an unquoted expansion would interpret glob characters in the directory.
  case ":$PATH:" in
    *":$INSTALL_DIR:"*)
      ;;
    *)
      SHELL_NAME=$(basename "$SHELL")
      echo "${YELLOW}Warning: $INSTALL_DIR is not in your PATH.${NORMAL}"
      case "$SHELL_NAME" in
        zsh)
          echo "To add it, run:"
          echo "  echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.zprofile"
          ;;
        bash)
          echo "To add it, run:"
          echo "  echo 'export PATH=\"\$PATH:$INSTALL_DIR\"' >> ~/.bash_profile"
          ;;
        fish)
          echo "To add it, run:"
          echo "  echo 'fish_add_path $INSTALL_DIR' >> ~/.config/fish/config.fish"
          ;;
        *)
          echo "Add $INSTALL_DIR to your PATH in your shell profile file."
          ;;
      esac
      ;;
  esac
}
225 | 
# Write, register and load the LaunchAgent that runs `lume serve`.
# Expects SERVICE_NAME and PLIST_PATH to be set by the caller.
setup_background_service() {
  LUME_BIN="$INSTALL_DIR/lume"

  echo ""
  echo "Setting up LaunchAgent to run lume daemon on login..."

  # Create LaunchAgents directory if it doesn't exist
  mkdir -p "$HOME/Library/LaunchAgents"

  # Unload existing service if present
  if [ -f "$PLIST_PATH" ]; then
    echo "Existing LaunchAgent found. Unloading..."
    launchctl unload "$PLIST_PATH" 2>/dev/null || true
  fi

  # Create the plist file
  cat <<EOF > "$PLIST_PATH"
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>Label</key>
    <string>$SERVICE_NAME</string>
    <key>ProgramArguments</key>
    <array>
        <string>$LUME_BIN</string>
        <string>serve</string>
        <string>--port</string>
        <string>$LUME_PORT</string>
    </array>
    <key>RunAtLoad</key>
    <true/>
    <key>KeepAlive</key>
    <true/>
    <key>WorkingDirectory</key>
    <string>$HOME</string>
    <key>EnvironmentVariables</key>
    <dict>
        <key>PATH</key>
        <string>/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$HOME/.local/bin</string>
        <key>HOME</key>
        <string>$HOME</string>
    </dict>
    <key>StandardOutPath</key>
    <string>/tmp/lume_daemon.log</string>
    <key>StandardErrorPath</key>
    <string>/tmp/lume_daemon.error.log</string>
    <key>ProcessType</key>
    <string>Interactive</string>
    <key>SessionType</key>
    <string>Aqua</string>
</dict>
</plist>
EOF

  # Set permissions on the plist and make sure both log files exist
  chmod 644 "$PLIST_PATH"
  touch /tmp/lume_daemon.log /tmp/lume_daemon.error.log
  chmod 644 /tmp/lume_daemon.log /tmp/lume_daemon.error.log

  # Load the LaunchAgent (unload first so a stale registration cannot block it)
  echo "Loading LaunchAgent..."
  launchctl unload "$PLIST_PATH" 2>/dev/null || true
  launchctl load "$PLIST_PATH"

  echo "${GREEN}Lume daemon LaunchAgent installed and loaded. It will start automatically on login!${NORMAL}"
  echo "To check status: launchctl list | grep $SERVICE_NAME"
  echo "To view logs: tail -f /tmp/lume_daemon.log"
  echo ""
  echo "To remove the lume daemon service, run:"
  echo "  launchctl unload \"$PLIST_PATH\""
  echo "  rm \"$PLIST_PATH\""
}

# Handle --no-background-service: remove a previously installed LaunchAgent,
# or just report that setup was skipped.
# Expects PLIST_PATH to be set by the caller.
skip_background_service() {
  if [ ! -f "$PLIST_PATH" ]; then
    echo "Skipping Lume background service (LaunchAgent) setup as requested (use --no-background-service)."
    return 0
  fi

  echo "Removing existing Lume background service (LaunchAgent)..."
  launchctl unload "$PLIST_PATH" 2>/dev/null || true
  rm "$PLIST_PATH"
  echo "Lume background service (LaunchAgent) removed."
}

# Main installation flow
main() {
  check_permissions
  detect_platform
  create_temp_dir
  download_release
  install_binary

  echo ""
  echo "${GREEN}${BOLD}Lume has been successfully installed!${NORMAL}"
  echo "Run ${BOLD}lume${NORMAL} to get started."

  # Shared by both the setup and the removal path
  SERVICE_NAME="com.trycua.lume_daemon"
  PLIST_PATH="$HOME/Library/LaunchAgents/$SERVICE_NAME.plist"

  if [ "$INSTALL_BACKGROUND_SERVICE" = true ]; then
    setup_background_service
  else
    skip_background_service
  fi
}

# Run the installation
main
329 | 
```

--------------------------------------------------------------------------------
/blog/hack-the-north.md:
--------------------------------------------------------------------------------

```markdown
  1 | # What happens when hackathon judging is a public benchmark (Hack the North edition)
  2 | 
  3 | *Written by Francesco Bonacci — Reviewed by Parth Patel (HUD W25) — Sept 25, 2025*
  4 | 
  5 | ## Prologue
  6 | 
  7 | Hack the North ran Sept 12–14 at the University of Waterloo. Official count this year: **1,778 hackers**, and a [Guinness World Record for the most people building interlocking plastic brick sculptures simultaneously](https://uwaterloo.ca/news/eweal-making-hackathons-fun-again-breaking-guinness-world-record).
  8 | 
  9 | Our team arrived from Europe and the US one day before the hackathon, after a summer scattered post–YC X25, waiting for our O-1 visas. **HUD**’s founders Parth and Jay flew in from SF to help us run evaluations, and Michael and Parth from **Ollama** joined as co-sponsors.
 10 | 
 11 | Our plan was ambitious: run the **first state-of-the-art Computer-Use Agents track**, score it on a public benchmark, and give the top performer a guaranteed YC interview. (Interview ≠ offer. YC didn’t judge.)
 12 | 
 13 | The rest, as they say, was a 36h story worth telling—and a playbook worth sharing for anyone thinking about running or sponsoring this type of hackathon track.
 14 | 
 15 | ![hack-cua-ollama-hud](./assets/hack-cua-ollama-hud.jpeg)
 16 | 
 17 | ## The sign-up problem we had to invent
 18 | 
 19 | We joined as a sponsor at the last minute, thanks to a push from our friend @Michael Chiang at Ollama—Waterloo alum, naturally. It’s kind of an open secret that UWaterloo turns out some of the sharpest hackers around (*no pun intended, HackMIT*). It was a bit of a scramble, but also great timing—our Agent framework had just finished a major refactor, with support for **100+ VLM configurations** now live. Naturally, we wanted to stress-test it at scale—and see whether teams could come up with SOTA-level setups. *This wasn’t a blank-slate, build-whatever-you-want kind of track.*
 20 | 
 21 | From day one, though, we knew we’d have to fight for sign-ups. This was a niche track, and a guaranteed YC interview alone wouldn’t be enough to pull people in.
 22 | 
 23 | Unfortunately, Hack the North (HTN) didn’t offer an interest form to help us estimate demand, which made capacity planning tricky—especially with early-stage infra. Stress-testing takes foresight, and multimodal language model usage is still costly (~1.5× to 3–4× the price of comparable text-only models).
 24 | 
 25 | On top of that, we were discouraged from external promotion on [lu.ma](http://lu.ma). So we spun up our own sign-up page at **trycua.com/hackathon** and built ad-hoc Discord channels to share track details. We emphasized—repeatedly—that only students already accepted to Hack the North should register.
 26 | 
 27 | *(Moral: the “measure-zero effect”—no matter how many times you say it, some people won’t see it. Plenty of invalid sign-ups still slipped through.)*
 28 | 
 29 | Even so, having your own form is absolutely worth it: it gives you an **early funnel**, surfaces demand signals ahead of time, and—crucially—**lets you require platform sign-up before kickoff**. In our case, Hack the North didn’t provide Devpost access until the very end, so our form was the only way to build a working roster.
 30 | 
 31 | Only a small trickle of sign-ups came through by the time the event kicked off—too few to plan around, but clearly the right kind of crowd. Several were already familiar with computer-use agents; one was even interning at Shopify, working on this space.
 32 | 
 33 | ## At the Sponsor Booth
 34 | 
 35 | Day 0 on campus made the difference. We arrived a couple of hours early to collect swag shipments (around 1,200 stickers of our new **Cua-la** mascot, plus t-shirts and hats—always plan ~1.5× the estimated number of hackers!). After walking the sponsor floor and explaining the track at our booth, ~40 hackers signed up.
 36 | 
 37 | **Moral:** sponsor booths are still the most effective way to recruit for a track.
 38 | 
 39 | **Suggestions to maximize booth time (for HTN this is only ~24 of the total 36 hours):**
 40 | 
 41 | - **Be unmistakable.** Run a mini-challenge and a visible giveaway. We offered 5 × $200 Anthropic credits as a lightning raffle and constantly advertised in HTN Slack. Shout-out to our neighbors at **Mintlify**, who dressed their teammate as a mint plant - memorable and effective.
 42 | - **Create multiple touchpoints.** Hand out flyers and QR codes, and ask nearby booths to cross-refer. Big thanks to the YC team for flyer space and student connections - and to Michael (Ollama) for pointing visitors our way.
 43 | - **Never leave the booth empty.** Keep someone at the booth at all times and rotate shifts. With four founding engineers on-site, coverage was easy. Even after hacking kicked off, the booth stayed a point of reference: multiple participants still DM’d us asking where to meet up.
 44 | - **Students are organic DevRel.** Adam, who went on to earn our special mention, hung out with us at the booth, pulling more people in. Peer-to-peer energy creates the network effect you need!
 45 | 
 46 | ![hack-booth](./assets/hack-booth.png)
 47 | 
 48 | *(Our Founding Engineer, Morgan, hangs out with students at the stand, while Adam (special mention) hacks on the side.)*
 49 | 
 50 | ## 02:30 a.m. is still prime time at a hackathon
 51 | 
 52 | Hack the North gives sponsors a 30-minute API Workshop during the early hours of the event—a perfect moment to shift from talking to building.
 53 | 
 54 | Our slot landed at **2:30 a.m.** (*perks of the cheapest sponsor tier*). Thirty students showed up, energy surprisingly high. James, our new Founding DevRel Engineer, led the session and nailed it.
 55 | 
 56 | **Our track rules were simple:**
 57 | 
 58 | 1. Build a Computer-Use Agent with the [Cua framework](https://github.com/trycua/cua)
 59 | 2. Benchmark the agent on [HUD](https://www.hud.so)
 60 | 3. Use [OSWorld-Tiny](https://huggingface.co/datasets/ddupont/OSWorld-Tiny-Public): a 14-task distillation of the full benchmark (~360 tasks, >1h)
 61 | 
 62 | **Suggestions:**
 63 | 
 64 | - **Leave something tangible.** We provided a Jupyter Notebook teams could run immediately.
 65 | - **Narrow scope, strong starts.** The more focused the challenge, the more **robust starting points** you should provide.
 66 | - **Want the details?** [Here’s the notebook we left participants](https://github.com/trycua/cua/blob/main/notebooks/sota_hackathon.ipynb).
 67 | 
 68 | ![hack-workshop](./assets/hack-workshop.jpeg)
 69 | 
 70 | *(Our CUA Workshop at 2:30 AM.)*
 71 | 
 72 | ## Making it possible to focus on the work
 73 | 
 74 | If you’re an OSS framework, it’s tempting to have hackers self-host on laptops. **Don’t.** You’ll spend the workshop debugging setups instead of reviewing ideas.
 75 | 
 76 | **Lesson learned:** within hours, we shifted to **cloud-only Sandboxes**. Payoff: consistent environments, faster starts, far less tech support.
 77 | 
 78 | We provided:
 79 | 
 80 | - **Credits:** $200 Cua Cloud + $200 HUD per team (manual top-ups for visible progress)
 81 | - **LLMs/VLMs:** Anthropic assigned $50 per participant—tight for VLM iteration—so we added capped access under our org
 82 | - **Pre-kickoff provisioning:** Platform sign-up auto-created projects, keys, and sandboxes
 83 | 
 84 | **Takeaway:** every minute not spent on setup is a minute gained for iterating.
 85 | 
 86 | ## 12 Hours in the Hackathon
 87 | 
 88 | **After the workshop buzz.** Morning interest was high, but Docker setup + requiring focus on a single track thinned the crowd. Most sponsor prizes are broad (“use our product and you qualify”), letting students stack tracks. Ours required commitment. Upside: those who stayed shipped sharper, higher-quality submissions.
 89 | 
 90 | **The bell curve of submissions.** Most entries used *claude-sonnet-4-20250514*—proof that docs and public leaderboards ([OSWorld](https://os-world.github.io/#benchmark)) guide choices. Results clustered around the safe pick, with fewer pushing boundaries.
 91 | 
 92 | **Who went beyond the baseline.** A few tried multi-agent/tool graphs. One standout—[**cuala**](https://github.com/YeIIcw/cuala)—was a clean reference: deterministic actions, verifiable state changes, callbacks for saving images and trajectories.
 93 | 
 94 | **Bottom line:** Early excitement is easy; keeping teams engaged requires reducing friction and offering multiple entry points.
 95 | 
 96 | ### What broke (and why)
 97 | 
 98 | We skipped a full end-to-end **Cua × HUD** dry-run. It showed.
 99 | 
100 | - Hackers ran out of inference credits. Desktop tasks are token-heavy. A full OSWorld run (200 max steps) for *computer-use-preview* (OpenAI Operator API) can cost >$600. Serious attempts: ~400k tokens × 14 tasks.
101 | - Python version/build mismatches surfaced, requiring debug time across both OSS repos.
102 | - Our Cua framework lacked a **Response Agent** to complete evaluation loops. Some runs stalled until patched.
103 | 
104 | ## Scoring and Results
105 | 
106 | ### Participation & Outcomes
107 | 
108 | - ~**30** hackers gave the track a serious try; **5** crossed the finish line
109 | - All submissions were **solo**, mostly undergrads
110 | - Judging: OSWorld-Tiny on HUD, with Cua + HUD reruns to verify scores
111 | - Final leaderboard: [HUD Leaderboard](https://www.hud.so/leaderboards/ddupont/OSWorld-Tiny-Public)
112 | 
113 | ![hack-leaderboard](./assets/hack-leaderboard.png)
114 | 
115 | *(Leaderboard on HUD)*
116 | 
117 | ### Winners
118 | 
119 | **🥇 Winner — Ram**  
120 | - Devpost: https://devpost.com/software/sota-computer-use-agent-challenge  
121 | - Code: https://github.com/Ram-Raghav-S/cua/tree/ram  
122 | - Score: 68.3%
123 | 
124 | **🥈 Runner-up — Aryan**  
125 | - Devpost: https://devpost.com/software/loopdeloop-computer-use-agent-sota-attempt  
126 | - Code: https://github.com/Tumph/cua  
127 | - Score: 55.9%
128 | 
129 | **🥉 Special Mention — Adam**  
130 | - Devpost: https://devpost.com/software/cuala  
131 | - Code: https://github.com/YeIIcw/cuala  
132 | - Score: 42.1%
133 | 
134 | ![hack-winners](./assets/hack-winners.jpeg)
135 | 
136 | *(Our finalists before the award ceremony)*
137 | 
138 | ## What We’d Keep
139 | 
140 | - **Sponsor Hack the North again**
141 | - **Keep a visible, staffed booth**
142 | - **Publish a compact FAQ**
143 | - **Simple, transparent scoring**
144 | 
145 | ## What We’d Change
146 | 
147 | - **Run a full Cua × HUD dry-run under load**
148 | - **Offer multiple on-ramps (evals, creative, RL)**
149 | - **Keep a private eval set for judging**
150 | - **Default to cloud sandboxes**
151 | - **Handle ops earlier (swag, signage, QR codes)**
152 | - **Reward generalization, not lucky runs**
153 | 
154 | ## Closing Thoughts
155 | 
156 | Our first outing as sponsors wasn’t perfect, but it gave us a working playbook: **provision cloud early, keep scoring simple, always dry-run infra, and make the booth unforgettable**.
157 | 
158 | If more hackathon tracks leaned on **public benchmarks**, weekends like this would produce fewer demos-for-show and more measurable progress.
159 | 
160 | **P.S.** Huge thanks to the Ollama and HUD teams for co-sponsoring the track, and to our YC Partner Diana for offering a **guaranteed YC interview** as first prize.
161 | 
162 | Whether you’re a hacker who wants to participate, or a company looking to sponsor, let’s talk — we’re especially excited to support benchmark-first hackathon tracks in the Bay Area this year.
163 | 
164 | ![hack-closing-ceremony](./assets/hack-closing-ceremony.jpg)
165 | 
166 | *(HTN Closing Ceremony — Cua Track Winner Announcement)*
```

--------------------------------------------------------------------------------
/libs/typescript/computer/src/interface/base.ts:
--------------------------------------------------------------------------------

```typescript
  1 | /**
  2 |  * Base interface for computer control.
  3 |  */
  4 | 
  5 | import pino from 'pino';
  6 | import WebSocket from 'ws';
  7 | import type { ScreenSize } from '../types';
  8 | 
  9 | export type MouseButton = 'left' | 'middle' | 'right'; // accepted by mouseDown/mouseUp/dragTo/drag
 10 | /** Cursor location as an x/y pair; the value resolved by getCursorPosition(). */
 11 | export interface CursorPosition {
 12 |   x: number;
 13 |   y: number;
 14 | }
 15 | /** One node of the tree resolved by getAccessibilityTree(); only `role` is required. */
 16 | export interface AccessibilityNode {
 17 |   role: string;
 18 |   title?: string;
 19 |   value?: string;
 20 |   description?: string;
 21 |   bounds?: {
 22 |     x: number;
 23 |     y: number;
 24 |     width: number;
 25 |     height: number;
 26 |   };
 27 |   children?: AccessibilityNode[]; // nested nodes of the same shape
 28 | }
 29 | 
 30 | /**
 31 |  * Base class for computer control interfaces.
 32 |  */
 33 | export abstract class BaseComputerInterface {
 34 |   protected ipAddress: string;
 35 |   protected username: string;
 36 |   protected password: string;
 37 |   /** Set by disconnect(). */
 38 |   protected closed = false;
 39 |   /** Tail of the promise chain that runs sendCommand() calls one at a time. */
 40 |   protected commandLock: Promise<unknown> = Promise.resolve();
 41 |   protected ws: WebSocket;
 42 |   protected apiKey?: string;
 43 |   protected vmName?: string;
 44 | 
 45 |   protected logger = pino({ name: 'computer.interface-base' });
 46 | 
 47 |   /**
 48 |    * Store the connection settings and start opening the WebSocket.
 49 |    * @param ipAddress Host, optionally `host:port`, used to build the WebSocket URI
 50 |    * @param apiKey Sent as the X-API-Key header when vmName is also set
 51 |    * @param vmName Sent as the X-VM-Name header when apiKey is also set
 52 |    */
 53 |   constructor(
 54 |     ipAddress: string,
 55 |     username = 'lume',
 56 |     password = 'lume',
 57 |     apiKey?: string,
 58 |     vmName?: string
 59 |   ) {
 60 |     this.ipAddress = ipAddress;
 61 |     this.username = username;
 62 |     this.password = password;
 63 |     this.apiKey = apiKey;
 64 |     this.vmName = vmName;
 65 | 
 66 |     // Initialize WebSocket with headers if needed
 67 |     const headers: { [key: string]: string } = {};
 68 |     if (this.apiKey && this.vmName) {
 69 |       headers['X-API-Key'] = this.apiKey;
 70 |       headers['X-VM-Name'] = this.vmName;
 71 |     }
 72 | 
 73 |     // Create the WebSocket instance
 74 |     this.ws = new WebSocket(this.wsUri, { headers });
 75 |   }
 76 | 
 77 |   /**
 78 |    * Get the WebSocket URI for connection.
 79 |    * Subclasses can override this to customize the URI.
 80 |    */
 81 |   protected get wsUri(): string {
 82 |     const protocol = this.apiKey ? 'wss' : 'ws';
 83 | 
 84 |     // Check if ipAddress already includes a port
 85 |     if (this.ipAddress.includes(':')) {
 86 |       return `${protocol}://${this.ipAddress}/ws`;
 87 |     }
 88 | 
 89 |     // Otherwise, append the default port
 90 |     const port = this.apiKey ? '8443' : '8000';
 91 |     return `${protocol}://${this.ipAddress}:${port}/ws`;
 92 |   }
 93 | 
 94 |   /**
 95 |    * Wait for interface to be ready.
 96 |    * @param timeout Maximum time to wait in seconds
 97 |    * @throws Error if interface is not ready within timeout
 98 |    */
 99 |   async waitForReady(timeout = 60): Promise<void> {
100 |     const startTime = Date.now();
101 | 
102 |     while (Date.now() - startTime < timeout * 1000) {
103 |       try {
104 |         await this.connect();
105 |         return;
106 |       } catch (error) {
107 |         // JSON.stringify() drops an Error's message, so log the message itself.
108 |         const reason = error instanceof Error ? error.message : String(error);
109 |         this.logger.error(`Error connecting to websocket: ${reason}`);
110 | 
111 |         // Wait a bit before retrying
112 |         await new Promise((resolve) => setTimeout(resolve, 1000));
113 |       }
114 |     }
115 | 
116 |     throw new Error(`Interface not ready after ${timeout} seconds`);
117 |   }
118 | 
119 |   /**
120 |    * Authenticate with the WebSocket server.
121 |    * This should be called immediately after the WebSocket connection is established.
122 |    */
123 |   private async authenticate(): Promise<void> {
124 |     if (!this.apiKey || !this.vmName) {
125 |       // No authentication needed
126 |       return;
127 |     }
128 | 
129 |     this.logger.info('Performing authentication handshake...');
130 |     const authMessage = {
131 |       command: 'authenticate',
132 |       params: {
133 |         api_key: this.apiKey,
134 |         container_name: this.vmName,
135 |       },
136 |     };
137 | 
138 |     // Pin the socket: this.ws can be replaced by a later connect() call.
139 |     const socket = this.ws;
140 |     return new Promise<void>((resolve, reject) => {
141 |       const authHandler = (data: WebSocket.RawData) => {
142 |         // The first reply settles the handshake either way, so detach up front.
143 |         socket.off('message', authHandler);
144 |         try {
145 |           const authResult = JSON.parse(data.toString());
146 |           if (!authResult.success) {
147 |             const errorMsg = authResult.error || 'Authentication failed';
148 |             this.logger.error(`Authentication failed: ${errorMsg}`);
149 |             socket.close();
150 |             reject(new Error(`Authentication failed: ${errorMsg}`));
151 |           } else {
152 |             this.logger.info('Authentication successful');
153 |             resolve();
154 |           }
155 |         } catch (error) {
156 |           reject(error);
157 |         }
158 |       };
159 | 
160 |       socket.on('message', authHandler);
161 |       socket.send(JSON.stringify(authMessage));
162 |     });
163 |   }
164 | 
165 |   /**
166 |    * Connect to the WebSocket server.
167 |    * Resolves after the socket is open and authenticate() has completed.
168 |    */
169 |   public async connect(): Promise<void> {
170 |     // If the WebSocket is already open, check if we need to authenticate
171 |     if (this.ws.readyState === WebSocket.OPEN) {
172 |       this.logger.info(
173 |         'Websocket is open, ensuring authentication is complete.'
174 |       );
175 |       return this.authenticate();
176 |     }
177 | 
178 |     // If the WebSocket is closed or closing, reinitialize it
179 |     if (
180 |       this.ws.readyState === WebSocket.CLOSED ||
181 |       this.ws.readyState === WebSocket.CLOSING
182 |     ) {
183 |       this.logger.info('Websocket is closed. Reinitializing connection.');
184 |       const headers: { [key: string]: string } = {};
185 |       if (this.apiKey && this.vmName) {
186 |         headers['X-API-Key'] = this.apiKey;
187 |         headers['X-VM-Name'] = this.vmName;
188 |       }
189 |       this.ws = new WebSocket(this.wsUri, { headers });
190 |     }
191 | 
192 |     // The socket is CONNECTING here, whether it already was or was just
193 |     // recreated. It cannot send yet, so authentication waits for 'open'.
194 |     const socket = this.ws;
195 |     return new Promise<void>((resolve, reject) => {
196 |       const onOpen = () => {
197 |         // Always authenticate immediately after connection
198 |         this.authenticate().then(resolve, reject);
199 |       };
200 | 
201 |       socket.once('open', onOpen);
202 |       // Kept after 'open' on purpose: an emitter with no 'error' listener
203 |       // throws when an 'error' event is emitted.
204 |       socket.once('error', (error: Error) => {
205 |         socket.off('open', onOpen);
206 |         reject(error);
207 |       });
208 |     });
209 |   }
210 | 
211 |   /**
212 |    * Send a command to the WebSocket server.
213 |    *
214 |    * Commands are chained on commandLock, so each one starts only after the
215 |    * previous one has settled. The next message received on the socket is
216 |    * taken as this command's response.
217 |    *
218 |    * @param command Value of the `command` field of the outgoing message
219 |    * @param params Value of the `params` field of the outgoing message
220 |    * @returns The parsed response; rejects if it has a truthy `error` field
221 |    */
222 |   public async sendCommand(
223 |     command: string,
224 |     params: { [key: string]: unknown } = {}
225 |   ): Promise<{ [key: string]: unknown }> {
226 |     // Create a new promise for this specific command
227 |     const commandPromise = new Promise<{ [key: string]: unknown }>(
228 |       (resolve, reject) => {
229 |         // Chain it to the previous commands
230 |         const executeCommand = async (): Promise<{
231 |           [key: string]: unknown;
232 |         }> => {
233 |           if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
234 |             await this.connect();
235 |           }
236 | 
237 |           return new Promise<{ [key: string]: unknown }>(
238 |             (innerResolve, innerReject) => {
239 |               const messageHandler = (data: WebSocket.RawData) => {
240 |                 try {
241 |                   const response = JSON.parse(data.toString());
242 |                   if (response.error) {
243 |                     innerReject(new Error(response.error));
244 |                   } else {
245 |                     innerResolve(response);
246 |                   }
247 |                 } catch (error) {
248 |                   innerReject(error);
249 |                 }
250 |                 this.ws.off('message', messageHandler);
251 |               };
252 | 
253 |               this.ws.on('message', messageHandler);
254 |               const wsCommand = { command, params };
255 |               this.ws.send(JSON.stringify(wsCommand));
256 |             }
257 |           );
258 |         };
259 | 
260 |         // Add this command to the lock chain
261 |         this.commandLock = this.commandLock.then(() =>
262 |           executeCommand().then(resolve, reject)
263 |         );
264 |       }
265 |     );
266 | 
267 |     return commandPromise;
268 |   }
269 | 
270 |   /**
271 |    * Check if the WebSocket is connected.
272 |    */
273 |   public isConnected(): boolean {
274 |     return this.ws && this.ws.readyState === WebSocket.OPEN;
275 |   }
276 | 
277 |   /**
278 |    * Close the interface connection.
279 |    */
280 |   disconnect(): void {
281 |     this.closed = true;
282 |     if (this.ws && this.ws.readyState === WebSocket.OPEN) {
283 |       this.ws.close();
284 |     } else if (this.ws && this.ws.readyState === WebSocket.CONNECTING) {
285 |       // If still connecting, terminate the connection attempt
286 |       this.ws.terminate();
287 |     }
288 |   }
289 | 
290 |   /**
291 |    * Force close the interface connection.
292 |    * By default, this just calls disconnect(), but subclasses can override
293 |    * to provide more forceful cleanup.
294 |    */
295 |   forceClose(): void {
296 |     this.disconnect();
297 |   }
298 | 
299 |   // Mouse Actions
300 |   abstract mouseDown(
301 |     x?: number,
302 |     y?: number,
303 |     button?: MouseButton
304 |   ): Promise<void>;
305 |   abstract mouseUp(x?: number, y?: number, button?: MouseButton): Promise<void>;
306 |   abstract leftClick(x?: number, y?: number): Promise<void>;
307 |   abstract rightClick(x?: number, y?: number): Promise<void>;
308 |   abstract doubleClick(x?: number, y?: number): Promise<void>;
309 |   abstract moveCursor(x: number, y: number): Promise<void>;
310 |   abstract dragTo(
311 |     x: number,
312 |     y: number,
313 |     button?: MouseButton,
314 |     duration?: number
315 |   ): Promise<void>;
316 |   abstract drag(
317 |     path: Array<[number, number]>,
318 |     button?: MouseButton,
319 |     duration?: number
320 |   ): Promise<void>;
321 | 
322 |   // Keyboard Actions
323 |   abstract keyDown(key: string): Promise<void>;
324 |   abstract keyUp(key: string): Promise<void>;
325 |   abstract typeText(text: string): Promise<void>;
326 |   abstract pressKey(key: string): Promise<void>;
327 |   abstract hotkey(...keys: string[]): Promise<void>;
328 | 
329 |   // Scrolling Actions
330 |   abstract scroll(x: number, y: number): Promise<void>;
331 |   abstract scrollDown(clicks?: number): Promise<void>;
332 |   abstract scrollUp(clicks?: number): Promise<void>;
333 | 
334 |   // Screen Actions
335 |   abstract screenshot(): Promise<Buffer>;
336 |   abstract getScreenSize(): Promise<ScreenSize>;
337 |   abstract getCursorPosition(): Promise<CursorPosition>;
338 | 
339 |   // Clipboard Actions
340 |   abstract copyToClipboard(): Promise<string>;
341 |   abstract setClipboard(text: string): Promise<void>;
342 | 
343 |   // File System Actions
344 |   abstract fileExists(path: string): Promise<boolean>;
345 |   abstract directoryExists(path: string): Promise<boolean>;
346 |   abstract listDir(path: string): Promise<string[]>;
347 |   abstract readText(path: string): Promise<string>;
348 |   abstract writeText(path: string, content: string): Promise<void>;
349 |   abstract readBytes(path: string): Promise<Buffer>;
350 |   abstract writeBytes(path: string, content: Buffer): Promise<void>;
351 |   abstract deleteFile(path: string): Promise<void>;
352 |   abstract createDir(path: string): Promise<void>;
353 |   abstract deleteDir(path: string): Promise<void>;
354 |   abstract runCommand(command: string): Promise<[string, string]>;
355 | 
356 |   // Accessibility Actions
357 |   abstract getAccessibilityTree(): Promise<AccessibilityNode>;
358 |   abstract toScreenCoordinates(x: number, y: number): Promise<[number, number]>;
359 |   abstract toScreenshotCoordinates(
360 |     x: number,
361 |     y: number
362 |   ): Promise<[number, number]>;
363 | }
364 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/integrations/hud/proxy.py:
--------------------------------------------------------------------------------

```python
  1 | """HUD ComputerAgent wrapper and Fake AsyncOpenAI client.
  2 | 
  3 | Provides FakeAsyncOpenAI that adapts our ComputerAgent to the OpenAI Responses
  4 | interface needed by HUD's OperatorAgent. It implements only `responses.create`
  5 | and returns an OpenAI Response object with `id` and `output` fields, where `output` is a list of
  6 | OpenAI-like response blocks. We intentionally only support a single-step call
  7 | by consuming the first yielded result from `ComputerAgent.run()`.
  8 | """
  9 | 
 10 | import traceback
 11 | import time
 12 | import uuid
 13 | from typing import Any, Dict, List, Optional
 14 | 
 15 | from agent.agent import ComputerAgent as BaseComputerAgent
 16 | from agent.callbacks import PromptInstructionsCallback
 17 | from hud.tools.computer.settings import computer_settings
 18 | from PIL import Image
 19 | from hud.agents import OperatorAgent
 20 | 
 21 | # OpenAI Responses typed models (required)
 22 | from openai.types.responses import (
 23 |     Response,
 24 |     ResponseInputParam,
 25 |     ResponseOutputItem,
 26 |     ResponseComputerToolCall,
 27 |     ResponseOutputMessage,
 28 |     ResponseOutputText,
 29 |     ResponseReasoningItem,
 30 |     ResponseUsage,
 31 | )
 32 | 
 33 | def _map_agent_output_to_openai_blocks(output_items: List[Dict[str, Any]]) -> List[ResponseOutputItem]:
 34 |     """Map our agent output items to OpenAI ResponseOutputItem typed models.
 35 | 
 36 |     Only a subset is supported: computer_call, assistant message (text), and reasoning.
 37 |     Unknown types are ignored.
 38 |     """
 39 |     blocks: List[ResponseOutputItem] = []
 40 |     for item in output_items or []:
 41 |         t = item.get("type")
 42 |         if t == "computer_call":
 43 |             comp = ResponseComputerToolCall.model_validate({
 44 |                 "id": item.get("id") or f"cu_{uuid.uuid4().hex}",
 45 |                 "type": "computer_call",
 46 |                 "call_id": item["call_id"],
 47 |                 "action": item["action"],
 48 |                 "pending_safety_checks": item.get("pending_safety_checks", []),
 49 |                 "status": "completed",
 50 |             })
 51 |             blocks.append(comp)
 52 |             # we will exit early here as the responses api only supports a single step
 53 |             break
 54 |         elif t == "message" and item.get("role") == "assistant":
 55 |             content_blocks: List[ResponseOutputText] = []
 56 |             for c in item.get("content", []) or []:
 57 |                 content_blocks.append(
 58 |                     ResponseOutputText.model_validate({
 59 |                         "type": "output_text",
 60 |                         "text": c["text"],
 61 |                         "annotations": [],
 62 |                     })
 63 |                 )
 64 |             if content_blocks:
 65 |                 msg = ResponseOutputMessage.model_validate({
 66 |                     "id": item.get("id") or f"msg_{uuid.uuid4()}",
 67 |                     "type": "message",
 68 |                     "role": "assistant",
 69 |                     "status": "completed",
 70 |                     "content": [ct.model_dump() for ct in content_blocks],
 71 |                 })
 72 |                 blocks.append(msg)
 73 |         elif t == "reasoning":
 74 |             reasoning = ResponseReasoningItem.model_validate({
 75 |                 "id": item.get("id") or f"rsn_{uuid.uuid4()}",
 76 |                 "type": "reasoning",
 77 |                 "summary": item["summary"],
 78 |             })
 79 |             blocks.append(reasoning)
 80 |         # Unhandled types are ignored
 81 |     return blocks
 82 | 
 83 | def _to_plain_dict_list(items: Any) -> List[Dict[str, Any]]:
 84 |     out: List[Dict[str, Any]] = []
 85 |     for it in list(items):
 86 |         if hasattr(it, "model_dump"):
 87 |             out.append(it.model_dump())  # type: ignore[attr-defined]
 88 |         elif isinstance(it, dict):
 89 |             out.append(it)
 90 |         else:
 91 |             # Strict: rely on default __dict__ if present
 92 |             out.append(dict(it))  # may raise if not mapping
 93 |     return out
 94 | 
 95 | class FakeAsyncOpenAI:
 96 |     """Minimal fake OpenAI client with only `responses.create` implemented.
 97 | 
 98 |     It uses a provided `ComputerAgent` instance to produce a single-step
 99 |     response compatible with HUD's OperatorAgent loop.
100 |     """
101 | 
102 |     def __init__(self, computer_agent: BaseComputerAgent) -> None:
103 |         self._agent = computer_agent
104 |         self.responses = self._Responses(self)
105 | 
106 |     class _Responses:
107 |         def __init__(self, parent: "FakeAsyncOpenAI") -> None:
108 |             # Caches for cross-call context when using previous_response_id
109 |             self.blocks_cache: Dict[str, ResponseInputParam | ResponseOutputItem] = {}
110 |             self.context_cache: Dict[str, List[str]] = {}
111 |             self.agent = parent._agent
112 | 
113 |         async def create(
114 |             self,
115 |             *,
116 |             model: str,
117 |             input: ResponseInputParam,
118 |             tools: Optional[List[Dict[str, Any]]] = None,
119 |             instructions: Optional[str] = None,
120 |             previous_response_id: Optional[str] = None,
121 |             max_retries: int = 5,
122 |             **_: Any,
123 |         ) -> Any:
124 |             for attempt in range(max_retries):
125 |                 # Prepend cached blocks from previous_response_id to input
126 |                 full_input = input
127 |                 if previous_response_id is not None:
128 |                     prev_block_ids = self.context_cache[previous_response_id]
129 |                     prev_blocks = [self.blocks_cache[b_id] for b_id in prev_block_ids]
130 |                     full_input = _to_plain_dict_list(prev_blocks + input)
131 | 
132 |                 # Pre-pend instructions message
133 |                 effective_input = full_input
134 |                 if instructions:
135 |                     effective_input = [{
136 |                         "role": "user",
137 |                         "content": instructions,
138 |                     }] + full_input
139 | 
140 |                 # Run a single iteration of the ComputerAgent
141 |                 agent_result: Optional[Dict[str, Any]] = None
142 |                 async for result in self.agent.run(effective_input):  # type: ignore[arg-type]
143 |                     agent_result = result
144 |                     break
145 |                 assert agent_result is not None, "Agent failed to produce result"
146 | 
147 |                 output = _map_agent_output_to_openai_blocks(agent_result["output"])
148 |                 usage = agent_result["usage"]
149 | 
150 |                 # Cache conversation context using the last response id
151 |                 block_ids: List[str] = []
152 |                 blocks_to_cache = full_input + output
153 |                 for b in blocks_to_cache:
154 |                     bid = getattr(b, "id", None) or f"tmp-{hash(repr(b))}"
155 |                     self.blocks_cache[bid] = b # type: ignore[assignment]
156 |                     block_ids.append(bid)
157 |                 response_id = agent_result.get("id") or f"fake-{int(time.time()*1000)}"
158 |                 self.context_cache[response_id] = block_ids
159 | 
160 |                 try:
161 |                     return Response.model_validate({
162 |                         "id": response_id,
163 |                         "created_at": time.time(),
164 |                         "object": "response",
165 |                         "model": model,
166 |                         "output": output,
167 |                         "parallel_tool_calls": False,
168 |                         "tool_choice": "auto",
169 |                         "tools": [],
170 |                         "previous_response_id": previous_response_id,
171 |                         "usage": ResponseUsage.model_validate({
172 |                             "input_tokens": usage.get("input_tokens", 0),
173 |                             "output_tokens": usage.get("output_tokens", 0),
174 |                             "total_tokens": usage.get("total_tokens", 0),
175 |                             "input_tokens_details": usage.get("input_tokens_details", { "cached_tokens": 0 }),
176 |                             "output_tokens_details": usage.get("output_tokens_details", { "reasoning_tokens": 0 }),
177 |                         }),
178 |                     })
179 |                 except Exception as e:
180 |                     print(f"Error while validating agent response (attempt {attempt + 1}/{max_retries}): ", e)
181 |                     if attempt == max_retries - 1:
182 |                         print(traceback.format_exc())
183 |                         raise e
184 | 
185 | 
186 | # ---------------------------------------------------------------------------
187 | # Proxy OperatorAgent (moved from __init__.py)
188 | # ---------------------------------------------------------------------------
189 | 
190 | 
class ProxyOperatorAgent(OperatorAgent):
    """OperatorAgent whose model client is a FakeAsyncOpenAI wrapping a ComputerAgent.

    Config keys (the same ones passed via hud.run_dataset `agent_config`):
    - model: str | None          (falls back to "computer-use-preview")
    - allowed_tools: list[str] | None  (falls back to ["openai_computer"])

    The keyword arguments grouped under "ComputerAgent kwargs" are handed to
    BaseComputerAgent; any remaining **kwargs are forwarded to
    OperatorAgent.__init__.
    """

    def __init__(
        self,
        *,
        model: str | None = None,
        allowed_tools: list[str] | None = None,
        trajectory_dir: str | dict | None = None,
        # === ComputerAgent kwargs ===
        tools: list[Any] | None = None,
        custom_loop: Any | None = None,
        only_n_most_recent_images: int | None = None,
        callbacks: list[Any] | None = None,
        instructions: str | None = None,
        verbosity: int | None = None,
        max_retries: int | None = 3,
        screenshot_delay: float | int = 0.5,
        use_prompt_caching: bool | None = False,
        max_trajectory_budget: float | dict | None = None,
        telemetry_enabled: bool | None = True,
        **kwargs: Any,
    ) -> None:
        # Apply defaults for the two OperatorAgent-facing settings.
        resolved_model = model if model else "computer-use-preview"
        resolved_allowed = allowed_tools if allowed_tools else ["openai_computer"]

        def _blank_screenshot():
            # Settings are read at call time, so every screenshot request
            # produces a fresh blank image of the currently configured size.
            return Image.new('RGB', (computer_settings.OPENAI_COMPUTER_WIDTH, computer_settings.OPENAI_COMPUTER_HEIGHT))

        # Dict-based stand-in computer; it is always the first tool.
        computer_shim = {
            'screenshot': _blank_screenshot,
            'environment': 'linux',
            'dimensions': (computer_settings.OPENAI_COMPUTER_WIDTH, computer_settings.OPENAI_COMPUTER_HEIGHT)
        }
        agent_tools: list[Any] = [computer_shim, *(tools or [])]

        # Copy the caller's callbacks and, when instructions were given,
        # append a PromptInstructionsCallback carrying them.
        agent_callbacks = [*(callbacks or [])]
        if instructions:
            agent_callbacks.append(PromptInstructionsCallback(instructions))

        # The wrapped agent is exposed to OperatorAgent through the fake client.
        model_client = FakeAsyncOpenAI(
            BaseComputerAgent(
                model=resolved_model,
                tools=agent_tools,
                custom_loop=custom_loop,
                only_n_most_recent_images=only_n_most_recent_images,
                callbacks=agent_callbacks,
                verbosity=verbosity,
                trajectory_dir=trajectory_dir,
                max_retries=max_retries,
                screenshot_delay=screenshot_delay,
                use_prompt_caching=use_prompt_caching,
                max_trajectory_budget=max_trajectory_budget,
                telemetry_enabled=telemetry_enabled,
            )
        )

        super().__init__(
            model_client=model_client,  # type: ignore[arg-type]
            model=resolved_model,
            allowed_tools=resolved_allowed,
            **kwargs,
        )
260 | 
# Names exported by `from <module> import *`.
__all__ = ["FakeAsyncOpenAI", "ProxyOperatorAgent"]
265 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/logging.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Logging callback for ComputerAgent that provides configurable logging of agent lifecycle events.
  3 | """
  4 | 
  5 | import json
  6 | import logging
  7 | from typing import Dict, List, Any, Optional, Union
  8 | from .base import AsyncCallbackHandler
  9 | 
 10 | 
 11 | def sanitize_image_urls(data: Any) -> Any:
 12 |     """
 13 |     Recursively search for 'image_url' keys and set their values to '[omitted]'.
 14 |     
 15 |     Args:
 16 |         data: Any data structure (dict, list, or primitive type)
 17 |         
 18 |     Returns:
 19 |         A deep copy of the data with all 'image_url' values replaced with '[omitted]'
 20 |     """
 21 |     if isinstance(data, dict):
 22 |         # Create a copy of the dictionary
 23 |         sanitized = {}
 24 |         for key, value in data.items():
 25 |             if key == "image_url":
 26 |                 sanitized[key] = "[omitted]"
 27 |             else:
 28 |                 # Recursively sanitize the value
 29 |                 sanitized[key] = sanitize_image_urls(value)
 30 |         return sanitized
 31 |     
 32 |     elif isinstance(data, list):
 33 |         # Recursively sanitize each item in the list
 34 |         return [sanitize_image_urls(item) for item in data]
 35 |     
 36 |     else:
 37 |         # For primitive types (str, int, bool, None, etc.), return as-is
 38 |         return data
 39 | 
 40 | 
 41 | class LoggingCallback(AsyncCallbackHandler):
 42 |     """
 43 |     Callback handler that logs agent lifecycle events with configurable verbosity.
 44 |     
 45 |     Logging levels:
 46 |     - DEBUG: All events including API calls, message preprocessing, and detailed outputs
 47 |     - INFO: Major lifecycle events (start/end, messages, outputs)  
 48 |     - WARNING: Only warnings and errors
 49 |     - ERROR: Only errors
 50 |     """
 51 |     
 52 |     def __init__(self, logger: Optional[logging.Logger] = None, level: int = logging.INFO):
 53 |         """
 54 |         Initialize the logging callback.
 55 |         
 56 |         Args:
 57 |             logger: Logger instance to use. If None, creates a logger named 'agent.ComputerAgent'
 58 |             level: Logging level (logging.DEBUG, logging.INFO, etc.)
 59 |         """
 60 |         self.logger = logger or logging.getLogger('agent.ComputerAgent')
 61 |         self.level = level
 62 |         
 63 |         # Set up logger if it doesn't have handlers
 64 |         if not self.logger.handlers:
 65 |             handler = logging.StreamHandler()
 66 |             formatter = logging.Formatter(
 67 |                 '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 68 |             )
 69 |             handler.setFormatter(formatter)
 70 |             self.logger.addHandler(handler)
 71 |             self.logger.setLevel(level)
 72 |     
 73 |     def _update_usage(self, usage: Dict[str, Any]) -> None:
 74 |         """Update total usage statistics."""
 75 |         def add_dicts(target: Dict[str, Any], source: Dict[str, Any]) -> None:
 76 |             for key, value in source.items():
 77 |                 if isinstance(value, dict):
 78 |                     if key not in target:
 79 |                         target[key] = {}
 80 |                     add_dicts(target[key], value)
 81 |                 else:
 82 |                     if key not in target:
 83 |                         target[key] = 0
 84 |                     target[key] += value
 85 |         add_dicts(self.total_usage, usage)
 86 |     
 87 |     async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
 88 |         """Called before the run starts."""
 89 |         self.total_usage = {}
 90 |     
 91 |     async def on_usage(self, usage: Dict[str, Any]) -> None:
 92 |         """Called when usage information is received."""
 93 |         self._update_usage(usage)
 94 | 
 95 |     async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
 96 |         """Called after the run ends."""
 97 |         def format_dict(d, indent=0):
 98 |             lines = []
 99 |             prefix = f" - {' ' * indent}"
100 |             for key, value in d.items():
101 |                 if isinstance(value, dict):
102 |                     lines.append(f"{prefix}{key}:")
103 |                     lines.extend(format_dict(value, indent + 1))
104 |                 elif isinstance(value, float):
105 |                     lines.append(f"{prefix}{key}: ${value:.4f}")
106 |                 else:
107 |                     lines.append(f"{prefix}{key}: {value}")
108 |             return lines
109 |         
110 |         formatted_output = "\n".join(format_dict(self.total_usage))
111 |         self.logger.info(f"Total usage:\n{formatted_output}")
112 |     
113 |     async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
114 |         """Called before LLM processing starts."""
115 |         if self.logger.isEnabledFor(logging.INFO):
116 |             self.logger.info(f"LLM processing started with {len(messages)} messages")
117 |         if self.logger.isEnabledFor(logging.DEBUG):
118 |             sanitized_messages = [sanitize_image_urls(msg) for msg in messages]
119 |             self.logger.debug(f"LLM input messages: {json.dumps(sanitized_messages, indent=2)}")
120 |         return messages
121 |     
122 |     async def on_llm_end(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
123 |         """Called after LLM processing ends."""
124 |         if self.logger.isEnabledFor(logging.DEBUG):
125 |             sanitized_messages = [sanitize_image_urls(msg) for msg in messages]
126 |             self.logger.debug(f"LLM output: {json.dumps(sanitized_messages, indent=2)}")
127 |         return messages
128 |     
129 |     async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
130 |         """Called when a computer call starts."""
131 |         action = item.get("action", {})
132 |         action_type = action.get("type", "unknown")
133 |         action_args = {k: v for k, v in action.items() if k != "type"}
134 |         
135 |         # INFO level logging for the action
136 |         self.logger.info(f"Computer: {action_type}({action_args})")
137 |         
138 |         # DEBUG level logging for full details
139 |         if self.logger.isEnabledFor(logging.DEBUG):
140 |             self.logger.debug(f"Computer call started: {json.dumps(action, indent=2)}")
141 |     
142 |     async def on_computer_call_end(self, item: Dict[str, Any], result: Any) -> None:
143 |         """Called when a computer call ends."""
144 |         if self.logger.isEnabledFor(logging.DEBUG):
145 |             action = item.get("action", "unknown")
146 |             self.logger.debug(f"Computer call completed: {json.dumps(action, indent=2)}")
147 |             if result:
148 |                 sanitized_result = sanitize_image_urls(result)
149 |                 self.logger.debug(f"Computer call result: {json.dumps(sanitized_result, indent=2)}")
150 |     
151 |     async def on_function_call_start(self, item: Dict[str, Any]) -> None:
152 |         """Called when a function call starts."""
153 |         name = item.get("name", "unknown")
154 |         arguments = item.get("arguments", "{}")
155 |         
156 |         # INFO level logging for the function call
157 |         self.logger.info(f"Function: {name}({arguments})")
158 |         
159 |         # DEBUG level logging for full details
160 |         if self.logger.isEnabledFor(logging.DEBUG):
161 |             self.logger.debug(f"Function call started: {name}")
162 |     
163 |     async def on_function_call_end(self, item: Dict[str, Any], result: Any) -> None:
164 |         """Called when a function call ends."""
165 |         # INFO level logging for function output (similar to function_call_output)
166 |         if result:
167 |             # Handle both list and direct result formats
168 |             if isinstance(result, list) and len(result) > 0:
169 |                 output = result[0].get("output", str(result)) if isinstance(result[0], dict) else str(result[0])
170 |             else:
171 |                 output = str(result)
172 |             
173 |             # Truncate long outputs
174 |             if len(output) > 100:
175 |                 output = output[:100] + "..."
176 |             
177 |             self.logger.info(f"Output: {output}")
178 |         
179 |         # DEBUG level logging for full details
180 |         if self.logger.isEnabledFor(logging.DEBUG):
181 |             name = item.get("name", "unknown")
182 |             self.logger.debug(f"Function call completed: {name}")
183 |             if result:
184 |                 self.logger.debug(f"Function call result: {json.dumps(result, indent=2)}")
185 |     
186 |     async def on_text(self, item: Dict[str, Any]) -> None:
187 |         """Called when a text message is encountered."""
188 |         # Get the role to determine if it's Agent or User
189 |         role = item.get("role", "unknown")
190 |         content_items = item.get("content", [])
191 |         
192 |         # Process content items to build display text
193 |         text_parts = []
194 |         for content_item in content_items:
195 |             content_type = content_item.get("type", "output_text")
196 |             if content_type == "output_text":
197 |                 text_content = content_item.get("text", "")
198 |                 if not text_content.strip():
199 |                     text_parts.append("[empty]")
200 |                 else:
201 |                     # Truncate long text and add ellipsis
202 |                     if len(text_content) > 2048:
203 |                         text_parts.append(text_content[:2048] + "...")
204 |                     else:
205 |                         text_parts.append(text_content)
206 |             else:
207 |                 # Non-text content, show as [type]
208 |                 text_parts.append(f"[{content_type}]")
209 |         
210 |         # Join all text parts
211 |         display_text = ''.join(text_parts) if text_parts else "[empty]"
212 |         
213 |         # Log with appropriate level and format
214 |         if role == "assistant":
215 |             self.logger.info(f"Agent: {display_text}")
216 |         elif role == "user":
217 |             self.logger.info(f"User: {display_text}")
218 |         else:
219 |             # Fallback for unknown roles, use debug level
220 |             if self.logger.isEnabledFor(logging.DEBUG):
221 |                 self.logger.debug(f"Text message ({role}): {display_text}")
222 |     
223 |     async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
224 |         """Called when an API call is about to start."""
225 |         if self.logger.isEnabledFor(logging.DEBUG):
226 |             model = kwargs.get("model", "unknown")
227 |             self.logger.debug(f"API call starting for model: {model}")
228 |             # Log sanitized messages if present
229 |             if "messages" in kwargs:
230 |                 sanitized_messages = sanitize_image_urls(kwargs["messages"])
231 |                 self.logger.debug(f"API call messages: {json.dumps(sanitized_messages, indent=2)}")
232 |             elif "input" in kwargs:
233 |                 sanitized_input = sanitize_image_urls(kwargs["input"])
234 |                 self.logger.debug(f"API call input: {json.dumps(sanitized_input, indent=2)}")
235 |     
236 |     async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
237 |         """Called when an API call has completed."""
238 |         if self.logger.isEnabledFor(logging.DEBUG):
239 |             model = kwargs.get("model", "unknown")
240 |             self.logger.debug(f"API call completed for model: {model}")
241 |             self.logger.debug(f"API call result: {json.dumps(sanitize_image_urls(result), indent=2)}")
242 | 
243 |     async def on_screenshot(self, item: Union[str, bytes], name: str = "screenshot") -> None:
244 |         """Called when a screenshot is taken."""
245 |         if self.logger.isEnabledFor(logging.DEBUG):
246 |             image_size = len(item) / 1024
247 |             self.logger.debug(f"Screenshot captured: {name} {image_size:.2f} KB")
```
Page 9/21FirstPrevNextLast