This is page 5 of 28. Use http://codebase.md/trycua/cua?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .cursorignore
├── .dockerignore
├── .editorconfig
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── scripts
│   │   ├── get_pyproject_version.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── README.md
│   │       └── test_get_pyproject_version.py
│   └── workflows
│       ├── bump-version.yml
│       ├── ci-lume.yml
│       ├── docker-publish-cua-linux.yml
│       ├── docker-publish-cua-windows.yml
│       ├── docker-publish-kasm.yml
│       ├── docker-publish-xfce.yml
│       ├── docker-reusable-publish.yml
│       ├── link-check.yml
│       ├── lint.yml
│       ├── npm-publish-cli.yml
│       ├── npm-publish-computer.yml
│       ├── npm-publish-core.yml
│       ├── publish-lume.yml
│       ├── pypi-publish-agent.yml
│       ├── pypi-publish-computer-server.yml
│       ├── pypi-publish-computer.yml
│       ├── pypi-publish-core.yml
│       ├── pypi-publish-mcp-server.yml
│       ├── pypi-publish-som.yml
│       ├── pypi-reusable-publish.yml
│       ├── python-tests.yml
│       ├── test-cua-models.yml
│       └── test-validation-script.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .prettierignore
├── .prettierrc.yaml
├── .vscode
│   ├── docs.code-workspace
│   ├── extensions.json
│   ├── launch.json
│   ├── libs-ts.code-workspace
│   ├── lume.code-workspace
│   ├── lumier.code-workspace
│   ├── py.code-workspace
│   └── settings.json
├── blog
│   ├── app-use.md
│   ├── assets
│   │   ├── composite-agents.png
│   │   ├── docker-ubuntu-support.png
│   │   ├── hack-booth.png
│   │   ├── hack-closing-ceremony.jpg
│   │   ├── hack-cua-ollama-hud.jpeg
│   │   ├── hack-leaderboard.png
│   │   ├── hack-the-north.png
│   │   ├── hack-winners.jpeg
│   │   ├── hack-workshop.jpeg
│   │   ├── hud-agent-evals.png
│   │   └── trajectory-viewer.jpeg
│   ├── bringing-computer-use-to-the-web.md
│   ├── build-your-own-operator-on-macos-1.md
│   ├── build-your-own-operator-on-macos-2.md
│   ├── cloud-windows-ga-macos-preview.md
│   ├── composite-agents.md
│   ├── computer-use-agents-for-growth-hacking.md
│   ├── cua-hackathon.md
│   ├── cua-playground-preview.md
│   ├── cua-vlm-router.md
│   ├── hack-the-north.md
│   ├── hud-agent-evals.md
│   ├── human-in-the-loop.md
│   ├── introducing-cua-cli.md
│   ├── introducing-cua-cloud-containers.md
│   ├── lume-to-containerization.md
│   ├── neurips-2025-cua-papers.md
│   ├── sandboxed-python-execution.md
│   ├── training-computer-use-models-trajectories-1.md
│   ├── trajectory-viewer.md
│   ├── ubuntu-docker-support.md
│   └── windows-sandbox.md
├── CONTRIBUTING.md
├── Development.md
├── Dockerfile
├── docs
│   ├── .env.example
│   ├── .gitignore
│   ├── content
│   │   └── docs
│   │       ├── agent-sdk
│   │       │   ├── agent-loops.mdx
│   │       │   ├── benchmarks
│   │       │   │   ├── index.mdx
│   │       │   │   ├── interactive.mdx
│   │       │   │   ├── introduction.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── osworld-verified.mdx
│   │       │   │   ├── screenspot-pro.mdx
│   │       │   │   └── screenspot-v2.mdx
│   │       │   ├── callbacks
│   │       │   │   ├── agent-lifecycle.mdx
│   │       │   │   ├── cost-saving.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── logging.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── pii-anonymization.mdx
│   │       │   │   └── trajectories.mdx
│   │       │   ├── chat-history.mdx
│   │       │   ├── custom-tools.mdx
│   │       │   ├── customizing-computeragent.mdx
│   │       │   ├── integrations
│   │       │   │   ├── hud.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── observability.mdx
│   │       │   ├── mcp-server
│   │       │   │   ├── client-integrations.mdx
│   │       │   │   ├── configuration.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── llm-integrations.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── tools.mdx
│   │       │   │   └── usage.mdx
│   │       │   ├── message-format.mdx
│   │       │   ├── meta.json
│   │       │   ├── migration-guide.mdx
│   │       │   ├── prompt-caching.mdx
│   │       │   ├── supported-agents
│   │       │   │   ├── composed-agents.mdx
│   │       │   │   ├── computer-use-agents.mdx
│   │       │   │   ├── grounding-models.mdx
│   │       │   │   ├── human-in-the-loop.mdx
│   │       │   │   └── meta.json
│   │       │   ├── supported-model-providers
│   │       │   │   ├── cua-vlm-router.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   └── local-models.mdx
│   │       │   ├── telemetry.mdx
│   │       │   └── usage-tracking.mdx
│   │       ├── cli-playbook
│   │       │   ├── commands.mdx
│   │       │   ├── index.mdx
│   │       │   └── meta.json
│   │       ├── computer-sdk
│   │       │   ├── cloud-vm-management.mdx
│   │       │   ├── commands.mdx
│   │       │   ├── computer-server
│   │       │   │   ├── Commands.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── meta.json
│   │       │   │   ├── REST-API.mdx
│   │       │   │   └── WebSocket-API.mdx
│   │       │   ├── computer-ui.mdx
│   │       │   ├── computers.mdx
│   │       │   ├── custom-computer-handlers.mdx
│   │       │   ├── meta.json
│   │       │   ├── sandboxed-python.mdx
│   │       │   └── tracing-api.mdx
│   │       ├── example-usecases
│   │       │   ├── form-filling.mdx
│   │       │   ├── gemini-complex-ui-navigation.mdx
│   │       │   ├── meta.json
│   │       │   ├── post-event-contact-export.mdx
│   │       │   └── windows-app-behind-vpn.mdx
│   │       ├── get-started
│   │       │   ├── meta.json
│   │       │   └── quickstart.mdx
│   │       ├── index.mdx
│   │       ├── macos-vm-cli-playbook
│   │       │   ├── lume
│   │       │   │   ├── cli-reference.mdx
│   │       │   │   ├── faq.md
│   │       │   │   ├── http-api.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   ├── meta.json
│   │       │   │   └── prebuilt-images.mdx
│   │       │   ├── lumier
│   │       │   │   ├── building-lumier.mdx
│   │       │   │   ├── docker-compose.mdx
│   │       │   │   ├── docker.mdx
│   │       │   │   ├── index.mdx
│   │       │   │   ├── installation.mdx
│   │       │   │   └── meta.json
│   │       │   └── meta.json
│   │       └── meta.json
│   ├── next.config.mjs
│   ├── package-lock.json
│   ├── package.json
│   ├── pnpm-lock.yaml
│   ├── postcss.config.mjs
│   ├── public
│   │   └── img
│   │       ├── agent_gradio_ui.png
│   │       ├── agent.png
│   │       ├── bg-dark.jpg
│   │       ├── bg-light.jpg
│   │       ├── cli.png
│   │       ├── computer.png
│   │       ├── grounding-with-gemini3.gif
│   │       ├── hero.png
│   │       ├── laminar_trace_example.png
│   │       ├── som_box_threshold.png
│   │       └── som_iou_threshold.png
│   ├── README.md
│   ├── source.config.ts
│   ├── src
│   │   ├── app
│   │   │   ├── (home)
│   │   │   │   ├── [[...slug]]
│   │   │   │   │   └── page.tsx
│   │   │   │   └── layout.tsx
│   │   │   ├── api
│   │   │   │   ├── posthog
│   │   │   │   │   └── [...path]
│   │   │   │   │       └── route.ts
│   │   │   │   └── search
│   │   │   │       └── route.ts
│   │   │   ├── favicon.ico
│   │   │   ├── global.css
│   │   │   ├── layout.config.tsx
│   │   │   ├── layout.tsx
│   │   │   ├── llms.mdx
│   │   │   │   └── [[...slug]]
│   │   │   │       └── route.ts
│   │   │   ├── llms.txt
│   │   │   │   └── route.ts
│   │   │   ├── robots.ts
│   │   │   └── sitemap.ts
│   │   ├── assets
│   │   │   ├── discord-black.svg
│   │   │   ├── discord-white.svg
│   │   │   ├── logo-black.svg
│   │   │   └── logo-white.svg
│   │   ├── components
│   │   │   ├── analytics-tracker.tsx
│   │   │   ├── cookie-consent.tsx
│   │   │   ├── doc-actions-menu.tsx
│   │   │   ├── editable-code-block.tsx
│   │   │   ├── footer.tsx
│   │   │   ├── hero.tsx
│   │   │   ├── iou.tsx
│   │   │   ├── mermaid.tsx
│   │   │   └── page-feedback.tsx
│   │   ├── lib
│   │   │   ├── llms.ts
│   │   │   └── source.ts
│   │   ├── mdx-components.tsx
│   │   └── providers
│   │       └── posthog-provider.tsx
│   └── tsconfig.json
├── examples
│   ├── agent_examples.py
│   ├── agent_ui_examples.py
│   ├── browser_tool_example.py
│   ├── cloud_api_examples.py
│   ├── computer_examples_windows.py
│   ├── computer_examples.py
│   ├── computer_ui_examples.py
│   ├── computer-example-ts
│   │   ├── .env.example
│   │   ├── .gitignore
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── README.md
│   │   ├── src
│   │   │   ├── helpers.ts
│   │   │   └── index.ts
│   │   └── tsconfig.json
│   ├── docker_examples.py
│   ├── evals
│   │   ├── hud_eval_examples.py
│   │   └── wikipedia_most_linked.txt
│   ├── pylume_examples.py
│   ├── sandboxed_functions_examples.py
│   ├── som_examples.py
│   ├── tracing_examples.py
│   ├── utils.py
│   └── winsandbox_example.py
├── img
│   ├── agent_gradio_ui.png
│   ├── agent.png
│   ├── cli.png
│   ├── computer.png
│   ├── logo_black.png
│   └── logo_white.png
├── libs
│   ├── kasm
│   │   ├── Dockerfile
│   │   ├── LICENSE
│   │   ├── README.md
│   │   └── src
│   │       └── ubuntu
│   │           └── install
│   │               └── firefox
│   │                   ├── custom_startup.sh
│   │                   ├── firefox.desktop
│   │                   └── install_firefox.sh
│   ├── lume
│   │   ├── .cursorignore
│   │   ├── CONTRIBUTING.md
│   │   ├── Development.md
│   │   ├── img
│   │   │   └── cli.png
│   │   ├── Package.resolved
│   │   ├── Package.swift
│   │   ├── README.md
│   │   ├── resources
│   │   │   └── lume.entitlements
│   │   ├── scripts
│   │   │   ├── build
│   │   │   │   ├── build-debug.sh
│   │   │   │   ├── build-release-notarized.sh
│   │   │   │   └── build-release.sh
│   │   │   └── install.sh
│   │   ├── src
│   │   │   ├── Commands
│   │   │   │   ├── Clone.swift
│   │   │   │   ├── Config.swift
│   │   │   │   ├── Create.swift
│   │   │   │   ├── Delete.swift
│   │   │   │   ├── Get.swift
│   │   │   │   ├── Images.swift
│   │   │   │   ├── IPSW.swift
│   │   │   │   ├── List.swift
│   │   │   │   ├── Logs.swift
│   │   │   │   ├── Options
│   │   │   │   │   └── FormatOption.swift
│   │   │   │   ├── Prune.swift
│   │   │   │   ├── Pull.swift
│   │   │   │   ├── Push.swift
│   │   │   │   ├── Run.swift
│   │   │   │   ├── Serve.swift
│   │   │   │   ├── Set.swift
│   │   │   │   └── Stop.swift
│   │   │   ├── ContainerRegistry
│   │   │   │   ├── ImageContainerRegistry.swift
│   │   │   │   ├── ImageList.swift
│   │   │   │   └── ImagesPrinter.swift
│   │   │   ├── Errors
│   │   │   │   └── Errors.swift
│   │   │   ├── FileSystem
│   │   │   │   ├── Home.swift
│   │   │   │   ├── Settings.swift
│   │   │   │   ├── VMConfig.swift
│   │   │   │   ├── VMDirectory.swift
│   │   │   │   └── VMLocation.swift
│   │   │   ├── LumeController.swift
│   │   │   ├── Main.swift
│   │   │   ├── Server
│   │   │   │   ├── Handlers.swift
│   │   │   │   ├── HTTP.swift
│   │   │   │   ├── Requests.swift
│   │   │   │   ├── Responses.swift
│   │   │   │   └── Server.swift
│   │   │   ├── Utils
│   │   │   │   ├── CommandRegistry.swift
│   │   │   │   ├── CommandUtils.swift
│   │   │   │   ├── Logger.swift
│   │   │   │   ├── NetworkUtils.swift
│   │   │   │   ├── Path.swift
│   │   │   │   ├── ProcessRunner.swift
│   │   │   │   ├── ProgressLogger.swift
│   │   │   │   ├── String.swift
│   │   │   │   └── Utils.swift
│   │   │   ├── Virtualization
│   │   │   │   ├── DarwinImageLoader.swift
│   │   │   │   ├── DHCPLeaseParser.swift
│   │   │   │   ├── ImageLoaderFactory.swift
│   │   │   │   └── VMVirtualizationService.swift
│   │   │   ├── VM
│   │   │   │   ├── DarwinVM.swift
│   │   │   │   ├── LinuxVM.swift
│   │   │   │   ├── VM.swift
│   │   │   │   ├── VMDetails.swift
│   │   │   │   ├── VMDetailsPrinter.swift
│   │   │   │   ├── VMDisplayResolution.swift
│   │   │   │   └── VMFactory.swift
│   │   │   └── VNC
│   │   │       ├── PassphraseGenerator.swift
│   │   │       └── VNCService.swift
│   │   └── tests
│   │       ├── Mocks
│   │       │   ├── MockVM.swift
│   │       │   ├── MockVMVirtualizationService.swift
│   │       │   └── MockVNCService.swift
│   │       ├── VM
│   │       │   └── VMDetailsPrinterTests.swift
│   │       ├── VMTests.swift
│   │       ├── VMVirtualizationServiceTests.swift
│   │       └── VNCServiceTests.swift
│   ├── lumier
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   └── src
│   │       ├── bin
│   │       │   └── entry.sh
│   │       ├── config
│   │       │   └── constants.sh
│   │       ├── hooks
│   │       │   └── on-logon.sh
│   │       └── lib
│   │           ├── utils.sh
│   │           └── vm.sh
│   ├── python
│   │   ├── agent
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── agent
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── adapters
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── cua_adapter.py
│   │   │   │   │   ├── huggingfacelocal_adapter.py
│   │   │   │   │   ├── human_adapter.py
│   │   │   │   │   ├── mlxvlm_adapter.py
│   │   │   │   │   └── models
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── generic.py
│   │   │   │   │       ├── internvl.py
│   │   │   │   │       ├── opencua.py
│   │   │   │   │       └── qwen2_5_vl.py
│   │   │   │   ├── agent.py
│   │   │   │   ├── callbacks
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── budget_manager.py
│   │   │   │   │   ├── image_retention.py
│   │   │   │   │   ├── logging.py
│   │   │   │   │   ├── operator_validator.py
│   │   │   │   │   ├── pii_anonymization.py
│   │   │   │   │   ├── prompt_instructions.py
│   │   │   │   │   ├── telemetry.py
│   │   │   │   │   └── trajectory_saver.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── computers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cua.py
│   │   │   │   │   └── custom.py
│   │   │   │   ├── decorators.py
│   │   │   │   ├── human_tool
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   ├── server.py
│   │   │   │   │   └── ui.py
│   │   │   │   ├── integrations
│   │   │   │   │   └── hud
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── agent.py
│   │   │   │   │       └── proxy.py
│   │   │   │   ├── loops
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── anthropic.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── composed_grounded.py
│   │   │   │   │   ├── gelato.py
│   │   │   │   │   ├── gemini.py
│   │   │   │   │   ├── generic_vlm.py
│   │   │   │   │   ├── glm45v.py
│   │   │   │   │   ├── gta1.py
│   │   │   │   │   ├── holo.py
│   │   │   │   │   ├── internvl.py
│   │   │   │   │   ├── model_types.csv
│   │   │   │   │   ├── moondream3.py
│   │   │   │   │   ├── omniparser.py
│   │   │   │   │   ├── openai.py
│   │   │   │   │   ├── opencua.py
│   │   │   │   │   ├── uiins.py
│   │   │   │   │   ├── uitars.py
│   │   │   │   │   └── uitars2.py
│   │   │   │   ├── proxy
│   │   │   │   │   ├── examples.py
│   │   │   │   │   └── handlers.py
│   │   │   │   ├── responses.py
│   │   │   │   ├── tools
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── browser_tool.py
│   │   │   │   ├── types.py
│   │   │   │   └── ui
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __main__.py
│   │   │   │       └── gradio
│   │   │   │           ├── __init__.py
│   │   │   │           ├── app.py
│   │   │   │           └── ui_components.py
│   │   │   ├── benchmarks
│   │   │   │   ├── .gitignore
│   │   │   │   ├── contrib.md
│   │   │   │   ├── interactive.py
│   │   │   │   ├── models
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   └── gta1.py
│   │   │   │   ├── README.md
│   │   │   │   ├── ss-pro.py
│   │   │   │   ├── ss-v2.py
│   │   │   │   └── utils.py
│   │   │   ├── example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_computer_agent.py
│   │   ├── bench-ui
│   │   │   ├── bench_ui
│   │   │   │   ├── __init__.py
│   │   │   │   ├── api.py
│   │   │   │   └── child.py
│   │   │   ├── examples
│   │   │   │   ├── folder_example.py
│   │   │   │   ├── gui
│   │   │   │   │   ├── index.html
│   │   │   │   │   ├── logo.svg
│   │   │   │   │   └── styles.css
│   │   │   │   ├── output_overlay.png
│   │   │   │   └── simple_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       └── test_port_detection.py
│   │   ├── computer
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer
│   │   │   │   ├── __init__.py
│   │   │   │   ├── computer.py
│   │   │   │   ├── diorama_computer.py
│   │   │   │   ├── helpers.py
│   │   │   │   ├── interface
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   ├── models.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── logger.py
│   │   │   │   ├── models.py
│   │   │   │   ├── providers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cloud
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── docker
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── lume
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── lume_api.py
│   │   │   │   │   ├── lumier
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── provider.py
│   │   │   │   │   ├── types.py
│   │   │   │   │   └── winsandbox
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── provider.py
│   │   │   │   │       └── setup_script.ps1
│   │   │   │   ├── tracing_wrapper.py
│   │   │   │   ├── tracing.py
│   │   │   │   ├── ui
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   └── gradio
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── app.py
│   │   │   │   └── utils.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_computer.py
│   │   ├── computer-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── computer_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── browser.py
│   │   │   │   ├── cli.py
│   │   │   │   ├── diorama
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── diorama_computer.py
│   │   │   │   │   ├── diorama.py
│   │   │   │   │   ├── draw.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── safezone.py
│   │   │   │   ├── handlers
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── factory.py
│   │   │   │   │   ├── generic.py
│   │   │   │   │   ├── linux.py
│   │   │   │   │   ├── macos.py
│   │   │   │   │   └── windows.py
│   │   │   │   ├── main.py
│   │   │   │   ├── server.py
│   │   │   │   ├── utils
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── wallpaper.py
│   │   │   │   └── watchdog.py
│   │   │   ├── examples
│   │   │   │   ├── __init__.py
│   │   │   │   └── usage_example.py
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   ├── run_server.py
│   │   │   ├── test_connection.py
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_server.py
│   │   ├── core
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── core
│   │   │   │   ├── __init__.py
│   │   │   │   └── telemetry
│   │   │   │       ├── __init__.py
│   │   │   │       └── posthog.py
│   │   │   ├── poetry.toml
│   │   │   ├── pyproject.toml
│   │   │   ├── README.md
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_telemetry.py
│   │   ├── mcp-server
│   │   │   ├── .bumpversion.cfg
│   │   │   ├── build-extension.py
│   │   │   ├── CONCURRENT_SESSIONS.md
│   │   │   ├── desktop-extension
│   │   │   │   ├── cua-extension.mcpb
│   │   │   │   ├── desktop_extension.png
│   │   │   │   ├── manifest.json
│   │   │   │   ├── README.md
│   │   │   │   ├── requirements.txt
│   │   │   │   ├── run_server.sh
│   │   │   │   └── setup.py
│   │   │   ├── mcp_server
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __main__.py
│   │   │   │   ├── server.py
│   │   │   │   └── session_manager.py
│   │   │   ├── pdm.lock
│   │   │   ├── pyproject.toml
│   │   │   ├── QUICK_TEST_COMMANDS.sh
│   │   │   ├── quick_test_local_option.py
│   │   │   ├── README.md
│   │   │   ├── scripts
│   │   │   │   ├── install_mcp_server.sh
│   │   │   │   └── start_mcp_server.sh
│   │   │   ├── test_mcp_server_local_option.py
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_mcp_server.py
│   │   ├── pylume
│   │   │   └── tests
│   │   │       ├── conftest.py
│   │   │       └── test_pylume.py
│   │   └── som
│   │       ├── .bumpversion.cfg
│   │       ├── LICENSE
│   │       ├── poetry.toml
│   │       ├── pyproject.toml
│   │       ├── README.md
│   │       ├── som
│   │       │   ├── __init__.py
│   │       │   ├── detect.py
│   │       │   ├── detection.py
│   │       │   ├── models.py
│   │       │   ├── ocr.py
│   │       │   ├── util
│   │       │   │   └── utils.py
│   │       │   └── visualization.py
│   │       └── tests
│   │           ├── conftest.py
│   │           └── test_omniparser.py
│   ├── qemu-docker
│   │   ├── linux
│   │   │   ├── Dockerfile
│   │   │   ├── README.md
│   │   │   └── src
│   │   │       ├── entry.sh
│   │   │       └── vm
│   │   │           ├── image
│   │   │           │   └── README.md
│   │   │           └── setup
│   │   │               ├── install.sh
│   │   │               ├── setup-cua-server.sh
│   │   │               └── setup.sh
│   │   ├── README.md
│   │   └── windows
│   │       ├── Dockerfile
│   │       ├── README.md
│   │       └── src
│   │           ├── entry.sh
│   │           └── vm
│   │               ├── image
│   │               │   └── README.md
│   │               └── setup
│   │                   ├── install.bat
│   │                   ├── on-logon.ps1
│   │                   ├── setup-cua-server.ps1
│   │                   ├── setup-utils.psm1
│   │                   └── setup.ps1
│   ├── typescript
│   │   ├── .gitignore
│   │   ├── .nvmrc
│   │   ├── agent
│   │   │   ├── examples
│   │   │   │   ├── playground-example.html
│   │   │   │   └── README.md
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── client.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   └── client.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── computer
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── computer
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── providers
│   │   │   │   │   │   ├── base.ts
│   │   │   │   │   │   ├── cloud.ts
│   │   │   │   │   │   └── index.ts
│   │   │   │   │   └── types.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── base.ts
│   │   │   │   │   ├── factory.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── linux.ts
│   │   │   │   │   ├── macos.ts
│   │   │   │   │   └── windows.ts
│   │   │   │   └── types.ts
│   │   │   ├── tests
│   │   │   │   ├── computer
│   │   │   │   │   └── cloud.test.ts
│   │   │   │   ├── interface
│   │   │   │   │   ├── factory.test.ts
│   │   │   │   │   ├── index.test.ts
│   │   │   │   │   ├── linux.test.ts
│   │   │   │   │   ├── macos.test.ts
│   │   │   │   │   └── windows.test.ts
│   │   │   │   └── setup.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── core
│   │   │   ├── .editorconfig
│   │   │   ├── .gitattributes
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── index.ts
│   │   │   │   └── telemetry
│   │   │   │       ├── clients
│   │   │   │       │   ├── index.ts
│   │   │   │       │   └── posthog.ts
│   │   │   │       └── index.ts
│   │   │   ├── tests
│   │   │   │   └── telemetry.test.ts
│   │   │   ├── tsconfig.json
│   │   │   ├── tsdown.config.ts
│   │   │   └── vitest.config.ts
│   │   ├── cua-cli
│   │   │   ├── .gitignore
│   │   │   ├── .prettierrc
│   │   │   ├── bun.lock
│   │   │   ├── CLAUDE.md
│   │   │   ├── index.ts
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── src
│   │   │   │   ├── auth.ts
│   │   │   │   ├── cli.ts
│   │   │   │   ├── commands
│   │   │   │   │   ├── auth.ts
│   │   │   │   │   └── sandbox.ts
│   │   │   │   ├── config.ts
│   │   │   │   ├── http.ts
│   │   │   │   ├── storage.ts
│   │   │   │   └── util.ts
│   │   │   └── tsconfig.json
│   │   ├── package.json
│   │   ├── pnpm-lock.yaml
│   │   ├── pnpm-workspace.yaml
│   │   └── README.md
│   └── xfce
│       ├── .dockerignore
│       ├── .gitignore
│       ├── Development.md
│       ├── Dockerfile
│       ├── Dockerfile.dev
│       ├── README.md
│       └── src
│           ├── scripts
│           │   ├── resize-display.sh
│           │   ├── start-computer-server.sh
│           │   ├── start-novnc.sh
│           │   ├── start-vnc.sh
│           │   └── xstartup.sh
│           ├── supervisor
│           │   └── supervisord.conf
│           └── xfce-config
│               ├── helpers.rc
│               ├── xfce4-power-manager.xml
│               └── xfce4-session.xml
├── LICENSE.md
├── Makefile
├── notebooks
│   ├── agent_nb.ipynb
│   ├── blog
│   │   ├── build-your-own-operator-on-macos-1.ipynb
│   │   └── build-your-own-operator-on-macos-2.ipynb
│   ├── composite_agents_docker_nb.ipynb
│   ├── computer_nb.ipynb
│   ├── computer_server_nb.ipynb
│   ├── customizing_computeragent.ipynb
│   ├── eval_osworld.ipynb
│   ├── ollama_nb.ipynb
│   ├── README.md
│   ├── sota_hackathon_cloud.ipynb
│   └── sota_hackathon.ipynb
├── package-lock.json
├── package.json
├── pnpm-lock.yaml
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── scripts
│   ├── install-cli.ps1
│   ├── install-cli.sh
│   ├── playground-docker.sh
│   ├── playground.sh
│   ├── run-docker-dev.sh
│   └── typescript-typecheck.js
├── TESTING.md
├── tests
│   ├── agent_loop_testing
│   │   ├── agent_test.py
│   │   └── README.md
│   ├── pytest.ini
│   ├── shell_cmd.py
│   ├── test_files.py
│   ├── test_mcp_server_session_management.py
│   ├── test_mcp_server_streaming.py
│   ├── test_shell_bash.py
│   ├── test_telemetry.py
│   ├── test_tracing.py
│   ├── test_venv.py
│   └── test_watchdog.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/libs/typescript/computer/src/computer/providers/base.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import os from 'node:os';
  2 | import { Telemetry } from '@trycua/core';
  3 | import pino from 'pino';
  4 | import type { OSType } from '../../types';
  5 | import type { BaseComputerConfig, Display, VMProviderType } from '../types';
  6 | 
  7 | const logger = pino({ name: 'computer.provider_base' });
  8 | 
  9 | /**
 10 |  * Base Computer class with shared functionality
 11 |  */
 12 | export abstract class BaseComputer {
 13 |   protected name: string;
 14 |   protected osType: OSType;
 15 |   protected vmProvider?: VMProviderType;
 16 |   protected telemetry: Telemetry;
 17 | 
 18 |   constructor(config: BaseComputerConfig) {
 19 |     this.name = config.name;
 20 |     this.osType = config.osType;
 21 |     this.telemetry = new Telemetry();
 22 |     this.telemetry.recordEvent('module_init', {
 23 |       module: 'computer',
 24 |       version: process.env.npm_package_version,
 25 |       node_version: process.version,
 26 |     });
 27 | 
 28 |     this.telemetry.recordEvent('computer_initialized', {
 29 |       os: os.platform(),
 30 |       os_version: os.version(),
 31 |       node_version: process.version,
 32 |     });
 33 |   }
 34 | 
 35 |   /**
 36 |    * Get the name of the computer
 37 |    */
 38 |   getName(): string {
 39 |     return this.name;
 40 |   }
 41 | 
 42 |   /**
 43 |    * Get the OS type of the computer
 44 |    */
 45 |   getOSType(): OSType {
 46 |     return this.osType;
 47 |   }
 48 | 
 49 |   /**
 50 |    * Get the VM provider type
 51 |    */
 52 |   getVMProviderType(): VMProviderType | undefined {
 53 |     return this.vmProvider;
 54 |   }
 55 | 
 56 |   /**
 57 |    * Shared method available to all computer types
 58 |    */
 59 |   async disconnect(): Promise<void> {
 60 |     logger.info(`Disconnecting from ${this.name}`);
 61 |     // Implementation would go here
 62 |   }
 63 | 
 64 |   /**
 65 |    * Parse display string into Display object
 66 |    * @param display Display string in format "WIDTHxHEIGHT"
 67 |    * @returns Display object
 68 |    */
 69 |   public static parseDisplayString(display: string): Display {
 70 |     const match = display.match(/^(\d+)x(\d+)$/);
 71 |     if (!match) {
 72 |       throw new Error(`Invalid display format: ${display}. Expected format: WIDTHxHEIGHT`);
 73 |     }
 74 | 
 75 |     return {
 76 |       width: Number.parseInt(match[1], 10),
 77 |       height: Number.parseInt(match[2], 10),
 78 |     };
 79 |   }
 80 | 
 81 |   /**
 82 |    * Parse memory string to MB integer.
 83 |    *
 84 |    * Examples:
 85 |    *   "8GB" -> 8192
 86 |    *   "1024MB" -> 1024
 87 |    *   "512" -> 512
 88 |    *
 89 |    * @param memoryStr - Memory string to parse
 90 |    * @returns Memory value in MB
 91 |    */
 92 |   public static parseMemoryString(memoryStr: string): number {
 93 |     if (!memoryStr) {
 94 |       return 0;
 95 |     }
 96 | 
 97 |     // Convert to uppercase for case-insensitive matching
 98 |     const upperStr = memoryStr.toUpperCase().trim();
 99 | 
100 |     // Extract numeric value and unit
101 |     const match = upperStr.match(/^(\d+(?:\.\d+)?)\s*(GB|MB)?$/);
102 |     if (!match) {
103 |       throw new Error(`Invalid memory format: ${memoryStr}`);
104 |     }
105 | 
106 |     const value = Number.parseFloat(match[1]);
107 |     const unit = match[2] || 'MB'; // Default to MB if no unit specified
108 | 
109 |     // Convert to MB
110 |     if (unit === 'GB') {
111 |       return Math.round(value * 1024);
112 |     }
113 |     return Math.round(value);
114 |   }
115 | }
116 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/computer-sdk/computer-server/Commands.mdx:
--------------------------------------------------------------------------------

```markdown
 1 | ---
 2 | title: Supported Commands
 3 | description: List of all commands supported by the Computer Server API (WebSocket and REST).
 4 | ---
 5 | 
 6 | # Commands Reference
 7 | 
 8 | This page lists all supported commands for the Computer Server, available via both WebSocket and REST API endpoints.
 9 | 
10 | | Command                | Description                           |
11 | | ---------------------- | ------------------------------------- |
12 | | version                | Get protocol and package version info |
13 | | run_command            | Run a shell command                   |
14 | | screenshot             | Capture a screenshot                  |
15 | | get_screen_size        | Get the screen size                   |
16 | | get_cursor_position    | Get the current mouse cursor position |
17 | | mouse_down             | Mouse button down                     |
18 | | mouse_up               | Mouse button up                       |
19 | | left_click             | Left mouse click                      |
20 | | right_click            | Right mouse click                     |
21 | | double_click           | Double mouse click                    |
22 | | move_cursor            | Move mouse cursor to coordinates      |
23 | | drag_to                | Drag mouse to coordinates             |
24 | | drag                   | Drag mouse by offset                  |
25 | | key_down               | Keyboard key down                     |
26 | | key_up                 | Keyboard key up                       |
27 | | type_text              | Type text                             |
28 | | press_key              | Press a single key                    |
29 | | hotkey                 | Press a hotkey combination            |
30 | | scroll                 | Scroll the screen                     |
31 | | scroll_down            | Scroll down                           |
32 | | scroll_up              | Scroll up                             |
33 | | copy_to_clipboard      | Copy text to clipboard                |
34 | | set_clipboard          | Set clipboard content                 |
35 | | file_exists            | Check if a file exists                |
36 | | directory_exists       | Check if a directory exists           |
37 | | list_dir               | List files/directories in a directory |
38 | | read_text              | Read text from a file                 |
39 | | write_text             | Write text to a file                  |
40 | | read_bytes             | Read bytes from a file                |
41 | | write_bytes            | Write bytes to a file                 |
42 | | get_file_size          | Get file size                         |
43 | | delete_file            | Delete a file                         |
44 | | create_dir             | Create a directory                    |
45 | | delete_dir             | Delete a directory                    |
46 | | get_accessibility_tree | Get accessibility tree (if supported) |
47 | | find_element           | Find element in accessibility tree    |
48 | | diorama_cmd            | Run a diorama command (if supported)  |
49 | 
```
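
As a rough illustration of how a few of these commands surface through the Python Computer SDK used elsewhere in these docs, here is a minimal sketch. The interface method names (`get_screen_size`, `left_click`, `type_text`) are assumed to mirror the command names in the table above; see the WebSocket-API and REST-API pages for the raw wire protocol.

```python
# Minimal sketch: invoking a few Computer Server commands through the Python
# Computer SDK. Interface method names are assumed to mirror the command names.
import asyncio

from computer import Computer


async def main():
    async with Computer(os_type="linux", provider_type="cloud",
                        name="my-sandbox", api_key="...") as computer:
        image = await computer.interface.screenshot()         # screenshot
        size = await computer.interface.get_screen_size()     # get_screen_size
        await computer.interface.left_click(100, 200)         # left_click
        await computer.interface.type_text("hello world")     # type_text
        print(size, len(image))


asyncio.run(main())
```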

--------------------------------------------------------------------------------
/libs/python/computer/computer/logger.py:
--------------------------------------------------------------------------------

```python
 1 | """Logging utilities for the Computer module."""
 2 | 
 3 | import logging
 4 | from enum import IntEnum
 5 | 
 6 | 
 7 | # Keep LogLevel for backward compatibility, but it will be deprecated
 8 | class LogLevel(IntEnum):
 9 |     """Log levels for logging. Deprecated - use standard logging levels instead."""
10 | 
11 |     QUIET = 0  # Only warnings and errors
12 |     NORMAL = 1  # Info level, standard output
13 |     VERBOSE = 2  # More detailed information
14 |     DEBUG = 3  # Full debug information
15 | 
16 | 
17 | # Map LogLevel to standard logging levels for backward compatibility
18 | LOGLEVEL_MAP = {
19 |     LogLevel.QUIET: logging.WARNING,
20 |     LogLevel.NORMAL: logging.INFO,
21 |     LogLevel.VERBOSE: logging.DEBUG,
22 |     LogLevel.DEBUG: logging.DEBUG,
23 | }
24 | 
25 | 
26 | class Logger:
27 |     """Logger class for Computer."""
28 | 
29 |     def __init__(self, name: str, verbosity: int):
30 |         """Initialize the logger.
31 | 
32 |         Args:
33 |             name: The name of the logger.
34 |             verbosity: The log level (use standard logging levels like logging.INFO).
35 |                        For backward compatibility, LogLevel enum values are also accepted.
36 |         """
37 |         self.logger = logging.getLogger(name)
38 | 
39 |         # Convert LogLevel enum to standard logging level if needed
40 |         if isinstance(verbosity, LogLevel):
41 |             self.verbosity = LOGLEVEL_MAP.get(verbosity, logging.INFO)
42 |         else:
43 |             self.verbosity = verbosity
44 | 
45 |         self._configure()
46 | 
47 |     def _configure(self):
48 |         """Configure the logger based on log level."""
49 |         # Set the logging level directly
50 |         self.logger.setLevel(self.verbosity)
51 | 
52 |         # Log the verbosity level that was set
53 |         if self.verbosity <= logging.DEBUG:
54 |             self.logger.info("Logger set to DEBUG level")
55 |         elif self.verbosity <= logging.INFO:
56 |             self.logger.info("Logger set to INFO level")
57 |         elif self.verbosity <= logging.WARNING:
58 |             self.logger.warning("Logger set to WARNING level")
59 |         elif self.verbosity <= logging.ERROR:
60 |             self.logger.warning("Logger set to ERROR level")
61 |         elif self.verbosity <= logging.CRITICAL:
62 |             self.logger.warning("Logger set to CRITICAL level")
63 | 
64 |     def debug(self, message: str):
65 |         """Log a debug message if log level is DEBUG or lower."""
66 |         self.logger.debug(message)
67 | 
68 |     def info(self, message: str):
69 |         """Log an info message if log level is INFO or lower."""
70 |         self.logger.info(message)
71 | 
72 |     def verbose(self, message: str):
73 |         """Log a verbose message between INFO and DEBUG levels."""
74 |         # Since there's no standard verbose level,
75 |         # use debug level with [VERBOSE] prefix for backward compatibility
76 |         self.logger.debug(f"[VERBOSE] {message}")
77 | 
78 |     def warning(self, message: str):
79 |         """Log a warning message."""
80 |         self.logger.warning(message)
81 | 
82 |     def error(self, message: str):
83 |         """Log an error message."""
84 |         self.logger.error(message)
85 | 
```
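
A minimal usage sketch for the `Logger` wrapper above (module path per this repository's `computer` package). Both a standard `logging` level and the deprecated `LogLevel` enum are accepted, with `LogLevel.VERBOSE` mapped to `logging.DEBUG`:

```python
import logging

from computer.logger import Logger, LogLevel

log = Logger("computer.example", logging.INFO)
log.info("Connected to VM")           # emitted at INFO
log.verbose("Raw response payload")   # forwarded to DEBUG with a [VERBOSE] prefix

legacy = Logger("computer.legacy", LogLevel.VERBOSE)  # mapped to logging.DEBUG
legacy.debug("Detailed trace output")
```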

--------------------------------------------------------------------------------
/docs/content/docs/computer-sdk/sandboxed-python.mdx:
--------------------------------------------------------------------------------

```markdown
 1 | ---
 2 | title: Sandboxed Python
 3 | slug: sandboxed-python
 4 | ---
 5 | 
 6 | <Callout>
 7 |   A corresponding <a href="https://github.com/trycua/cua/blob/main/examples/sandboxed_functions_examples.py" target="_blank">Python example</a> is available for this documentation.
 8 | </Callout>
 9 | 
10 | You can run Python functions securely inside a sandboxed virtual environment on a remote Cua Computer. This is useful for executing untrusted user code, isolating dependencies, or providing a safe environment for automation tasks.
11 | 
12 | ## How It Works
13 | 
14 | The `sandboxed` decorator from the Computer SDK wraps a Python function so that it executes remotely in a specified virtual environment on the target Computer. The function and its arguments are serialized, sent to the remote Computer, and executed in isolation. Results or errors are returned to the caller.
15 | 
16 | ## Example Usage
17 | 
18 | ```python
19 | from computer import Computer
20 | from computer.helpers import sandboxed
21 | 
22 | @sandboxed()
23 | def read_file(location: str) -> str:
24 |     """Read contents of a file"""
25 |     with open(location, 'r') as f:
26 |         return f.read()
27 | 
28 | async def main():
29 |     async with Computer(os_type="linux", provider_type="cloud", name="my-sandbox", api_key="...") as computer:
30 |         # Call the sandboxed function (runs remotely)
31 |         result = await read_file("/etc/hostname")
32 |         print(result)
33 | ```
34 | 
35 | ## Installing Python Packages
36 | 
37 | You can specify the virtual environment name and target computer:
38 | 
39 | ```python
40 | @sandboxed(venv_name="myenv", computer=my_computer, max_retries=5)
41 | def my_function(...):
42 |     ...
43 | ```
44 | 
45 | You can also install packages in the virtual environment using the `venv_install` method:
46 | 
47 | ```python
48 | await my_computer.venv_install("myenv", ["requests"])
49 | ```
50 | 
51 | ## Example: Interacting with macOS Applications
52 | 
53 | You can use sandboxed functions to interact with macOS applications on a local Cua Computer (requires `os_type="darwin"`). This is particularly useful for automation tasks that involve GUI applications.
54 | 
55 | ```python
56 | # Example: Use sandboxed functions to execute code in a Cua Sandbox
57 | from computer.helpers import sandboxed
58 | 
59 | await computer.venv_install("demo_venv", ["macos-pyxa"]) # Install packages in a virtual environment
60 | 
61 | @sandboxed("demo_venv")
62 | def greet_and_print(name):
63 |     """Greet the caller and return the HTML of the current Safari tab"""
64 |     import PyXA
65 |     safari = PyXA.Application("Safari")
66 |     html = safari.current_document.source()
67 |     print(f"Hello from inside the sandbox, {name}!")
68 |     return {"greeted": name, "safari_html": html}
69 | 
70 | # When a @sandboxed function is called, it will execute in the sandbox
71 | result = await greet_and_print("Cua")
72 | # Result: {"greeted": "Cua", "safari_html": "<html>...</html>"}
73 | # stdout and stderr are also captured and printed / raised
74 | print("Result from sandboxed function:", result)
75 | ```
76 | 
77 | ## Error Handling
78 | 
79 | If the remote execution fails, the decorator will retry up to `max_retries` times. If all attempts fail, the last exception is raised locally.
80 | 
```
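
A minimal sketch of the retry behavior described above, assuming that after `max_retries` failed attempts the last remote exception is re-raised to the caller (the concrete exception type depends on how the remote error is propagated):

```python
from computer.helpers import sandboxed


@sandboxed(venv_name="demo_venv", max_retries=3)
def read_missing_file(path: str) -> str:
    with open(path) as f:  # fails remotely if the file does not exist
        return f.read()


async def run():
    try:
        return await read_missing_file("/does/not/exist")
    except Exception as exc:  # last remote exception re-raised after retries
        print(f"Sandboxed call failed after 3 attempts: {exc}")
```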

--------------------------------------------------------------------------------
/libs/python/mcp-server/desktop-extension/manifest.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "manifest_version": "0.2",
 3 |   "name": "cua-mcp-server",
 4 |   "display_name": "CUA Computer-Use Agent",
 5 |   "version": "1.0.0",
 6 |   "description": "Computer-Use Agent (CUA) MCP server for desktop automation and interaction",
 7 |   "long_description": "The CUA Computer-Use Agent extension provides powerful desktop automation capabilities through Claude Desktop. It can take screenshots, interact with applications, and execute complex computer tasks using AI agents. Perfect for automating repetitive desktop workflows, testing applications, and performing computer-based tasks through natural language instructions.",
 8 |   "author": {
 9 |     "name": "Cua",
10 |     "email": "[email protected]",
11 |     "url": "https://trycua.com"
12 |   },
13 |   "repository": {
14 |     "type": "git",
15 |     "url": "https://github.com/trycua/cua"
16 |   },
17 |   "homepage": "https://trycua.com",
18 |   "documentation": "https://docs.trycua.com",
19 |   "support": "https://github.com/trycua/cua/issues",
20 |   "icon": "desktop_extension.png",
21 |   "server": {
22 |     "type": "python",
23 |     "entry_point": "server.py",
24 |     "mcp_config": {
25 |       "command": "${__dirname}/run_server.sh",
26 |       "args": ["${__dirname}/server.py"],
27 |       "env": {
28 |         "PYTHONPATH": "${__dirname}",
29 |         "API_KEY": "${user_config.api_key}",
30 |         "CUA_MODEL_NAME": "${user_config.model_name}",
31 |         "CUA_MAX_IMAGES": "${user_config.max_images}"
32 |       }
33 |     }
34 |   },
35 |   "tools": [
36 |     {
37 |       "name": "screenshot_cua",
38 |       "description": "Take a screenshot of the current desktop screen and return the image"
39 |     },
40 |     {
41 |       "name": "run_cua_task",
42 |       "description": "Run a Computer-Use Agent task on the desktop and return the result with screenshot"
43 |     },
44 |     {
45 |       "name": "run_multi_cua_tasks",
46 |       "description": "Run multiple Computer-Use Agent tasks sequentially or concurrently"
47 |     },
48 |     {
49 |       "name": "get_session_stats",
50 |       "description": "Get statistics about active sessions and resource usage"
51 |     },
52 |     {
53 |       "name": "cleanup_session",
54 |       "description": "Cleanup a specific session and release its resources"
55 |     }
56 |   ],
57 |   "keywords": ["automation", "computer-use", "desktop", "ai-agent", "productivity"],
58 |   "license": "MIT",
59 |   "user_config": {
60 |     "api_key": {
61 |       "type": "string",
62 |       "title": "API Key",
63 |       "description": "Your API key for the AI model (Anthropic, OpenAI, etc.)",
64 |       "sensitive": true,
65 |       "required": true
66 |     },
67 |     "model_name": {
68 |       "type": "string",
69 |       "title": "Model Name",
70 |       "description": "The AI model to use for computer tasks (e.g., anthropic/claude-sonnet-4-20250514, openai/gpt-4o)",
71 |       "default": "anthropic/claude-sonnet-4-20250514",
72 |       "required": false
73 |     },
74 |     "max_images": {
75 |       "type": "number",
76 |       "title": "Maximum Images",
77 |       "description": "Maximum number of recent images to keep in context (default: 3)",
78 |       "default": 3,
79 |       "min": 1,
80 |       "max": 10,
81 |       "required": false
82 |     }
83 |   }
84 | }
85 | 
```

--------------------------------------------------------------------------------
/libs/lume/tests/VNCServiceTests.swift:
--------------------------------------------------------------------------------

```swift
 1 | import Foundation
 2 | import Testing
 3 | @testable import lume
 4 | 
 5 | @Test("VNCService starts correctly")
 6 | func testVNCServiceStart() async throws {
 7 |     let tempDir = try createTempDirectory()
 8 |     let vmDir = VMDirectory(Path(tempDir.path))
 9 |     let service = await MockVNCService(vmDirectory: vmDir)
10 |     
11 |     // Initial state
12 |     let isRunning = await service.isRunning
13 |     let url = await service.url
14 |     #expect(!isRunning)
15 |     #expect(url == nil)
16 |     
17 |     // Start service
18 |     try await service.start(port: 5900, virtualMachine: nil)
19 |     #expect(await service.isRunning)
20 |     #expect(await service.url?.contains("5900") ?? false)
21 | }
22 | 
23 | @Test("VNCService stops correctly")
24 | func testVNCServiceStop() async throws {
25 |     let tempDir = try createTempDirectory()
26 |     let vmDir = VMDirectory(Path(tempDir.path))
27 |     let service = await MockVNCService(vmDirectory: vmDir)
28 |     try await service.start(port: 5900, virtualMachine: nil)
29 |     
30 |     await service.stop()
31 |     let isRunning = await service.isRunning
32 |     let url = await service.url
33 |     #expect(!isRunning)
34 |     #expect(url == nil)
35 | }
36 | 
37 | @Test("VNCService handles client operations")
38 | func testVNCServiceClient() async throws {
39 |     let tempDir = try createTempDirectory()
40 |     let vmDir = VMDirectory(Path(tempDir.path))
41 |     let service = await MockVNCService(vmDirectory: vmDir)
42 |     
43 |     // Should fail when not started
44 |     do {
45 |         try await service.openClient(url: "vnc://localhost:5900")
46 |         #expect(Bool(false), "Expected openClient to throw when not started")
47 |     } catch VMError.vncNotConfigured {
48 |         // Expected error
49 |     } catch {
50 |         #expect(Bool(false), "Expected vncNotConfigured error but got \(error)")
51 |     }
52 |     
53 |     // Start and try client operations
54 |     try await service.start(port: 5900, virtualMachine: nil)
55 |     try await service.openClient(url: "vnc://localhost:5900")
56 |     #expect(await service.clientOpenCount == 1)
57 |     
58 |     // Stop and verify client operations fail
59 |     await service.stop()
60 |     do {
61 |         try await service.openClient(url: "vnc://localhost:5900")
62 |         #expect(Bool(false), "Expected openClient to throw after stopping")
63 |     } catch VMError.vncNotConfigured {
64 |         // Expected error
65 |     } catch {
66 |         #expect(Bool(false), "Expected vncNotConfigured error but got \(error)")
67 |     }
68 | }
69 | 
70 | @Test("VNCService handles virtual machine attachment")
71 | func testVNCServiceVMAttachment() async throws {
72 |     let tempDir = try createTempDirectory()
73 |     let vmDir = VMDirectory(Path(tempDir.path))
74 |     let service = await MockVNCService(vmDirectory: vmDir)
75 |     let mockVM = "mock_vm"
76 |     
77 |     try await service.start(port: 5900, virtualMachine: mockVM)
78 |     let attachedVM = await service.attachedVM
79 |     #expect(attachedVM == mockVM)
80 | }
81 | 
82 | private func createTempDirectory() throws -> URL {
83 |     let tempDir = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString)
84 |     try FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: true)
85 |     return tempDir
86 | } 
```

--------------------------------------------------------------------------------
/docs/content/docs/index.mdx:
--------------------------------------------------------------------------------

```markdown
 1 | ---
 2 | title: Introduction
 3 | ---
 4 | 
 5 | import { Monitor, Code, BookOpen, Zap, Bot, Boxes, Rocket } from 'lucide-react';
 6 | 
 7 | <div className="not-prose -mt-2 mb-6">
 8 |   <p className="text-fd-primary font-semibold text-sm mb-1">Welcome</p>
 9 |   <h1 className="text-3xl font-bold tracking-tight md:text-4xl">Welcome to Cua</h1>
10 | </div>
11 | 
12 | **Cua** is an open-source framework for building, deploying and evaluating Computer-Use Agents - AI systems that autonomously interact with computer interfaces by understanding visual elements and executing actions. Cua provides SDKs for easy integration with 100+ vision-language models (VLMs), supporting everything from simple task automation to complex multi-step workflows across Windows, Linux, and macOS environments.
13 | 
14 | <div className="not-prose relative rounded-xl overflow-hidden my-8 w-full">
15 |   <img src="/docs/img/hero.png" alt="Cua" className="w-full h-auto rounded-xl" />
16 | </div>
17 | 
18 | ## What is a Computer-Use Agent?
19 | 
20 | Computer-Use Agents (CUAs) are AI systems that can autonomously interact with computer interfaces through visual understanding and action execution. They work by capturing screenshots, feeding them to a vision-language model (VLM), and letting the model determine the next action to take - such as clicking, typing, or scrolling - in a continuous loop until the task is complete.
21 | 
22 | ## What is a Computer-Use Sandbox?
23 | 
24 | Computer-Use Sandboxes are isolated, controlled environments where AI agents can safely interact with computer interfaces. They give agents a secure execution space to perform actions such as clicking, typing, and running code, to test automation workflows, and to learn from interactions without affecting production systems.
25 | 
26 | ## Key Features
27 | 
28 | With the **Computer SDK**, you can:
29 | - Automate **Windows, Linux, and macOS** sandboxes with a consistent, pyautogui-like API
30 | - Create & manage sandboxes locally or using **Cua Cloud**
31 | 
32 | With the **Agent SDK**, you can:
33 | - Run computer-use models with a consistent schema
34 | - Benchmark on **OSWorld-Verified**, **SheetBench-V2**, and **ScreenSpot**
35 | - Combine UI grounding models with any LLM using **composed agents**
36 | - Use **100+ models** via API or local inference (Claude, GPT-4, Gemini, Ollama, MLX)
37 | 
38 | ## Get Started
39 | 
40 | Follow the [Quickstart guide](/get-started/quickstart) for step-by-step setup with Python or TypeScript.
41 | 
42 | Check out our [tutorials](https://cua.ai/blog), [examples](https://github.com/trycua/cua/tree/main/examples), and [notebooks](https://github.com/trycua/cua/tree/main/notebooks) to start building with Cua today.
43 | 
44 | <div className="grid grid-cols-2 md:grid-cols-4 gap-2 mt-4 text-sm">
45 |   <Card icon={<Rocket className="w-4 h-4" />} href="/get-started/quickstart" title="Quickstart" />
46 |   <Card icon={<Zap className="w-4 h-4" />} href="/agent-sdk/agent-loops" title="Agent Loops" />
47 |   <Card icon={<BookOpen className="w-4 h-4" />} href="/computer-sdk/computers" title="Computer SDK" />
48 |   <Card icon={<Monitor className="w-4 h-4" />} href="/example-usecases/form-filling" title="Examples" />
49 | </div>
50 | 
```
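
To make the screenshot → VLM → action loop described above concrete, here is a minimal sketch using the Computer and Agent SDK APIs shown in these docs; the model name and sandbox parameters are placeholders, and import paths follow this repository's Python packages (`computer`, `agent`):

```python
import asyncio

from agent import ComputerAgent
from computer import Computer


async def main():
    # Cloud Linux sandbox (name and api_key are placeholders)
    async with Computer(os_type="linux", provider_type="cloud",
                        name="my-sandbox", api_key="...") as computer:
        # The agent loop: capture a screenshot, let the VLM pick the next
        # action, execute it, and repeat until the task is complete.
        agent = ComputerAgent("claude-sonnet-4-5-20250929", tools=[computer])
        async for _ in agent.run("Open Firefox and navigate to github.com"):
            pass


asyncio.run(main())
```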

--------------------------------------------------------------------------------
/examples/computer-example-ts/src/index.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import { Computer, OSType } from '@trycua/computer';
  2 | import OpenAI from 'openai';
  3 | import { executeAction } from './helpers';
  4 | 
  5 | import 'dotenv/config';
  6 | 
  7 | const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  8 | 
  9 | const COMPUTER_USE_PROMPT = 'Open firefox and go to cua.ai';
 10 | 
 11 | // Initialize the Computer Connection
 12 | const computer = new Computer({
 13 |   apiKey: process.env.CUA_API_KEY!,
 14 |   name: process.env.CUA_CONTAINER_NAME!,
 15 |   osType: OSType.LINUX,
 16 | });
 17 | 
 18 | await computer.run();
 19 | // Take the initial screenshot
 20 | const screenshot = await computer.interface.screenshot();
 21 | const screenshotBase64 = screenshot.toString('base64');
 22 | 
 23 | // Setup openai config for computer use
 24 | const computerUseConfig: OpenAI.Responses.ResponseCreateParamsNonStreaming = {
 25 |   model: 'computer-use-preview',
 26 |   tools: [
 27 |     {
 28 |       type: 'computer_use_preview',
 29 |       display_width: 1024,
 30 |       display_height: 768,
 31 |       environment: 'linux', // we're using a linux vm
 32 |     },
 33 |   ],
 34 |   truncation: 'auto',
 35 | };
 36 | 
 37 | // Send initial screenshot to the openai computer use model
 38 | let res = await openai.responses.create({
 39 |   ...computerUseConfig,
 40 |   input: [
 41 |     {
 42 |       role: 'user',
 43 |       content: [
 44 |         // what we want the ai to do
 45 |         { type: 'input_text', text: COMPUTER_USE_PROMPT },
 46 |         // current screenshot of the vm
 47 |         {
 48 |           type: 'input_image',
 49 |           image_url: `data:image/png;base64,${screenshotBase64}`,
 50 |           detail: 'auto',
 51 |         },
 52 |       ],
 53 |     },
 54 |   ],
 55 | });
 56 | 
 57 | // Loop until there are no more computer use actions.
 58 | while (true) {
 59 |   const computerCalls = res.output.filter((o) => o.type === 'computer_call');
 60 |   if (computerCalls.length < 1) {
 61 |     console.log('No more computer calls. Loop complete.');
 62 |     break;
 63 |   }
 64 |   // Get the first call
 65 |   const call = computerCalls[0];
 66 |   const action = call.action;
 67 |   console.log('Received action from OpenAI Responses API:', action);
 68 |   let ackChecks: OpenAI.Responses.ResponseComputerToolCall.PendingSafetyCheck[] = [];
 69 |   if (call.pending_safety_checks.length > 0) {
 70 |     console.log('Safety checks pending:', call.pending_safety_checks);
 71 |     // In a real implementation, you would want to get user confirmation here
 72 |     ackChecks = call.pending_safety_checks;
 73 |   }
 74 | 
 75 |   // Execute the action in the container
 76 |   await executeAction(computer, action);
 77 |   // Wait for changes to process within the container (1sec)
 78 |   await new Promise((resolve) => setTimeout(resolve, 1000));
 79 | 
 80 |   // Capture new screenshot
 81 |   const newScreenshot = await computer.interface.screenshot();
 82 |   const newScreenshotBase64 = newScreenshot.toString('base64');
 83 | 
 84 |   // Send the new screenshot back as a computer_call_output
 85 | 
 86 |   res = await openai.responses.create({
 87 |     ...computerUseConfig,
 88 |     previous_response_id: res.id,
 89 |     input: [
 90 |       {
 91 |         type: 'computer_call_output',
 92 |         call_id: call.call_id,
 93 |         acknowledged_safety_checks: ackChecks,
 94 |         output: {
 95 |           type: 'computer_screenshot',
 96 |           image_url: `data:image/png;base64,${newScreenshotBase64}`,
 97 |         },
 98 |       },
 99 |     ],
100 |   });
101 | }
102 | 
103 | process.exit();
104 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/supported-agents/computer-use-agents.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: All‑in‑one CUA Models
  3 | description: Models that support full computer-use agent capabilities with ComputerAgent.run()
  4 | ---
  5 | 
  6 | These models support complete computer-use agent functionality through `ComputerAgent.run()`. They can understand natural language instructions and autonomously perform sequences of actions to complete tasks.
  7 | 
  8 | All agent loops are compatible with any LLM provider supported by LiteLLM.
  9 | 
 10 | See [Running Models Locally](/agent-sdk/supported-model-providers/local-models) for how to use Hugging Face and MLX models on your own machine.
 11 | 
 12 | ## Gemini CUA
 13 | 
 14 | Gemini models with computer-use capabilities:
 15 | 
 16 | - Gemini 2.5 CUA: `gemini-2.5-computer-use-preview-10-2025`
 17 | 
 18 | ```python
 19 | agent = ComputerAgent("gemini-2.5-computer-use-preview-10-2025", tools=[computer])
 20 | async for _ in agent.run("Open Firefox and navigate to github.com"):
 21 |     pass
 22 | ```
 23 | 
 24 | ## Anthropic CUAs
 25 | 
 26 | Claude models with computer-use capabilities:
 27 | 
 28 | - Claude 4.5: `claude-sonnet-4-5-20250929`, `claude-haiku-4-5-20251001`
 29 | - Claude 4.1: `claude-opus-4-1-20250805`
 30 | - Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514`
 31 | - Claude 3.7: `claude-3-7-sonnet-20250219`
 32 | 
 33 | ```python
 34 | agent = ComputerAgent("claude-sonnet-4-5-20250929", tools=[computer])
 35 | async for _ in agent.run("Open Firefox and navigate to github.com"):
 36 |     pass
 37 | ```
 38 | 
 39 | ## OpenAI CUA Preview
 40 | 
 41 | OpenAI's computer-use preview model:
 42 | 
 43 | - Computer-use-preview: `computer-use-preview`
 44 | 
 45 | ```python
 46 | agent = ComputerAgent("openai/computer-use-preview", tools=[computer])
 47 | async for _ in agent.run("Take a screenshot and describe what you see"):
 48 |     pass
 49 | ```
 50 | 
 51 | ## GLM-4.5V
 52 | 
 53 | Zhipu AI's GLM-4.5V vision-language model with computer-use capabilities:
 54 | 
 55 | - `openrouter/z-ai/glm-4.5v`
 56 | - `huggingface-local/zai-org/GLM-4.5V`
 57 | 
 58 | ```python
 59 | agent = ComputerAgent("openrouter/z-ai/glm-4.5v", tools=[computer])
 60 | async for _ in agent.run("Click on the search bar and type 'hello world'"):
 61 |     pass
 62 | ```
 63 | 
 64 | ## InternVL 3.5
 65 | 
 66 | InternVL 3.5 family:
 67 | 
 68 | - `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}`
 69 | 
 70 | ```python
 71 | agent = ComputerAgent("huggingface-local/OpenGVLab/InternVL3_5-1B", tools=[computer])
 72 | async for _ in agent.run("Open Firefox and navigate to github.com"):
 73 |     pass
 74 | ```
 75 | 
 76 | ## Qwen3 VL
 77 | 
 78 | Qwen3 VL family:
 79 | 
 80 | - `cua/qwen/qwen3-vl-235b` (via CUA VLM Router - recommended)
 81 | 
 82 | ```python
 83 | agent = ComputerAgent("cua/qwen/qwen3-vl-235b", tools=[computer])
 84 | async for _ in agent.run("Open Firefox and navigate to github.com"):
 85 |     pass
 86 | ```
 87 | 
 88 | ## UI-TARS 1.5
 89 | 
 90 | Unified vision-language model for computer-use:
 91 | 
 92 | - `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B`
 93 | - `huggingface/ByteDance-Seed/UI-TARS-1.5-7B` (requires TGI endpoint)
 94 | 
 95 | ```python
 96 | agent = ComputerAgent("huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", tools=[computer])
 97 | async for _ in agent.run("Open the settings menu and change the theme to dark mode"):
 98 |     pass
 99 | ```
100 | 
101 | ---
102 | 
103 | CUAs also support direct click prediction. See [Grounding Models](./grounding-models) for details on `predict_click()`.
104 | 
105 | For details on agent loop behavior and usage, see [Agent Loops](../agent-loops).
106 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/pii_anonymization.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | PII anonymization callback handler using Microsoft Presidio for text and image redaction.
  3 | """
  4 | 
  5 | import base64
  6 | import io
  7 | import logging
  8 | from typing import Any, Dict, List, Optional, Tuple
  9 | 
 10 | from .base import AsyncCallbackHandler
 11 | 
 12 | try:
 13 |     # TODO: Add Presidio dependencies
 14 |     from PIL import Image
 15 | 
 16 |     PRESIDIO_AVAILABLE = True
 17 | except ImportError:
 18 |     PRESIDIO_AVAILABLE = False
 19 | 
 20 | logger = logging.getLogger(__name__)
 21 | 
 22 | 
 23 | class PIIAnonymizationCallback(AsyncCallbackHandler):
 24 |     """
 25 |     Callback handler that anonymizes PII in text and images using Microsoft Presidio.
 26 | 
 27 |     This handler:
 28 |     1. Anonymizes PII in messages before sending to the agent loop
 29 |     2. Deanonymizes PII in tool calls and message outputs after the agent loop
 30 |     3. Redacts PII from images in computer_call_output messages
 31 |     """
 32 | 
 33 |     def __init__(
 34 |         self,
 35 |         # TODO: Any extra kwargs if needed
 36 |     ):
 37 |         """
 38 |         Initialize the PII anonymization callback.
 39 | 
 40 |         Args:
 41 |             anonymize_text: Whether to anonymize text content
 42 |             anonymize_images: Whether to redact images
 43 |             entities_to_anonymize: List of entity types to anonymize (None for all)
 44 |             anonymization_operator: Presidio operator to use ("replace", "mask", "redact", etc.)
 45 |             image_redaction_color: RGB color for image redaction
 46 |         """
 47 |         if not PRESIDIO_AVAILABLE:
 48 |             raise ImportError(
 49 |                 "Presidio is not available. Install with: "
 50 |                 "pip install cua-agent[pii-anonymization]"
 51 |             )
 52 | 
 53 |         # TODO: Implement __init__
 54 | 
 55 |     async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 56 |         """
 57 |         Anonymize PII in messages before sending to agent loop.
 58 | 
 59 |         Args:
 60 |             messages: List of message dictionaries
 61 | 
 62 |         Returns:
 63 |             List of messages with PII anonymized
 64 |         """
 65 |         anonymized_messages = []
 66 |         for msg in messages:
 67 |             anonymized_msg = await self._anonymize_message(msg)
 68 |             anonymized_messages.append(anonymized_msg)
 69 | 
 70 |         return anonymized_messages
 71 | 
 72 |     async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 73 |         """
 74 |         Deanonymize PII in tool calls and message outputs after agent loop.
 75 | 
 76 |         Args:
 77 |             output: List of output dictionaries
 78 | 
 79 |         Returns:
 80 |             List of output with PII deanonymized for tool calls
 81 |         """
 82 |         deanonymized_output = []
 83 |         for item in output:
 84 |             # Only deanonymize tool calls and computer_call messages
 85 |             if item.get("type") in ["computer_call", "computer_call_output"]:
 86 |                 deanonymized_item = await self._deanonymize_item(item)
 87 |                 deanonymized_output.append(deanonymized_item)
 88 |             else:
 89 |                 deanonymized_output.append(item)
 90 | 
 91 |         return deanonymized_output
 92 | 
 93 |     async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
 94 |         # TODO: Implement _anonymize_message
 95 |         return message
 96 | 
 97 |     async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
 98 |         # TODO: Implement _deanonymize_item
 99 |         return item
100 | 
```
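
The callback above is still a skeleton (the Presidio imports and the `_anonymize_message` / `_deanonymize_item` bodies are TODOs). Below is a minimal sketch of how the text-anonymization step could look with Presidio, assuming `presidio-analyzer` and `presidio-anonymizer` are installed; it is illustrative only and not the shipped implementation.

```python
# Hedged sketch only: one way the _anonymize_message text step could use Presidio.
# Assumes `pip install presidio-analyzer presidio-anonymizer` (plus a spaCy model).
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine

analyzer = AnalyzerEngine()
anonymizer = AnonymizerEngine()


def anonymize_text(text: str) -> str:
    """Replace detected PII spans (names, emails, phone numbers, ...) with placeholders."""
    results = analyzer.analyze(text=text, language="en")
    return anonymizer.anonymize(text=text, analyzer_results=results).text


if __name__ == "__main__":
    print(anonymize_text("Contact Jane Doe at jane.doe@example.com"))
    # e.g. "Contact <PERSON> at <EMAIL_ADDRESS>"
```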

--------------------------------------------------------------------------------
/libs/lume/tests/VM/VMDetailsPrinterTests.swift:
--------------------------------------------------------------------------------

```swift
 1 | import Foundation
 2 | import Testing
 3 | 
 4 | @testable import lume
 5 | 
 6 | struct VMDetailsPrinterTests {
 7 | 
 8 |     @Test func printStatus_whenJSON() throws {
 9 |         // Given
10 |         let vms: [VMDetails] = [
11 |             VMDetails(
12 |                 name: "name",
13 |                 os: "os",
14 |                 cpuCount: 2,
15 |                 memorySize: 1024,
16 |                 diskSize: .init(allocated: 24, total: 30),
17 |                 display: "1024x768",
18 |                 status: "status",
19 |                 vncUrl: "vncUrl",
20 |                 ipAddress: "0.0.0.0",
21 |                 locationName: "mockLocation")
22 |         ]
23 |         let jsonEncoder = JSONEncoder()
24 |         jsonEncoder.outputFormatting = .prettyPrinted
25 |         let expectedOutput = try String(data: jsonEncoder.encode(vms), encoding: .utf8)!
26 | 
27 |         // When
28 |         var printedStatus: String?
29 |         try VMDetailsPrinter.printStatus(vms, format: .json, print: { printedStatus = $0 })
30 | 
31 |         // Then
32 |         // Decode both JSONs and compare the actual data structures
33 |         let jsonDecoder = JSONDecoder()
34 |         let printedVMs = try jsonDecoder.decode(
35 |             [VMDetails].self, from: printedStatus!.data(using: .utf8)!)
36 |         let expectedVMs = try jsonDecoder.decode(
37 |             [VMDetails].self, from: expectedOutput.data(using: .utf8)!)
38 | 
39 |         #expect(printedVMs.count == expectedVMs.count)
40 |         for (printed, expected) in zip(printedVMs, expectedVMs) {
41 |             #expect(printed.name == expected.name)
42 |             #expect(printed.os == expected.os)
43 |             #expect(printed.cpuCount == expected.cpuCount)
44 |             #expect(printed.memorySize == expected.memorySize)
45 |             #expect(printed.diskSize.allocated == expected.diskSize.allocated)
46 |             #expect(printed.diskSize.total == expected.diskSize.total)
47 |             #expect(printed.status == expected.status)
48 |             #expect(printed.vncUrl == expected.vncUrl)
49 |             #expect(printed.ipAddress == expected.ipAddress)
50 |         }
51 |     }
52 | 
53 |     @Test func printStatus_whenNotJSON() throws {
54 |         // Given
55 |         let vms: [VMDetails] = [
56 |             VMDetails(
57 |                 name: "name",
58 |                 os: "os",
59 |                 cpuCount: 2,
60 |                 memorySize: 1024,
61 |                 diskSize: .init(allocated: 24, total: 30),
62 |                 display: "1024x768",
63 |                 status: "status",
64 |                 vncUrl: "vncUrl",
65 |                 ipAddress: "0.0.0.0",
66 |                 locationName: "mockLocation")
67 |         ]
68 | 
69 |         // When
70 |         var printedLines: [String] = []
71 |         try VMDetailsPrinter.printStatus(vms, format: .text, print: { printedLines.append($0) })
72 | 
73 |         // Then
74 |         #expect(printedLines.count == 2)
75 | 
76 |         let headerParts = printedLines[0].split(whereSeparator: \.isWhitespace)
77 |         #expect(
78 |             headerParts == [
79 |                 "name", "os", "cpu", "memory", "disk", "display", "status", "storage", "shared_dirs", "ip", "vnc",
80 |             ])
81 | 
82 |         #expect(
83 |             printedLines[1].split(whereSeparator: \.isWhitespace).map(String.init) == [
84 |                 "name", "os", "2", "0.00G", "24.0B/30.0B", "1024x768", "status", "mockLocation",
85 |                 "-",
86 |                 "0.0.0.0",
87 |                 "vncUrl",
88 |             ])
89 |     }
90 | }
91 | 
```

--------------------------------------------------------------------------------
/libs/lume/src/Server/HTTP.swift:
--------------------------------------------------------------------------------

```swift
  1 | import Foundation
  2 | import Network
  3 | 
  4 | enum HTTPError: Error {
  5 |     case internalError
  6 | }
  7 | 
  8 | struct HTTPRequest {
  9 |     let method: String
 10 |     let path: String
 11 |     let headers: [String: String]
 12 |     let body: Data?
 13 |     
 14 |     init?(data: Data) {
 15 |         guard let requestString = String(data: data, encoding: .utf8) else { return nil }
 16 |         let components = requestString.components(separatedBy: "\r\n\r\n")
 17 |         guard components.count >= 1 else { return nil }
 18 |         
 19 |         let headerLines = components[0].components(separatedBy: "\r\n")
 20 |         guard !headerLines.isEmpty else { return nil }
 21 |         
 22 |         // Parse request line
 23 |         let requestLine = headerLines[0].components(separatedBy: " ")
 24 |         guard requestLine.count >= 2 else { return nil }
 25 |         
 26 |         self.method = requestLine[0]
 27 |         self.path = requestLine[1]
 28 |         
 29 |         // Parse headers
 30 |         var headers: [String: String] = [:]
 31 |         for line in headerLines.dropFirst() {
 32 |             let headerComponents = line.split(separator: ":", maxSplits: 1).map(String.init)
 33 |             if headerComponents.count == 2 {
 34 |                 headers[headerComponents[0].trimmingCharacters(in: .whitespaces)] = 
 35 |                     headerComponents[1].trimmingCharacters(in: .whitespaces)
 36 |             }
 37 |         }
 38 |         self.headers = headers
 39 |         
 40 |         // Parse body if present
 41 |         if components.count > 1 {
 42 |             self.body = components[1].data(using: .utf8)
 43 |         } else {
 44 |             self.body = nil
 45 |         }
 46 |     }
 47 | }
 48 | 
 49 | struct HTTPResponse {
 50 |     enum StatusCode: Int {
 51 |         case ok = 200
 52 |         case accepted = 202
 53 |         case badRequest = 400
 54 |         case notFound = 404
 55 |         case internalServerError = 500
 56 |         
 57 |         var description: String {
 58 |             switch self {
 59 |             case .ok: return "OK"
 60 |             case .accepted: return "Accepted"
 61 |             case .badRequest: return "Bad Request"
 62 |             case .notFound: return "Not Found"
 63 |             case .internalServerError: return "Internal Server Error"
 64 |             }
 65 |         }
 66 |     }
 67 |     
 68 |     let statusCode: StatusCode
 69 |     let headers: [String: String]
 70 |     let body: Data?
 71 |     
 72 |     init(statusCode: StatusCode, headers: [String: String] = [:], body: Data? = nil) {
 73 |         self.statusCode = statusCode
 74 |         self.headers = headers
 75 |         self.body = body
 76 |     }
 77 |     
 78 |     init(statusCode: StatusCode, body: String) {
 79 |         self.statusCode = statusCode
 80 |         self.headers = ["Content-Type": "text/plain"]
 81 |         self.body = body.data(using: .utf8)
 82 |     }
 83 |     
 84 |     func serialize() -> Data {
 85 |         var response = "HTTP/1.1 \(statusCode.rawValue) \(statusCode.description)\r\n"
 86 |         
 87 |         var headers = self.headers
 88 |         if let body = body {
 89 |             headers["Content-Length"] = "\(body.count)"
 90 |         }
 91 |         
 92 |         for (key, value) in headers {
 93 |             response += "\(key): \(value)\r\n"
 94 |         }
 95 |         
 96 |         response += "\r\n"
 97 |         
 98 |         var responseData = response.data(using: .utf8) ?? Data()
 99 |         if let body = body {
100 |             responseData.append(body)
101 |         }
102 |         
103 |         return responseData
104 |     }
105 | }
106 | 
107 | final class HTTPServer {
108 |     let port: UInt16
109 |     
110 |     init(port: UInt16) {
111 |         self.port = port
112 |     }
113 | } 
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/supported-agents/grounding-models.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Grounding Models
  3 | description: Models that support click prediction with ComputerAgent.predict_click()
  4 | ---
  5 | 
  6 | These models specialize in UI element grounding and click prediction. They can identify precise coordinates for UI elements based on natural language descriptions, but cannot perform autonomous task planning.
  7 | 
  8 | Use `ComputerAgent.predict_click()` to get coordinates for specific UI elements.
  9 | 
 10 | All models that support `ComputerAgent.run()` also support `ComputerAgent.predict_click()`. See [All‑in‑one CUAs](./computer-use-agents).
 11 | 
 12 | ### Anthropic CUAs
 13 | 
 14 | - Claude 4.5: `claude-sonnet-4-5-20250929`
 15 | - Claude 4.1: `claude-opus-4-1-20250805`
 16 | - Claude 4: `claude-opus-4-20250514`, `claude-sonnet-4-20250514`
 17 | - Claude 3.7: `claude-3-7-sonnet-20250219`
 18 | 
 19 | ### OpenAI CUA Preview
 20 | 
 21 | - Computer-use-preview: `computer-use-preview`
 22 | 
 23 | ### UI-TARS 1.5 (Unified VLM with grounding support)
 24 | 
 25 | - `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B`
 26 | - `huggingface/ByteDance-Seed/UI-TARS-1.5-7B` (requires TGI endpoint)
 27 | 
 28 | ## Specialized Grounding Models
 29 | 
 30 | These models are optimized specifically for click prediction and UI element grounding:
 31 | 
 32 | ### OpenCUA
 33 | 
 34 | - `huggingface-local/xlangai/OpenCUA-{7B,32B}`
 35 | 
 36 | ### GTA1 Family
 37 | 
 38 | - `huggingface-local/HelloKKMe/GTA1-{7B,32B,72B}`
 39 | 
 40 | ### Holo 1.5 Family
 41 | 
 42 | - `huggingface-local/Hcompany/Holo1.5-{3B,7B,72B}`
 43 | 
 44 | ### InternVL 3.5 Family
 45 | 
 46 | - `huggingface-local/OpenGVLab/InternVL3_5-{1B,2B,4B,8B,...}`
 47 | 
 48 | ### OmniParser (OCR)
 49 | 
 50 | OCR-focused set-of-marks model that requires an LLM for click prediction:
 51 | 
 52 | - `omniparser` (requires combination with any LiteLLM vision model)
 53 | 
 54 | ### Moondream3 (Local Grounding)
 55 | 
 56 | Moondream3 is a powerful small model that can perform UI grounding and click prediction.
 57 | 
 58 | - `moondream3`
 59 | 
 60 | ## Usage Examples
 61 | 
 62 | ```python
 63 | # Using any grounding model for click prediction
 64 | agent = ComputerAgent("claude-sonnet-4-5-20250929", tools=[computer])
 65 | 
 66 | # Predict coordinates for specific elements
 67 | login_coords = agent.predict_click("find the login button")
 68 | search_coords = agent.predict_click("locate the search text field")
 69 | menu_coords = agent.predict_click("find the hamburger menu icon")
 70 | 
 71 | print(f"Login button: {login_coords}")
 72 | print(f"Search field: {search_coords}")
 73 | print(f"Menu icon: {menu_coords}")
 74 | ```
 75 | 
 76 | ```python
 77 | # OmniParser only performs OCR / set-of-marks parsing, so it must be composed with an LLM for predict_click
 78 | agent = ComputerAgent("omniparser+anthropic/claude-sonnet-4-5-20250929", tools=[computer])
 79 | 
 80 | # Predict click coordinates using composed agent
 81 | coords = agent.predict_click("find the submit button")
 82 | print(f"Click coordinates: {coords}")  # (450, 320)
 83 | 
 84 | # Note: Cannot use omniparser alone for click prediction
 85 | # This will raise an error:
 86 | # agent = ComputerAgent("omniparser", tools=[computer])
 87 | # coords = agent.predict_click("find button")  # Error!
 88 | ```
 89 | 
 90 | ```python
 91 | agent = ComputerAgent("huggingface-local/HelloKKMe/GTA1-7B", tools=[computer])
 92 | 
 93 | # Predict click coordinates for UI elements
 94 | coords = agent.predict_click("find the submit button")
 95 | print(f"Click coordinates: {coords}")  # (450, 320)
 96 | 
 97 | # Note: GTA1 cannot perform autonomous task planning
 98 | # This will raise an error:
 99 | # agent.run("Fill out the form and submit it")
100 | ```
101 | 
102 | ---
103 | 
104 | For information on combining grounding models with planning capabilities, see [Composed Agents](./composed-agents) and [All‑in‑one CUAs](./computer-use-agents).
105 | 
```
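
The Moondream3 entry in the listing above has no snippet of its own; here is a hedged example that follows the same `predict_click` pattern as the page's other grounding models. The `moondream3` model string comes from the list above, and the `computer` setup is omitted exactly as in the page's own examples.

```python
# Illustrative sketch mirroring the page's other predict_click examples.
agent = ComputerAgent("moondream3", tools=[computer])

coords = agent.predict_click("find the OK button in the dialog")
print(f"Click coordinates: {coords}")
```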

--------------------------------------------------------------------------------
/libs/python/computer-server/computer_server/server.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Server interface for Computer API.
  3 | Provides a clean API for starting and stopping the server.
  4 | """
  5 | 
  6 | import asyncio
  7 | import logging
  8 | from typing import Optional
  9 | 
 10 | import uvicorn
 11 | from fastapi import FastAPI
 12 | 
 13 | from .main import app as fastapi_app
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | class Server:
 19 |     """
 20 |     Server interface for Computer API.
 21 | 
 22 |     Usage:
 23 |         from computer_server.server import Server
 24 | 
 25 |         # Synchronous usage
 26 |         server = Server()
 27 |         server.start()  # Blocks until server is stopped
 28 | 
 29 |         # Asynchronous usage
 30 |         server = Server()
 31 |         await server.start_async()  # Starts server in background
 32 |         # Do other things
 33 |         await server.stop()  # Stop the server
 34 |     """
 35 | 
 36 |     def __init__(
 37 |         self,
 38 |         host: str = "0.0.0.0",
 39 |         port: int = 8000,
 40 |         log_level: str = "info",
 41 |         ssl_keyfile: Optional[str] = None,
 42 |         ssl_certfile: Optional[str] = None,
 43 |     ):
 44 |         """
 45 |         Initialize the server.
 46 | 
 47 |         Args:
 48 |             host: Host to bind the server to
 49 |             port: Port to bind the server to
 50 |             log_level: Logging level (debug, info, warning, error, critical)
 51 |             ssl_keyfile: Path to SSL private key file (for HTTPS)
 52 |             ssl_certfile: Path to SSL certificate file (for HTTPS)
 53 |         """
 54 |         self.host = host
 55 |         self.port = port
 56 |         self.log_level = log_level
 57 |         self.ssl_keyfile = ssl_keyfile
 58 |         self.ssl_certfile = ssl_certfile
 59 |         self.app = fastapi_app
 60 |         self._server_task: Optional[asyncio.Task] = None
 61 |         self._should_exit = asyncio.Event()
 62 | 
 63 |     def start(self) -> None:
 64 |         """
 65 |         Start the server synchronously. This will block until the server is stopped.
 66 |         """
 67 |         uvicorn.run(
 68 |             self.app,
 69 |             host=self.host,
 70 |             port=self.port,
 71 |             log_level=self.log_level,
 72 |             ssl_keyfile=self.ssl_keyfile,
 73 |             ssl_certfile=self.ssl_certfile,
 74 |         )
 75 | 
 76 |     async def start_async(self) -> None:
 77 |         """
 78 |         Start the server asynchronously. This will return immediately and the server
 79 |         will run in the background.
 80 |         """
 81 |         server_config = uvicorn.Config(
 82 |             self.app,
 83 |             host=self.host,
 84 |             port=self.port,
 85 |             log_level=self.log_level,
 86 |             ssl_keyfile=self.ssl_keyfile,
 87 |             ssl_certfile=self.ssl_certfile,
 88 |         )
 89 | 
 90 |         self._should_exit.clear()
 91 |         server = uvicorn.Server(server_config)
 92 | 
 93 |         # Create a task to run the server
 94 |         self._server_task = asyncio.create_task(server.serve())
 95 | 
 96 |         # Wait a short time to ensure the server starts
 97 |         await asyncio.sleep(0.5)
 98 | 
 99 |         protocol = "https" if self.ssl_certfile else "http"
100 |         logger.info(f"Server started at {protocol}://{self.host}:{self.port}")
101 | 
102 |     async def stop(self) -> None:
103 |         """
104 |         Stop the server if it's running asynchronously.
105 |         """
106 |         if self._server_task and not self._server_task.done():
107 |             # Signal the server to exit
108 |             self._should_exit.set()
109 | 
110 |             # Cancel the server task
111 |             self._server_task.cancel()
112 | 
113 |             try:
114 |                 await self._server_task
115 |             except asyncio.CancelledError:
116 |                 logger.info("Server stopped")
117 | 
118 |             self._server_task = None
119 | 
```
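
A hedged usage sketch for the `Server` class above, showing the async start/stop flow described in its docstring. It assumes the module is importable as `computer_server.server` (matching the file path); the host, port, and sleep are placeholders.

```python
# Hedged usage sketch for the async start/stop flow described in the docstring above.
# Assumes the module is importable as computer_server.server (matching the file path).
import asyncio

from computer_server.server import Server


async def main() -> None:
    server = Server(host="127.0.0.1", port=8000)
    await server.start_async()   # returns once uvicorn is running in the background
    try:
        await asyncio.sleep(5)   # ...interact with the API while the server runs...
    finally:
        await server.stop()      # cancel the background task and shut the server down


if __name__ == "__main__":
    asyncio.run(main())
```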

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/mcp-server/configuration.mdx:
--------------------------------------------------------------------------------

```markdown
 1 | ---
 2 | title: Configuration
 3 | ---
 4 | 
 5 | The server is configured using environment variables (can be set in the Claude Desktop config):
 6 | 
 7 | | Variable                       | Description                                                                                                                                                                                               | Default                            |
 8 | | ------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------- |
 9 | | `CUA_MODEL_NAME`               | Model string (e.g., "anthropic/claude-sonnet-4-20250514", "openai/computer-use-preview", "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B", "omniparser+litellm/gpt-4o", "omniparser+ollama_chat/gemma3") | anthropic/claude-sonnet-4-20250514 |
10 | | `ANTHROPIC_API_KEY`            | Your Anthropic API key (required for Anthropic models)                                                                                                                                                    | None                               |
11 | | `CUA_MAX_IMAGES`               | Maximum number of images to keep in context                                                                                                                                                               | 3                                  |
12 | | `CUA_USE_HOST_COMPUTER_SERVER` | Target your local desktop instead of a VM. Set to "true" to use your host system. **Warning:** AI models may perform risky actions.                                                                       | false                              |
13 | 
14 | ## Model Configuration
15 | 
16 | The `CUA_MODEL_NAME` environment variable supports various model providers through LiteLLM integration:
17 | 
18 | ### Supported Providers
19 | 
20 | - **Anthropic**: `anthropic/claude-sonnet-4-20250514`
21 | - **OpenAI**: `openai/computer-use-preview`, `openai/gpt-4o`
22 | - **Local Models**: `huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B`
23 | - **Omni + LiteLLM**: `omniparser+litellm/gpt-4o`, `omniparser+litellm/claude-3-haiku`
24 | - **Ollama**: `omniparser+ollama_chat/gemma3`
25 | 
26 | ### Example Configurations
27 | 
28 | **Claude Desktop Configuration:**
29 | 
30 | ```json
31 | {
32 |   "mcpServers": {
33 |     "cua-agent": {
34 |       "command": "/bin/bash",
35 |       "args": ["~/.cua/start_mcp_server.sh"],
36 |       "env": {
37 |         "CUA_MODEL_NAME": "anthropic/claude-sonnet-4-20250514",
38 |         "ANTHROPIC_API_KEY": "your-anthropic-api-key-here",
39 |         "CUA_MAX_IMAGES": "5",
40 |         "CUA_USE_HOST_COMPUTER_SERVER": "false"
41 |       }
42 |     }
43 |   }
44 | }
45 | ```
46 | 
47 | **Local Model Configuration:**
48 | 
49 | ```json
50 | {
51 |   "mcpServers": {
52 |     "cua-agent": {
53 |       "command": "/bin/bash",
54 |       "args": ["~/.cua/start_mcp_server.sh"],
55 |       "env": {
56 |         "CUA_MODEL_NAME": "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
57 |         "CUA_MAX_IMAGES": "3"
58 |       }
59 |     }
60 |   }
61 | }
62 | ```
63 | 
64 | ## Session Management Configuration
65 | 
66 | The MCP server automatically manages sessions with the following defaults:
67 | 
68 | - **Max Concurrent Sessions**: 10
69 | - **Session Timeout**: 10 minutes of inactivity
70 | - **Computer Pool Size**: 5 instances
71 | - **Automatic Cleanup**: Enabled
72 | 
73 | These settings are optimized for typical usage and don't require configuration for most users.
74 | 
```

--------------------------------------------------------------------------------
/libs/lume/src/VM/VMDetailsPrinter.swift:
--------------------------------------------------------------------------------

```swift
 1 | import Foundation
 2 | 
 3 | /// Prints VM status information in a formatted table
 4 | enum VMDetailsPrinter {
 5 |     /// Represents a column in the VM status table
 6 |     private struct Column: Sendable {
 7 |         let header: String
 8 |         let width: Int
 9 |         let getValue: @Sendable (VMDetails) -> String
10 |     }
11 | 
12 |     /// Configuration for all columns in the status table
13 |     private static let columns: [Column] = [
14 |         Column(header: "name", width: 34, getValue: { $0.name }),
15 |         Column(header: "os", width: 8, getValue: { $0.os }),
16 |         Column(header: "cpu", width: 8, getValue: { String($0.cpuCount) }),
17 |         Column(
18 |             header: "memory", width: 8,
19 |             getValue: {
20 |                 String(format: "%.2fG", Float($0.memorySize) / (1024 * 1024 * 1024))
21 |             }),
22 |         Column(
23 |             header: "disk", width: 16,
24 |             getValue: {
25 |                 "\($0.diskSize.formattedAllocated)/\($0.diskSize.formattedTotal)"
26 |             }),
27 |         Column(header: "display", width: 12, getValue: { $0.display }),
28 |         Column(
29 |             header: "status", width: 16,
30 |             getValue: {
31 |                 $0.status
32 |             }),
33 |         Column(header: "storage", width: 16, getValue: { $0.locationName }),
34 |         Column(
35 |             header: "shared_dirs", width: 54,
36 |             getValue: { vm in
37 |                 // Only show shared directories if the VM is running
38 |                 if vm.status == "running", let dirs = vm.sharedDirectories, !dirs.isEmpty {
39 |                     return dirs.map { "\($0.hostPath) (\($0.readOnly ? "ro" : "rw"))" }.joined(separator: ", ")
40 |                 } else {
41 |                     return "-"
42 |                 }
43 |             }),
44 |         Column(
45 |             header: "ip", width: 16,
46 |             getValue: {
47 |                 $0.ipAddress ?? "-"
48 |             }),
49 |         Column(
50 |             header: "vnc", width: 50,
51 |             getValue: {
52 |                 $0.vncUrl ?? "-"
53 |             }),
54 |     ]
55 | 
56 |     /// Prints the status of all VMs in a formatted table
57 |     /// - Parameter vms: Array of VM status objects to display
58 |     static func printStatus(
59 |         _ vms: [VMDetails], format: FormatOption, print: (String) -> Void = { print($0) }
60 |     ) throws {
61 |         if format == .json {
62 |             let jsonEncoder = JSONEncoder()
63 |             jsonEncoder.outputFormatting = .prettyPrinted
64 |             let jsonData = try jsonEncoder.encode(vms)
65 |             let jsonString = String(data: jsonData, encoding: .utf8)!
66 |             print(jsonString)
67 |         } else {
68 |             printHeader(print: print)
69 |             vms.forEach({ vm in 
70 |                 printVM(vm, print: print)
71 |             })
72 |         }
73 |     }
74 | 
75 |     private static func printHeader(print: (String) -> Void = { print($0) }) {
76 |         let paddedHeaders = columns.map { $0.header.paddedToWidth($0.width) }
77 |         print(paddedHeaders.joined())
78 |     }
79 | 
80 |     private static func printVM(_ vm: VMDetails, print: (String) -> Void = { print($0) }) {
81 |         let paddedColumns = columns.map { column in
82 |             column.getValue(vm).paddedToWidth(column.width)
83 |         }
84 |         print(paddedColumns.joined())
85 |     }
86 | }
87 | 
88 | extension String {
89 |     /// Pads the string to the specified width with spaces
90 |     /// - Parameter width: Target width for padding
91 |     /// - Returns: Padded string
92 |     fileprivate func paddedToWidth(_ width: Int) -> String {
93 |         padding(toLength: width, withPad: " ", startingAt: 0)
94 |     }
95 | }
96 | 
```

--------------------------------------------------------------------------------
/libs/python/computer/computer/utils.py:
--------------------------------------------------------------------------------

```python
  1 | import base64
  2 | import io
  3 | import os
  4 | import shlex
  5 | from typing import Any, Dict, Optional, Tuple
  6 | 
  7 | import mslex
  8 | from PIL import Image, ImageDraw
  9 | 
 10 | 
 11 | def decode_base64_image(base64_str: str) -> bytes:
 12 |     """Decode a base64 string into image bytes."""
 13 |     return base64.b64decode(base64_str)
 14 | 
 15 | 
 16 | def encode_base64_image(image_bytes: bytes) -> str:
 17 |     """Encode image bytes to base64 string."""
 18 |     return base64.b64encode(image_bytes).decode("utf-8")
 19 | 
 20 | 
 21 | def bytes_to_image(image_bytes: bytes) -> Image.Image:
 22 |     """Convert bytes to PIL Image.
 23 | 
 24 |     Args:
 25 |         image_bytes: Raw image bytes
 26 | 
 27 |     Returns:
 28 |         PIL.Image: The converted image
 29 |     """
 30 |     return Image.open(io.BytesIO(image_bytes))
 31 | 
 32 | 
 33 | def image_to_bytes(image: Image.Image, format: str = "PNG") -> bytes:
 34 |     """Convert PIL Image to bytes."""
 35 |     buf = io.BytesIO()
 36 |     image.save(buf, format=format)
 37 |     return buf.getvalue()
 38 | 
 39 | 
 40 | def resize_image(image_bytes: bytes, scale_factor: float) -> bytes:
 41 |     """Resize an image by a scale factor.
 42 | 
 43 |     Args:
 44 |         image_bytes: The original image as bytes
 45 |         scale_factor: Factor to scale the image by (e.g., 0.5 for half size, 2.0 for double)
 46 | 
 47 |     Returns:
 48 |         bytes: The resized image as bytes
 49 |     """
 50 |     image = bytes_to_image(image_bytes)
 51 |     if scale_factor != 1.0:
 52 |         new_size = (int(image.width * scale_factor), int(image.height * scale_factor))
 53 |         image = image.resize(new_size, Image.Resampling.LANCZOS)
 54 |     return image_to_bytes(image)
 55 | 
 56 | 
 57 | def draw_box(
 58 |     image_bytes: bytes,
 59 |     x: int,
 60 |     y: int,
 61 |     width: int,
 62 |     height: int,
 63 |     color: str = "#FF0000",
 64 |     thickness: int = 2,
 65 | ) -> bytes:
 66 |     """Draw a box on an image.
 67 | 
 68 |     Args:
 69 |         image_bytes: The original image as bytes
 70 |         x: X coordinate of top-left corner
 71 |         y: Y coordinate of top-left corner
 72 |         width: Width of the box
 73 |         height: Height of the box
 74 |         color: Color of the box in hex format
 75 |         thickness: Thickness of the box border in pixels
 76 | 
 77 |     Returns:
 78 |         bytes: The modified image as bytes
 79 |     """
 80 |     # Convert bytes to PIL Image
 81 |     image = bytes_to_image(image_bytes)
 82 | 
 83 |     # Create drawing context
 84 |     draw = ImageDraw.Draw(image)
 85 | 
 86 |     # Draw rectangle
 87 |     draw.rectangle([(x, y), (x + width, y + height)], outline=color, width=thickness)
 88 | 
 89 |     # Convert back to bytes
 90 |     return image_to_bytes(image)
 91 | 
 92 | 
 93 | def get_image_size(image_bytes: bytes) -> Tuple[int, int]:
 94 |     """Get the dimensions of an image.
 95 | 
 96 |     Args:
 97 |         image_bytes: The image as bytes
 98 | 
 99 |     Returns:
100 |         Tuple[int, int]: Width and height of the image
101 |     """
102 |     image = bytes_to_image(image_bytes)
103 |     return image.size
104 | 
105 | 
106 | def parse_vm_info(vm_info: Dict[str, Any]) -> Optional[Dict[str, Any]]:
107 |     """Parse VM info from pylume response."""
108 |     if not vm_info:
109 |         return None
110 | 
111 | 
112 | def safe_join(argv: list[str]) -> str:
113 |     """
114 |     Return a platform-correct string that safely quotes `argv` for shell execution.
115 | 
116 |     - On POSIX: uses `shlex.join`.
117 |     - On Windows: uses `mslex.join`.
118 | 
119 |     Args:
120 |         argv: iterable of argument strings (will be coerced to str).
121 | 
122 |     Returns:
123 |         A safely quoted command-line string appropriate for the current platform that protects against
124 |         shell injection vulnerabilities.
125 |     """
126 |     if os.name == "nt":
127 |         # On Windows, use mslex for proper quoting
128 |         return mslex.join(argv)
129 |     else:
130 |         # On POSIX systems, use shlex
131 |         return shlex.join(argv)
132 | 
```
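
A hedged usage sketch for the image helpers and `safe_join` above. It builds a test image in memory so nothing is read from disk; it assumes the package is importable as `computer` (per the file path) and that Pillow is installed, with `mslex` only exercised on Windows.

```python
# Hedged usage sketch; builds an in-memory PNG so nothing is read from disk.
import io

from PIL import Image

from computer.utils import draw_box, get_image_size, resize_image, safe_join

# Create a 200x100 white PNG as raw bytes
buf = io.BytesIO()
Image.new("RGB", (200, 100), "white").save(buf, format="PNG")
png_bytes = buf.getvalue()

boxed = draw_box(png_bytes, x=10, y=10, width=50, height=30)  # red outline by default
smaller = resize_image(boxed, scale_factor=0.5)               # halve both dimensions
print(get_image_size(smaller))                                # (100, 50)

# Quote a command line safely for the current platform (shlex on POSIX, mslex on Windows)
print(safe_join(["echo", "hello world", "it's quoted"]))
```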

--------------------------------------------------------------------------------
/libs/lume/src/VM/DarwinVM.swift:
--------------------------------------------------------------------------------

```swift
 1 | import Foundation
 2 | 
 3 | /// macOS-specific virtual machine implementation
 4 | @MainActor
 5 | final class DarwinVM: VM {
 6 |     private let imageLoader: ImageLoader
 7 | 
 8 |     init(
 9 |         vmDirContext: VMDirContext,
10 |         virtualizationServiceFactory: @escaping (VMVirtualizationServiceContext) throws -> VMVirtualizationService = { try DarwinVirtualizationService(configuration: $0) },
11 |         vncServiceFactory: @escaping (VMDirectory) -> VNCService = { DefaultVNCService(vmDirectory: $0) },
12 |         imageLoader: ImageLoader
13 |     ) {
14 |         self.imageLoader = imageLoader
15 |         super.init(
16 |             vmDirContext: vmDirContext,
17 |             virtualizationServiceFactory: virtualizationServiceFactory,
18 |             vncServiceFactory: vncServiceFactory
19 |         )
20 |     }
21 | 
22 |     override func getOSType() -> String {
23 |         return "macOS"
24 |     }
25 | 
26 |     // MARK: - Installation and Configuration
27 |     
28 |     override func setup(ipswPath: String, cpuCount: Int, memorySize: UInt64, diskSize: UInt64, display: String) async throws {
29 |         let imagePath: Path
30 |         if ipswPath == "latest" {
31 |             Logger.info("Downloading latest supported Image...")
32 |             let downloadedPath = try await self.imageLoader.downloadLatestImage()
33 |             imagePath = Path(downloadedPath.path)
34 |         } else {
35 |             imagePath = Path(ipswPath)
36 |         }
37 | 
38 |         let requirements = try await imageLoader.loadImageRequirements(from: imagePath.url)
39 |         try setDiskSize(diskSize)
40 | 
41 |         let finalCpuCount = max(cpuCount, requirements.minimumSupportedCPUCount)
42 |         try setCpuCount(finalCpuCount)
43 |         if finalCpuCount != cpuCount {
44 |             Logger.info("CPU count overridden due to minimum image requirements", metadata: ["original": "\(cpuCount)", "final": "\(finalCpuCount)"])
45 |         }
46 | 
47 |         let finalMemorySize = max(memorySize, requirements.minimumSupportedMemorySize)
48 |         try setMemorySize(finalMemorySize)
49 |         if finalMemorySize != memorySize {
50 |             Logger.info("Memory size overridden due to minimum image requirements", metadata: ["original": "\(memorySize)", "final": "\(finalMemorySize)"])
51 |         }
52 | 
53 |         try updateVMConfig(
54 |             vmConfig: try VMConfig(
55 |                 os: getOSType(),
56 |                 cpuCount: finalCpuCount,
57 |                 memorySize: finalMemorySize,
58 |                 diskSize: diskSize,
59 |                 macAddress: DarwinVirtualizationService.generateMacAddress(),
60 |                 display: display,
61 |                 hardwareModel: requirements.hardwareModel,
62 |                 machineIdentifier: DarwinVirtualizationService.generateMachineIdentifier()
63 |             )
64 |         )
65 | 
66 |         let service: any VMVirtualizationService = try virtualizationServiceFactory(
67 |             try createVMVirtualizationServiceContext(
68 |                 cpuCount: finalCpuCount,
69 |                 memorySize: finalMemorySize,
70 |                 display: display
71 |             )
72 |         )
73 |         guard let darwinService = service as? DarwinVirtualizationService else {
74 |             throw VMError.internalError("Installation requires DarwinVirtualizationService")
75 |         }
76 | 
77 |         // Create auxiliary storage with hardware model
78 |         try darwinService.createAuxiliaryStorage(at: vmDirContext.nvramPath, hardwareModel: requirements.hardwareModel)
79 | 
80 |         try await darwinService.installMacOS(imagePath: imagePath) { progress in
81 |             Logger.info("Installing macOS", metadata: ["progress": "\(Int(progress * 100))%"])
82 |         }
83 |     }
84 | }
85 | 
```

--------------------------------------------------------------------------------
/libs/python/computer/computer/interface/models.py:
--------------------------------------------------------------------------------

```python
  1 | from dataclasses import dataclass
  2 | from enum import Enum
  3 | from typing import Any, Dict, List, Literal, TypedDict, Union
  4 | 
  5 | 
  6 | @dataclass
  7 | class CommandResult:
  8 |     stdout: str
  9 |     stderr: str
 10 |     returncode: int
 11 | 
 12 |     def __init__(self, stdout: str, stderr: str, returncode: int):
 13 |         self.stdout = stdout
 14 |         self.stderr = stderr
 15 |         self.returncode = returncode
 16 | 
 17 | 
 18 | # Navigation key literals
 19 | NavigationKey = Literal["pagedown", "pageup", "home", "end", "left", "right", "up", "down"]
 20 | 
 21 | # Special key literals
 22 | SpecialKey = Literal["enter", "esc", "tab", "space", "backspace", "del"]
 23 | 
 24 | # Modifier key literals
 25 | ModifierKey = Literal["ctrl", "alt", "shift", "win", "command", "option"]
 26 | 
 27 | # Function key literals
 28 | FunctionKey = Literal["f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12"]
 29 | 
 30 | 
 31 | class Key(Enum):
 32 |     """Keyboard keys that can be used with press_key.
 33 | 
 34 |     These key names map to PyAutoGUI's expected key names.
 35 |     """
 36 | 
 37 |     # Navigation
 38 |     PAGE_DOWN = "pagedown"
 39 |     PAGE_UP = "pageup"
 40 |     HOME = "home"
 41 |     END = "end"
 42 |     LEFT = "left"
 43 |     RIGHT = "right"
 44 |     UP = "up"
 45 |     DOWN = "down"
 46 | 
 47 |     # Special keys
 48 |     RETURN = "enter"
 49 |     ENTER = "enter"
 50 |     ESCAPE = "esc"
 51 |     ESC = "esc"
 52 |     TAB = "tab"
 53 |     SPACE = "space"
 54 |     BACKSPACE = "backspace"
 55 |     DELETE = "del"
 56 | 
 57 |     # Modifier keys
 58 |     ALT = "alt"
 59 |     CTRL = "ctrl"
 60 |     SHIFT = "shift"
 61 |     WIN = "win"
 62 |     COMMAND = "command"
 63 |     OPTION = "option"
 64 | 
 65 |     # Function keys
 66 |     F1 = "f1"
 67 |     F2 = "f2"
 68 |     F3 = "f3"
 69 |     F4 = "f4"
 70 |     F5 = "f5"
 71 |     F6 = "f6"
 72 |     F7 = "f7"
 73 |     F8 = "f8"
 74 |     F9 = "f9"
 75 |     F10 = "f10"
 76 |     F11 = "f11"
 77 |     F12 = "f12"
 78 | 
 79 |     @classmethod
 80 |     def from_string(cls, key: str) -> "Key | str":
 81 |         """Convert a string key name to a Key enum value.
 82 | 
 83 |         Args:
 84 |             key: String key name to convert
 85 | 
 86 |         Returns:
 87 |             Key enum value if the string matches a known key,
 88 |             otherwise returns the original string for single character keys
 89 |         """
 90 |         # Map common alternative names to enum values
 91 |         key_mapping = {
 92 |             "page_down": cls.PAGE_DOWN,
 93 |             "page down": cls.PAGE_DOWN,
 94 |             "pagedown": cls.PAGE_DOWN,
 95 |             "page_up": cls.PAGE_UP,
 96 |             "page up": cls.PAGE_UP,
 97 |             "pageup": cls.PAGE_UP,
 98 |             "return": cls.RETURN,
 99 |             "enter": cls.ENTER,
100 |             "escape": cls.ESCAPE,
101 |             "esc": cls.ESC,
102 |             "delete": cls.DELETE,
103 |             "del": cls.DELETE,
104 |             # Modifier key mappings
105 |             "alt": cls.ALT,
106 |             "ctrl": cls.CTRL,
107 |             "control": cls.CTRL,
108 |             "shift": cls.SHIFT,
109 |             "win": cls.WIN,
110 |             "windows": cls.WIN,
111 |             "super": cls.WIN,
112 |             "command": cls.COMMAND,
113 |             "cmd": cls.COMMAND,
114 |             "⌘": cls.COMMAND,
115 |             "option": cls.OPTION,
116 |             "⌥": cls.OPTION,
117 |         }
118 | 
119 |         normalized = key.lower().strip()
120 |         return key_mapping.get(normalized, key)
121 | 
122 | 
123 | # Combined key type
124 | KeyType = Union[Key, NavigationKey, SpecialKey, ModifierKey, FunctionKey, str]
125 | 
126 | # Key type for mouse actions
127 | MouseButton = Literal["left", "right", "middle"]
128 | 
129 | 
130 | class AccessibilityWindow(TypedDict):
131 |     """Information about a window in the accessibility tree."""
132 | 
133 |     app_name: str
134 |     pid: int
135 |     frontmost: bool
136 |     has_windows: bool
137 |     windows: List[Dict[str, Any]]
138 | 
139 | 
140 | class AccessibilityTree(TypedDict):
141 |     """Complete accessibility tree information."""
142 | 
143 |     success: bool
144 |     frontmost_application: str
145 |     windows: List[AccessibilityWindow]
146 | 
```
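
A short hedged sketch of `Key.from_string` normalization, assuming the module is importable as `computer.interface.models` (per the file path).

```python
# Hedged sketch of Key.from_string: known aliases map to enum members,
# unknown single-character keys pass through unchanged.
from computer.interface.models import Key

print(Key.from_string("cmd"))        # Key.COMMAND
print(Key.from_string("Page Down"))  # Key.PAGE_DOWN
print(Key.from_string("escape"))     # Key.ESCAPE
print(Key.from_string("a"))          # "a"
```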

--------------------------------------------------------------------------------
/libs/python/agent/agent/callbacks/image_retention.py:
--------------------------------------------------------------------------------

```python
 1 | """
 2 | Image retention callback handler that limits the number of recent images in message history.
 3 | """
 4 | 
 5 | from typing import Any, Dict, List, Optional
 6 | 
 7 | from .base import AsyncCallbackHandler
 8 | 
 9 | 
10 | class ImageRetentionCallback(AsyncCallbackHandler):
11 |     """
12 |     Callback handler that applies image retention policy to limit the number
13 |     of recent images in message history to prevent context window overflow.
14 |     """
15 | 
16 |     def __init__(self, only_n_most_recent_images: Optional[int] = None):
17 |         """
18 |         Initialize the image retention callback.
19 | 
20 |         Args:
21 |             only_n_most_recent_images: If set, only keep the N most recent images in message history
22 |         """
23 |         self.only_n_most_recent_images = only_n_most_recent_images
24 | 
25 |     async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
26 |         """
27 |         Apply image retention policy to messages before sending to agent loop.
28 | 
29 |         Args:
30 |             messages: List of message dictionaries
31 | 
32 |         Returns:
33 |             List of messages with image retention policy applied
34 |         """
35 |         if self.only_n_most_recent_images is None:
36 |             return messages
37 | 
38 |         return self._apply_image_retention(messages)
39 | 
40 |     def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
41 |         """Apply image retention policy to keep only the N most recent images.
42 | 
43 |         Removes computer_call_output items with image_url and their corresponding computer_call items,
44 |         keeping only the most recent N image pairs based on only_n_most_recent_images setting.
45 | 
46 |         Args:
47 |             messages: List of message dictionaries
48 | 
49 |         Returns:
50 |             Filtered list of messages with image retention applied
51 |         """
52 |         if self.only_n_most_recent_images is None:
53 |             return messages
54 | 
55 |         # Gather indices of all computer_call_output messages that contain an image_url
56 |         output_indices: List[int] = []
57 |         for idx, msg in enumerate(messages):
58 |             if msg.get("type") == "computer_call_output":
59 |                 out = msg.get("output")
60 |                 if isinstance(out, dict) and ("image_url" in out):
61 |                     output_indices.append(idx)
62 | 
63 |         # Nothing to trim
64 |         if len(output_indices) <= self.only_n_most_recent_images:
65 |             return messages
66 | 
67 |         # Determine which outputs to keep (most recent N)
68 |         keep_output_indices = set(output_indices[-self.only_n_most_recent_images :])
69 | 
70 |         # Build set of indices to remove in one pass
71 |         to_remove: set[int] = set()
72 | 
73 |         for idx in output_indices:
74 |             if idx in keep_output_indices:
75 |                 continue  # keep this screenshot and its context
76 | 
77 |             to_remove.add(idx)  # remove the computer_call_output itself
78 | 
79 |             # Remove the immediately preceding computer_call with matching call_id (if present)
80 |             call_id = messages[idx].get("call_id")
81 |             prev_idx = idx - 1
82 |             if (
83 |                 prev_idx >= 0
84 |                 and messages[prev_idx].get("type") == "computer_call"
85 |                 and messages[prev_idx].get("call_id") == call_id
86 |             ):
87 |                 to_remove.add(prev_idx)
88 |                 # Check a single reasoning immediately before that computer_call
89 |                 r_idx = prev_idx - 1
90 |                 if r_idx >= 0 and messages[r_idx].get("type") == "reasoning":
91 |                     to_remove.add(r_idx)
92 | 
93 |         # Construct filtered list
94 |         filtered = [m for i, m in enumerate(messages) if i not in to_remove]
95 |         return filtered
96 | 
```
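
A hedged sketch of wiring `ImageRetentionCallback` into a `ComputerAgent`, in the style of the Agent SDK callback docs. The import paths and model string are assumptions based on the file layout, and `computer` stands for an already-initialized Computer instance.

```python
# Hedged sketch: keep only the 3 most recent screenshots in the message history.
# Import paths are assumed from the file layout; adjust to the installed package.
from agent import ComputerAgent
from agent.callbacks.image_retention import ImageRetentionCallback

agent = ComputerAgent(
    model="anthropic/claude-sonnet-4-5-20250929",
    tools=[computer],  # an already-initialized Computer instance (not shown)
    callbacks=[ImageRetentionCallback(only_n_most_recent_images=3)],
)
```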

--------------------------------------------------------------------------------
/libs/python/computer-server/computer_server/handlers/factory.py:
--------------------------------------------------------------------------------

```python
  1 | import platform
  2 | import subprocess
  3 | from typing import Tuple, Type
  4 | 
  5 | from computer_server.diorama.base import BaseDioramaHandler
  6 | 
  7 | from .base import (
  8 |     BaseAccessibilityHandler,
  9 |     BaseAutomationHandler,
 10 |     BaseDesktopHandler,
 11 |     BaseFileHandler,
 12 |     BaseWindowHandler,
 13 | )
 14 | 
 15 | # Conditionally import platform-specific handlers
 16 | system = platform.system().lower()
 17 | if system == "darwin":
 18 |     from computer_server.diorama.macos import MacOSDioramaHandler
 19 | 
 20 |     from .macos import MacOSAccessibilityHandler, MacOSAutomationHandler
 21 | elif system == "linux":
 22 |     from .linux import LinuxAccessibilityHandler, LinuxAutomationHandler
 23 | elif system == "windows":
 24 |     from .windows import WindowsAccessibilityHandler, WindowsAutomationHandler
 25 | 
 26 | from .generic import GenericDesktopHandler, GenericFileHandler, GenericWindowHandler
 27 | 
 28 | 
 29 | class HandlerFactory:
 30 |     """Factory for creating OS-specific handlers."""
 31 | 
 32 |     @staticmethod
 33 |     def _get_current_os() -> str:
 34 |         """Determine the current OS.
 35 | 
 36 |         Returns:
 37 |             str: The OS type ('darwin' for macOS, 'linux' for Linux, or 'windows' for Windows)
 38 | 
 39 |         Raises:
 40 |             RuntimeError: If unable to determine the current OS
 41 |         """
 42 |         try:
 43 |             # Use platform.system() as primary method
 44 |             system = platform.system().lower()
 45 |             if system in ["darwin", "linux", "windows"]:
 46 |                 return system
 47 | 
 48 |             # Fallback to uname if platform.system() doesn't return expected values (Unix-like systems only)
 49 |             result = subprocess.run(["uname", "-s"], capture_output=True, text=True)
 50 |             if result.returncode == 0:
 51 |                 return result.stdout.strip().lower()
 52 | 
 53 |             raise RuntimeError(f"Unsupported OS: {system}")
 54 |         except Exception as e:
 55 |             raise RuntimeError(f"Failed to determine current OS: {str(e)}")
 56 | 
 57 |     @staticmethod
 58 |     def create_handlers() -> Tuple[
 59 |         BaseAccessibilityHandler,
 60 |         BaseAutomationHandler,
 61 |         BaseDioramaHandler,
 62 |         BaseFileHandler,
 63 |         BaseDesktopHandler,
 64 |         BaseWindowHandler,
 65 |     ]:
 66 |         """Create and return appropriate handlers for the current OS.
 67 | 
 68 |         Returns:
 69 |             Tuple[BaseAccessibilityHandler, BaseAutomationHandler, BaseDioramaHandler, BaseFileHandler, BaseDesktopHandler, BaseWindowHandler]:
 70 |             A tuple containing the appropriate accessibility, automation, diorama, file, desktop, and window handlers for the current OS.
 71 | 
 72 |         Raises:
 73 |             NotImplementedError: If the current OS is not supported
 74 |             RuntimeError: If unable to determine the current OS
 75 |         """
 76 |         os_type = HandlerFactory._get_current_os()
 77 | 
 78 |         if os_type == "darwin":
 79 |             return (
 80 |                 MacOSAccessibilityHandler(),
 81 |                 MacOSAutomationHandler(),
 82 |                 MacOSDioramaHandler(),
 83 |                 GenericFileHandler(),
 84 |                 GenericDesktopHandler(),
 85 |                 GenericWindowHandler(),
 86 |             )
 87 |         elif os_type == "linux":
 88 |             return (
 89 |                 LinuxAccessibilityHandler(),
 90 |                 LinuxAutomationHandler(),
 91 |                 BaseDioramaHandler(),
 92 |                 GenericFileHandler(),
 93 |                 GenericDesktopHandler(),
 94 |                 GenericWindowHandler(),
 95 |             )
 96 |         elif os_type == "windows":
 97 |             return (
 98 |                 WindowsAccessibilityHandler(),
 99 |                 WindowsAutomationHandler(),
100 |                 BaseDioramaHandler(),
101 |                 GenericFileHandler(),
102 |                 GenericDesktopHandler(),
103 |                 GenericWindowHandler(),
104 |             )
105 |         else:
106 |             raise NotImplementedError(f"OS '{os_type}' is not supported")
107 | 
```
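
A hedged sketch of consuming `HandlerFactory.create_handlers()`, which returns six handlers in a fixed order (accessibility, automation, diorama, file, desktop, window). It assumes the platform-specific dependencies for the current OS are installed and that the module is importable as `computer_server.handlers.factory`.

```python
# Hedged sketch: unpack the six OS-specific handlers in the order documented above.
from computer_server.handlers.factory import HandlerFactory

(
    accessibility_handler,
    automation_handler,
    diorama_handler,
    file_handler,
    desktop_handler,
    window_handler,
) = HandlerFactory.create_handlers()

print(type(accessibility_handler).__name__, type(automation_handler).__name__)
```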

--------------------------------------------------------------------------------
/libs/typescript/computer/src/computer/providers/cloud.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import pino from 'pino';
  2 | import { type BaseComputerInterface, InterfaceFactory } from '../../interface/index';
  3 | import type { CloudComputerConfig, VMProviderType } from '../types';
  4 | import { BaseComputer } from './base';
  5 | 
  6 | const DEFAULT_API_BASE = process.env.CUA_API_BASE || 'https://api.cua.ai';
  7 | 
  8 | interface VMInfo {
  9 |   name: string;
 10 |   host?: string;
 11 |   status?: string;
 12 | }
 13 | 
 14 | /**
 15 |  * Cloud-specific computer implementation
 16 |  */
 17 | export class CloudComputer extends BaseComputer {
 18 |   protected static vmProviderType: VMProviderType.CLOUD;
 19 |   protected apiKey: string;
 20 |   private iface?: BaseComputerInterface;
 21 |   private initialized = false;
 22 |   private cachedHost?: string;
 23 |   private apiBase: string;
 24 | 
 25 |   protected logger = pino({ name: 'computer.provider_cloud' });
 26 | 
 27 |   constructor(config: CloudComputerConfig) {
 28 |     super(config);
 29 |     this.apiKey = config.apiKey;
 30 |     this.apiBase = DEFAULT_API_BASE;
 31 |   }
 32 | 
 33 |   /**
 34 |    * Get the host for this VM.
 35 |    * Returns cached host if available, otherwise falls back to default format.
 36 |    */
 37 |   get ip(): string {
 38 |     return this.cachedHost || `${this.name}.sandbox.cua.ai`;
 39 |   }
 40 | 
 41 |   /**
 42 |    * Fetch VM list from API and cache the host for this VM.
 43 |    */
 44 |   private async fetchAndCacheHost(): Promise<string> {
 45 |     try {
 46 |       const response = await fetch(`${this.apiBase}/v1/vms`, {
 47 |         headers: {
 48 |           Authorization: `Bearer ${this.apiKey}`,
 49 |           Accept: 'application/json',
 50 |         },
 51 |       });
 52 | 
 53 |       if (response.ok) {
 54 |         const vms = (await response.json()) as VMInfo[];
 55 |         const vm = vms.find((v) => v.name === this.name);
 56 |         if (vm?.host) {
 57 |           this.cachedHost = vm.host;
 58 |           this.logger.info(`Cached host from API: ${this.cachedHost}`);
 59 |           return this.cachedHost;
 60 |         }
 61 |       }
 62 |     } catch (error) {
 63 |       this.logger.warn(`Failed to fetch VM list for host lookup: ${error}`);
 64 |     }
 65 | 
 66 |     // Fall back to default format
 67 |     const fallbackHost = `${this.name}.sandbox.cua.ai`;
 68 |     this.cachedHost = fallbackHost;
 69 |     this.logger.info(`Using fallback host: ${fallbackHost}`);
 70 |     return fallbackHost;
 71 |   }
 72 | 
 73 |   /**
 74 |    * Initialize the cloud VM and interface
 75 |    */
 76 |   async run(): Promise<void> {
 77 |     if (this.initialized) {
 78 |       this.logger.info('Computer already initialized, skipping initialization');
 79 |       return;
 80 |     }
 81 | 
 82 |     try {
 83 |       // Fetch the host from API before connecting
 84 |       const ipAddress = await this.fetchAndCacheHost();
 85 |       this.logger.info(`Connecting to cloud VM at ${ipAddress}`);
 86 | 
 87 |       // Create the interface with API key authentication
 88 |       this.iface = InterfaceFactory.createInterfaceForOS(
 89 |         this.osType,
 90 |         ipAddress,
 91 |         this.apiKey,
 92 |         this.name
 93 |       );
 94 | 
 95 |       // Wait for the interface to be ready
 96 |       this.logger.info('Waiting for interface to be ready...');
 97 |       await this.iface.waitForReady();
 98 | 
 99 |       this.initialized = true;
100 |       this.logger.info('Cloud computer ready');
101 |     } catch (error) {
102 |       this.logger.error(`Failed to initialize cloud computer: ${error}`);
103 |       throw new Error(`Failed to initialize cloud computer: ${error}`);
104 |     }
105 |   }
106 | 
107 |   /**
108 |    * Stop the cloud computer (disconnect interface)
109 |    */
110 |   async stop(): Promise<void> {
111 |     this.logger.info('Disconnecting from cloud computer...');
112 | 
113 |     if (this.iface) {
114 |       this.iface.disconnect();
115 |       this.iface = undefined;
116 |     }
117 | 
118 |     this.initialized = false;
119 |     this.logger.info('Disconnected from cloud computer');
120 |   }
121 | 
122 |   /**
123 |    * Get the computer interface
124 |    */
125 |   get interface(): BaseComputerInterface {
126 |     if (!this.iface) {
127 |       throw new Error('Computer not initialized. Call run() first.');
128 |     }
129 |     return this.iface;
130 |   }
131 | 
132 |   /**
133 |    * Disconnect from the cloud computer
134 |    */
135 |   async disconnect(): Promise<void> {
136 |     await this.stop();
137 |   }
138 | }
139 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/migration-guide.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Migration Guide
  3 | ---
  4 | 
  5 | This guide lists **breaking changes** when migrating from the original `ComputerAgent` (v0.3.x) to the rewritten `ComputerAgent` (v0.4.x) and shows old vs new usage for all four agent loops.
  6 | 
  7 | ## Breaking Changes
  8 | 
  9 | - **Initialization:**
 10 |   - `ComputerAgent` (v0.4.x) uses `model` as a string (e.g. "anthropic/claude-sonnet-4-5-20250929") instead of `LLM` and `AgentLoop` objects.
 11 |   - `tools` is a list (can include multiple computers and decorated functions).
 12 |   - `callbacks` are now first-class for extensibility (image retention, budget, trajectory, logging, etc).
 13 | - **No explicit `loop` parameter:**
 14 |   - Loop is inferred from the `model` string (e.g. `anthropic/`, `openai/`, `omniparser+`, `ui-tars`).
 15 | - **No explicit `computer` parameter:**
 16 |   - Computers are added to `tools` list.
 17 | 
 18 | ---
 19 | 
 20 | ## Usage Examples: Old vs New
 21 | 
 22 | ### 1. Anthropic Loop
 23 | 
 24 | **Old:**
 25 | 
 26 | ```python
 27 | async with Computer() as computer:
 28 |     agent = ComputerAgent(
 29 |         computer=computer,
 30 |         loop=AgentLoop.ANTHROPIC,
 31 |         model=LLM(provider=LLMProvider.ANTHROPIC)
 32 |     )
 33 |     async for result in agent.run("Take a screenshot"):
 34 |         print(result)
 35 | ```
 36 | 
 37 | **New:**
 38 | 
 39 | ```python
 40 | async with Computer() as computer:
 41 |     agent = ComputerAgent(
 42 |         model="anthropic/claude-sonnet-4-5-20250929",
 43 |         tools=[computer]
 44 |     )
 45 |     messages = [{"role": "user", "content": "Take a screenshot"}]
 46 |     async for result in agent.run(messages):
 47 |         for item in result["output"]:
 48 |             if item["type"] == "message":
 49 |                 print(item["content"][0]["text"])
 50 | ```
 51 | 
 52 | ### 2. OpenAI Loop
 53 | 
 54 | **Old:**
 55 | 
 56 | ```python
 57 | async with Computer() as computer:
 58 |     agent = ComputerAgent(
 59 |         computer=computer,
 60 |         loop=AgentLoop.OPENAI,
 61 |         model=LLM(provider=LLMProvider.OPENAI)
 62 |     )
 63 |     async for result in agent.run("Take a screenshot"):
 64 |         print(result)
 65 | ```
 66 | 
 67 | **New:**
 68 | 
 69 | ```python
 70 | async with Computer() as computer:
 71 |     agent = ComputerAgent(
 72 |         model="openai/computer-use-preview",
 73 |         tools=[computer]
 74 |     )
 75 |     messages = [{"role": "user", "content": "Take a screenshot"}]
 76 |     async for result in agent.run(messages):
 77 |         for item in result["output"]:
 78 |             if item["type"] == "message":
 79 |                 print(item["content"][0]["text"])
 80 | ```
 81 | 
 82 | ### 3. UI-TARS Loop
 83 | 
 84 | **Old:**
 85 | 
 86 | ```python
 87 | async with Computer() as computer:
 88 |     agent = ComputerAgent(
 89 |         computer=computer,
 90 |         loop=AgentLoop.UITARS,
 91 |         model=LLM(provider=LLMProvider.OAICOMPAT, name="ByteDance-Seed/UI-TARS-1.5-7B", provider_base_url="https://.../v1")
 92 |     )
 93 |     async for result in agent.run("Take a screenshot"):
 94 |         print(result)
 95 | ```
 96 | 
 97 | **New:**
 98 | 
 99 | ```python
100 | async with Computer() as computer:
101 |     agent = ComputerAgent(
102 |         model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
103 |         tools=[computer]
104 |     )
105 |     messages = [{"role": "user", "content": "Take a screenshot"}]
106 |     async for result in agent.run(messages):
107 |         for item in result["output"]:
108 |             if item["type"] == "message":
109 |                 print(item["content"][0]["text"])
110 | ```
111 | 
112 | ### 4. Omni Loop
113 | 
114 | **Old:**
115 | 
116 | ```python
117 | async with Computer() as computer:
118 |     agent = ComputerAgent(
119 |         computer=computer,
120 |         loop=AgentLoop.OMNI,
121 |         model=LLM(provider=LLMProvider.OLLAMA, name="gemma3")
122 |     )
123 |     async for result in agent.run("Take a screenshot"):
124 |         print(result)
125 | ```
126 | 
127 | **New:**
128 | 
129 | ```python
130 | async with Computer() as computer:
131 |     agent = ComputerAgent(
132 |         model="omniparser+ollama_chat/gemma3",
133 |         tools=[computer]
134 |     )
135 |     messages = [{"role": "user", "content": "Take a screenshot"}]
136 |     async for result in agent.run(messages):
137 |         for item in result["output"]:
138 |             if item["type"] == "message":
139 |                 print(item["content"][0]["text"])
140 | ```
141 | 
```
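
The breaking-changes list above notes that `tools` can mix multiple computers and plain decorated functions, but the old-vs-new examples only ever pass a single `Computer`. Below is a minimal sketch of that mixed case, assuming v0.4.x accepts ordinary Python functions with docstrings as tools; `lookup_ticket` is a hypothetical helper, and the imports mirror the other examples in this repo (`from agent import ComputerAgent`, `from computer import Computer`).

```python
# Minimal sketch building on the migration guide above. It assumes the
# v0.4.x ComputerAgent accepts plain Python functions (with docstrings)
# alongside computers in `tools`, as the breaking-changes list states.
# `lookup_ticket` is a hypothetical helper, not part of the SDK.
import asyncio

from agent import ComputerAgent
from computer import Computer


def lookup_ticket(ticket_id: str) -> str:
    """Return the status of an internal support ticket (hypothetical tool)."""
    return f"Ticket {ticket_id}: open"


async def main() -> None:
    async with Computer() as computer:
        agent = ComputerAgent(
            model="anthropic/claude-sonnet-4-5-20250929",
            tools=[computer, lookup_ticket],  # multiple tools in one list
        )
        messages = [{"role": "user", "content": "Check ticket 42, then take a screenshot"}]
        async for result in agent.run(messages):
            for item in result["output"]:
                if item["type"] == "message":
                    print(item["content"][0]["text"])


asyncio.run(main())
```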

--------------------------------------------------------------------------------
/docs/content/docs/macos-vm-cli-playbook/lume/faq.md:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: FAQ
  3 | ---
  4 | 
  5 | ### Where are the VMs stored?
  6 | 
  7 | VMs are stored in `~/.lume` by default. You can configure additional storage locations using the `lume config` command.
  8 | 
  9 | ### How are images cached?
 10 | 
 11 | Images are cached in `~/.lume/cache`. When you run `lume pull <image>`, Lume checks whether the image is already cached. If not, it downloads the image and caches it, removing any older cached versions.
 12 | 
 13 | ### Where is the configuration file stored?
 14 | 
 15 | Lume follows the XDG Base Directory specification for the configuration file:
 16 | 
 17 | - Configuration is stored in `$XDG_CONFIG_HOME/lume/config.yaml` (defaults to `~/.config/lume/config.yaml`)
 18 | 
 19 | By default, other data is stored in:
 20 | 
 21 | - VM data: `~/.lume`
 22 | - Cache files: `~/.lume/cache`
 23 | 
 24 | The config file contains settings for:
 25 | 
 26 | - VM storage locations and the default location
 27 | - Cache directory location
 28 | - Whether caching is enabled
 29 | 
 30 | You can view and modify these settings using the `lume config` commands:
 31 | 
 32 | ```bash
 33 | # View current configuration
 34 | lume config get
 35 | 
 36 | # Manage VM storage locations
 37 | lume config storage list                 # List all VM storage locations
 38 | lume config storage add <name> <path>    # Add a new VM storage location
 39 | lume config storage remove <name>        # Remove a VM storage location
 40 | lume config storage default <name>       # Set the default VM storage location
 41 | 
 42 | # Manage cache settings
 43 | lume config cache get                    # Get current cache directory
 44 | lume config cache set <path>             # Set cache directory
 45 | 
 46 | # Manage image caching settings
 47 | lume config caching get                  # Show current caching status
 48 | lume config caching set <boolean>        # Enable or disable image caching
 49 | ```
 50 | 
 51 | ### How do I use multiple VM storage locations?
 52 | 
 53 | Lume supports storing VMs in different locations (e.g., internal drive, external SSD). After configuring storage locations, you can specify which location to use with the `--storage` parameter in various commands:
 54 | 
 55 | ```bash
 56 | # Create a VM in a specific storage location
 57 | lume create my-vm --os macos --ipsw latest --storage ssd
 58 | 
 59 | # Run a VM from a specific storage location
 60 | lume run my-vm --storage ssd
 61 | 
 62 | # Delete a VM from a specific storage location
 63 | lume delete my-vm --storage ssd
 64 | 
 65 | # Pull an image to a specific storage location
 66 | lume pull macos-sequoia-vanilla:latest --name my-vm --storage ssd
 67 | 
 68 | # Clone a VM between storage locations
 69 | lume clone source-vm cloned-vm --source-storage default --dest-storage ssd
 70 | ```
 71 | 
 72 | If you don't specify a storage location, Lume will use the default one or search across all configured locations.
 73 | 
 74 | ### Are VM disks taking up all the disk space?
 75 | 
 76 | No, macOS uses sparse files, which only allocate space as needed. For example, VM disks totaling 50 GB may only use 20 GB on disk.
 77 | 
 78 | ### How do I get the latest macOS restore image URL?
 79 | 
 80 | ```bash
 81 | lume ipsw
 82 | ```
 83 | 
 84 | ### How do I delete a VM?
 85 | 
 86 | ```bash
 87 | lume delete <name>
 88 | ```
 89 | 
 90 | ### How to Install macOS from an IPSW Image
 91 | 
 92 | #### Create a new macOS VM using the latest supported IPSW image:
 93 | 
 94 | Run the following command to create a new macOS virtual machine using the latest available IPSW image:
 95 | 
 96 | ```bash
 97 | lume create <name> --os macos --ipsw latest
 98 | ```
 99 | 
100 | #### Create a new macOS VM using a specific IPSW image:
101 | 
102 | To create a macOS virtual machine from an older or specific IPSW file, first download the desired IPSW (UniversalMac) from a trusted source.
103 | 
104 | Then, use the downloaded IPSW path:
105 | 
106 | ```bash
107 | lume create <name> --os macos --ipsw <downloaded_ipsw_path>
108 | ```
109 | 
110 | ### How do I install a custom Linux image?
111 | 
112 | The process for creating a custom Linux image differs from macOS, since IPSW restore files are not used. You need to create a Linux VM first, then mount a setup image file to the VM for the first boot.
113 | 
114 | ```bash
115 | lume create <name> --os linux
116 | 
117 | lume run <name> --mount <path-to-setup-image>
118 | 
119 | lume run <name>
120 | ```
121 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/agent-sdk/mcp-server/client-integrations.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Client Integrations
  3 | ---
  4 | 
  5 | ## Claude Desktop Integration
  6 | 
  7 | To use with Claude Desktop, add an entry to your Claude Desktop configuration (`claude_desktop_config.json`, typically found in `~/.config/claude-desktop/`):
  8 | 
  9 | ### Package Installation Method
 10 | 
 11 | ```json
 12 | {
 13 |   "mcpServers": {
 14 |     "cua-agent": {
 15 |       "command": "/bin/bash",
 16 |       "args": ["~/.cua/start_mcp_server.sh"],
 17 |       "env": {
 18 |         "CUA_MODEL_NAME": "anthropic/claude-sonnet-4-20250514",
 19 |         "ANTHROPIC_API_KEY": "your-anthropic-api-key-here",
 20 |         "CUA_MAX_IMAGES": "3",
 21 |         "CUA_USE_HOST_COMPUTER_SERVER": "false"
 22 |       }
 23 |     }
 24 |   }
 25 | }
 26 | ```
 27 | 
 28 | ### Development Method
 29 | 
 30 | If you're working with the CUA source code:
 31 | 
 32 | **Standard VM Mode:**
 33 | 
 34 | ```json
 35 | {
 36 |   "mcpServers": {
 37 |     "cua-agent": {
 38 |       "command": "/usr/bin/env",
 39 |       "args": [
 40 |         "bash",
 41 |         "-lc",
 42 |         "export CUA_MODEL_NAME='anthropic/claude-sonnet-4-20250514'; export ANTHROPIC_API_KEY='your-anthropic-api-key-here'; /path/to/cua/libs/python/mcp-server/scripts/start_mcp_server.sh"
 43 |       ]
 44 |     }
 45 |   }
 46 | }
 47 | ```
 48 | 
 49 | **Host Computer Control Mode:**
 50 | 
 51 | ```json
 52 | {
 53 |   "mcpServers": {
 54 |     "cua-agent": {
 55 |       "command": "/usr/bin/env",
 56 |       "args": [
 57 |         "bash",
 58 |         "-lc",
 59 |         "export CUA_MODEL_NAME='anthropic/claude-sonnet-4-20250514'; export ANTHROPIC_API_KEY='your-anthropic-api-key-here'; export CUA_USE_HOST_COMPUTER_SERVER='true'; export CUA_MAX_IMAGES='1'; /path/to/cua/libs/python/mcp-server/scripts/start_mcp_server.sh"
 60 |       ]
 61 |     }
 62 |   }
 63 | }
 64 | ```
 65 | 
 66 | **Note**: Replace `/path/to/cua` with the absolute path to your CUA repository directory.
 67 | 
 68 | **⚠️ Host Computer Control Setup**: When using `CUA_USE_HOST_COMPUTER_SERVER='true'`, you must also:
 69 | 
 70 | 1. Install computer server dependencies: `python3 -m pip install uvicorn fastapi`
 71 | 2. Install the computer server: `python3 -m pip install -e libs/python/computer-server --break-system-packages`
 72 | 3. Start the computer server: `python -m computer_server --log-level debug`
 73 | 4. The AI will have direct access to your desktop - use with caution!
 74 | 
 75 | For more information on MCP with Claude Desktop, see the [official MCP User Guide](https://modelcontextprotocol.io/quickstart/user).
 76 | 
 77 | ## Cursor Integration
 78 | 
 79 | To use with Cursor, add an MCP configuration file in one of these locations:
 80 | 
 81 | - **Project-specific**: Create `.cursor/mcp.json` in your project directory
 82 | - **Global**: Create `~/.cursor/mcp.json` in your home directory
 83 | 
 84 | Example configuration for Cursor:
 85 | 
 86 | ```json
 87 | {
 88 |   "mcpServers": {
 89 |     "cua-agent": {
 90 |       "command": "/bin/bash",
 91 |       "args": ["~/.cua/start_mcp_server.sh"],
 92 |       "env": {
 93 |         "CUA_MODEL_NAME": "anthropic/claude-sonnet-4-20250514",
 94 |         "ANTHROPIC_API_KEY": "your-anthropic-api-key-here"
 95 |       }
 96 |     }
 97 |   }
 98 | }
 99 | ```
100 | 
101 | After configuration, you can ask Cursor's Agent to perform computer tasks by explicitly mentioning the CUA agent, for example: "Use the computer control tools to open Safari."
102 | 
103 | For more information on MCP with Cursor, see the [official Cursor MCP documentation](https://docs.cursor.com/context/model-context-protocol).
104 | 
105 | ## Other MCP Clients
106 | 
107 | The MCP server is compatible with any MCP-compliant client. The server exposes the following tools:
108 | 
109 | - `run_cua_task` - Execute single computer tasks
110 | - `run_multi_cua_tasks` - Execute multiple tasks (sequential or concurrent)
111 | - `screenshot_cua` - Capture screenshots
112 | - `get_session_stats` - Monitor session statistics
113 | - `cleanup_session` - Manage session lifecycle
114 | 
115 | ### Configuration Options
116 | 
117 | All MCP clients can configure the server using environment variables:
118 | 
119 | - `CUA_MODEL_NAME` - Model to use for task execution
120 | - `CUA_MAX_IMAGES` - Maximum images to keep in context
121 | - `CUA_USE_HOST_COMPUTER_SERVER` - Use host system instead of VM
122 | 
123 | See the [Configuration](/docs/libraries/mcp-server/configuration) page for detailed configuration options.
124 | 
```
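
The section above says the server works with any MCP-compliant client and lists the exposed tools, but only shows JSON configs for Claude Desktop and Cursor. The sketch below calls the server directly with the official `mcp` Python SDK over stdio; the script path and environment variables are copied from the configs above, and the `task` argument name passed to `run_cua_task` is an assumption rather than a documented schema.

```python
# A minimal sketch of calling the server from "any MCP-compliant client",
# here via the official `mcp` Python SDK over stdio. The script path and
# env vars mirror the configs above; the `task` argument name is an
# assumption, not a documented schema.
import asyncio
import os

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


async def main() -> None:
    server = StdioServerParameters(
        command="/bin/bash",
        args=[os.path.expanduser("~/.cua/start_mcp_server.sh")],
        env={
            "CUA_MODEL_NAME": "anthropic/claude-sonnet-4-20250514",
            "ANTHROPIC_API_KEY": "your-anthropic-api-key-here",
        },
    )
    async with stdio_client(server) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            tools = await session.list_tools()
            print([t.name for t in tools.tools])  # expect run_cua_task, screenshot_cua, ...
            result = await session.call_tool(
                "run_cua_task", arguments={"task": "Open Safari and take a screenshot"}
            )
            print(result)


asyncio.run(main())
```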

--------------------------------------------------------------------------------
/libs/qemu-docker/linux/src/vm/setup/setup-cua-server.sh:
--------------------------------------------------------------------------------

```bash
  1 | #!/bin/bash
  2 | # Setup CUA Computer Server on Linux
  3 | # Creates a system-level systemd service to run computer server in background
  4 | 
  5 | set -e
  6 | 
  7 | USER_NAME="docker"
  8 | USER_HOME="/home/$USER_NAME"
  9 | SCRIPT_DIR="/opt/oem"
 10 | CUA_DIR="/opt/cua-server"
 11 | VENV_DIR="$CUA_DIR/venv"
 12 | SERVICE_NAME="cua-computer-server"
 13 | LOG_FILE="$SCRIPT_DIR/setup.log"
 14 | 
 15 | log() {
 16 |     echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"
 17 | }
 18 | 
 19 | log "=== Installing CUA Computer Server ==="
 20 | 
 21 | # Install Python 3 and venv
 22 | log "Installing Python 3 and dependencies..."
 23 | sudo apt-get install -y python3 python3-venv python3-pip python3-tk python3-dev
 24 | 
 25 | # Create CUA directory
 26 | log "Creating CUA directory at $CUA_DIR..."
 27 | sudo mkdir -p "$CUA_DIR"
 28 | sudo chown "$USER_NAME:$USER_NAME" "$CUA_DIR"
 29 | 
 30 | # Create virtual environment
 31 | if [ -f "$VENV_DIR/bin/python" ]; then
 32 |     log "Existing venv detected; skipping creation"
 33 | else
 34 |     log "Creating Python virtual environment at $VENV_DIR..."
 35 |     python3 -m venv "$VENV_DIR"
 36 |     log "Virtual environment created successfully"
 37 | fi
 38 | 
 39 | # Activate and install packages
 40 | log "Upgrading pip, setuptools, and wheel..."
 41 | "$VENV_DIR/bin/pip" install --upgrade pip setuptools wheel
 42 | 
 43 | log "Installing cua-computer-server..."
 44 | "$VENV_DIR/bin/pip" install --upgrade cua-computer-server
 45 | log "cua-computer-server installed successfully"
 46 | 
 47 | # Open firewall for port 5000 (if ufw is available)
 48 | if command -v ufw &> /dev/null; then
 49 |     log "Opening firewall for port 5000..."
 50 |     sudo ufw allow 5000/tcp || true
 51 |     log "Firewall rule added"
 52 | fi
 53 | 
 54 | # Create start script with auto-restart
 55 | START_SCRIPT="$CUA_DIR/start-server.sh"
 56 | log "Creating start script at $START_SCRIPT..."
 57 | 
 58 | cat > "$START_SCRIPT" << 'EOF'
 59 | #!/bin/bash
 60 | # CUA Computer Server Start Script with auto-restart
 61 | 
 62 | CUA_DIR="/opt/cua-server"
 63 | VENV_DIR="$CUA_DIR/venv"
 64 | LOG_FILE="$CUA_DIR/server.log"
 65 | 
 66 | start_server() {
 67 |     echo "$(date '+%Y-%m-%d %H:%M:%S') Updating cua-computer-server..." >> "$LOG_FILE"
 68 |     "$VENV_DIR/bin/pip" install --upgrade cua-computer-server >> "$LOG_FILE" 2>&1
 69 | 
 70 |     echo "$(date '+%Y-%m-%d %H:%M:%S') Starting CUA Computer Server on port 5000..." >> "$LOG_FILE"
 71 |     "$VENV_DIR/bin/python" -m computer_server --port 5000 >> "$LOG_FILE" 2>&1
 72 |     return $?
 73 | }
 74 | 
 75 | while true; do
 76 |     start_server
 77 |     EXIT_CODE=$?
 78 |     echo "$(date '+%Y-%m-%d %H:%M:%S') Server exited with code: $EXIT_CODE. Restarting in 5s..." >> "$LOG_FILE"
 79 |     sleep 5
 80 | done
 81 | EOF
 82 | 
 83 | chmod +x "$START_SCRIPT"
 84 | log "Start script created"
 85 | 
 86 | # Create xhost script for X11 access
 87 | log "Creating xhost script..."
 88 | sudo tee /etc/X11/Xsession.d/99xauth > /dev/null << 'EOF'
 89 | #!/bin/sh
 90 | # Grant local X11 access for CUA Computer Server
 91 | export DISPLAY=:0
 92 | xhost +local: 2>/dev/null || true
 93 | EOF
 94 | sudo chmod +x /etc/X11/Xsession.d/99xauth
 95 | log "X11 access script created"
 96 | 
 97 | # Create system-level systemd service
 98 | log "Creating systemd system service..."
 99 | 
100 | sudo tee /etc/systemd/system/$SERVICE_NAME.service > /dev/null << EOF
101 | [Unit]
102 | Description=CUA Computer Server
103 | After=graphical.target
104 | 
105 | [Service]
106 | Type=simple
107 | ExecStart=$START_SCRIPT
108 | Restart=always
109 | RestartSec=5
110 | Environment=PYTHONUNBUFFERED=1
111 | Environment=DISPLAY=:0
112 | Environment=XAUTHORITY=$USER_HOME/.Xauthority
113 | User=$USER_NAME
114 | WorkingDirectory=$CUA_DIR
115 | 
116 | [Install]
117 | WantedBy=graphical.target
118 | EOF
119 | 
120 | log "Systemd service created at /etc/systemd/system/$SERVICE_NAME.service"
121 | 
122 | # Ensure proper ownership of CUA directory
123 | log "Setting ownership of $CUA_DIR to $USER_NAME..."
124 | sudo chown -R "$USER_NAME:$USER_NAME" "$CUA_DIR"
125 | 
126 | # Enable and start the service
127 | log "Enabling systemd service..."
128 | sudo systemctl daemon-reload
129 | sudo systemctl enable "$SERVICE_NAME.service"
130 | 
131 | log "Starting CUA Computer Server service..."
132 | sudo systemctl start "$SERVICE_NAME.service" || true
133 | 
134 | log "=== CUA Computer Server setup completed ==="
135 | log "Service status: $(sudo systemctl is-active $SERVICE_NAME.service 2>/dev/null || echo 'unknown')"
136 | 
```

--------------------------------------------------------------------------------
/blog/cua-playground-preview.md:
--------------------------------------------------------------------------------

```markdown
 1 | # Cua Playground: Agents + Sandboxes in Your Browser
 2 | 
 3 | Building computer-use agents means constant iteration—writing code, deploying to a sandbox, testing behavior, debugging issues, then repeating the cycle. Every test requires switching between your code editor, terminal, and VNC viewer. Want to try a different prompt? Edit your code, redeploy, and wait for the agent to restart. It works, but it's slow.
 4 | 
 5 | Today we're launching the **Cua Playground**: a browser-based environment for testing computer-use agents without writing code. Send messages to your sandboxes, watch them execute in real-time, and iterate on prompts instantly—all from your dashboard at cua.ai.
 6 | 
 7 | ![Cua Playground](https://github.com/user-attachments/assets/af1071ba-3df3-4e4b-aafb-df8c3d00b0a5)
 8 | 
 9 | **What's new with this release:**
10 | 
11 | - Instant testing—send messages to any running sandbox directly from your browser
12 | - Real-time execution—watch your agent work with live tool call updates and screenshots
13 | - Multi-model support—test with Claude Sonnet 4.5, Haiku 4.5, and more
14 | - Persistent chat history—conversations save automatically to local storage
15 | 
16 | The Playground connects to your existing Cua sandboxes—the same ones you use with the Agent SDK. Select a running sandbox and a model, then start chatting. The agent uses computer-use tools (mouse, keyboard, bash, editor) to complete your tasks, and you see every action it takes.
17 | 
18 | ## Getting Started Today
19 | 
20 | <div align="center">
21 |   <video src="https://github.com/user-attachments/assets/9fef0f30-1024-4833-8b7a-6a2c02d8eb99" width="600" controls></video>
22 | </div>
23 | 
24 | Sign up at [cua.ai/signin](https://cua.ai/signin) and grab your API key from the dashboard. Then navigate to the Playground:
25 | 
26 | 1. Navigate to Dashboard > Playground
27 | 2. Select a sandbox from the dropdown (must be "running" status)
28 | 3. Choose a model (we recommend Claude Sonnet 4.5 to start)
29 | 4. Send a message: "Take a screenshot and describe what you see"
30 | 5. Watch the agent execute computer actions in real-time
31 | 
32 | Example use cases:
33 | 
34 | **Prompt Testing**
35 | 
36 | ```
37 | ❌ "Check the website"
38 | ✅ "Navigate to example.com in Firefox and take a screenshot of the homepage"
39 | ```
40 | 
41 | **Model Comparison**
42 | Run the same task with different models to compare quality, speed, and cost.
43 | 
44 | **Debugging Agent Behavior**
45 | 
46 | 1. Send: "Find the login button and click it"
47 | 2. View tool calls to see each mouse movement
48 | 3. Check screenshots to verify the agent found the right element
49 | 4. Adjust your prompt based on what you observe
50 | 
51 | ## FAQs
52 | 
53 | <details>
54 | <summary><strong>Do I need to know how to code?</strong></summary>
55 | 
56 | No. The Playground is designed for testing agent behavior without writing code. However, for production deployments, you'll need to use the Agent SDK (Python/TypeScript).
57 | 
58 | </details>
59 | 
60 | <details>
61 | <summary><strong>Does this replace the Agent SDK?</strong></summary>
62 | 
63 | No. The Playground is for rapid testing and experimentation. For production deployments, scheduled tasks, or complex workflows, use the Agent SDK.
64 | 
65 | </details>
66 | 
67 | <details>
68 | <summary><strong>How much does it cost?</strong></summary>
69 | 
70 | Playground requests use the same credit system as Agent SDK requests. You're charged for model inference (varies by model) and sandbox runtime (billed per hour while running).
71 | 
72 | </details>
73 | 
74 | <details>
75 | <summary><strong>Why is my sandbox not showing up?</strong></summary>
76 | 
77 | The sandbox must have `status = "running"` to appear in the dropdown. Check Dashboard > Sandboxes to verify status. If stopped, click "Start" and wait ~30 seconds for it to become available.
78 | 
79 | </details>
80 | 
81 | ## Need help?
82 | 
83 | If you hit issues getting the Playground working, reach out in [Discord](https://discord.gg/cua-ai). We respond fast and fix based on what people actually use.
84 | 
85 | ---
86 | 
87 | Get started at [cua.ai](https://cua.ai) or try the Playground at [cua.ai/dashboard/playground](https://cua.ai/dashboard/playground).
88 | 
```

--------------------------------------------------------------------------------
/docs/content/docs/computer-sdk/custom-computer-handlers.mdx:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: Custom Computers
  3 | slug: custom-computer-handlers
  4 | ---
  5 | 
  6 | The Agent SDK supports defining custom computer handlers using a simple dictionary interface. This enables integration with custom automation backends, testing frameworks, or specialized computer control systems.
  7 | 
  8 | ## Example: Defining a Custom Computer Handler
  9 | 
 10 | ```python
 11 | import asyncio
 12 | from PIL import Image
 13 | 
 14 | # Define your custom computer functions
 15 | async def take_screenshot():
 16 |     """Your custom screenshot implementation"""
 17 |     # Return PIL Image, bytes, or base64 string
 18 |     return Image.new('RGB', (1920, 1080), color='white')
 19 | 
 20 | # Create dict-based computer handler - only 'screenshot' is required
 21 | custom_computer = {
 22 |     'screenshot': take_screenshot, # required
 23 | 
 24 |     # everything below is optional
 25 |     'environment': 'linux', # linux, mac, windows, browser
 26 |     'dimensions': (1920, 1080), # (width, height)
 27 |     'click': lambda x, y, button: print(f"Clicking at ({x}, {y}) with {button} button"),
 28 | }
 29 | ```
 30 | 
 31 | You can then use this as a tool for your agent:
 32 | 
 33 | ```python
 34 | from agent import ComputerAgent
 35 | 
 36 | agent = ComputerAgent(
 37 |     model="cua/anthropic/claude-sonnet-4.5",
 38 |     tools=[custom_computer],
 39 | )
 40 | 
 41 | # Agent will automatically convert dict to agent.computers.CustomComputerHandler
 42 | await agent.run("Take a screenshot and click at coordinates 100, 200")
 43 | ```
 44 | 
 45 | ## Class-Based Implementation
 46 | 
 47 | For more complex implementations, you can create a custom class by inheriting from `AsyncComputerHandler`:
 48 | 
 49 | ```python
 50 | from agent.computers import AsyncComputerHandler
 51 | from PIL import Image
 52 | from typing import Literal, List, Dict, Union, Optional
 53 | 
 54 | class MyCustomComputer(AsyncComputerHandler):
 55 |     """Custom computer handler implementation."""
 56 | 
 57 |     def __init__(self):
 58 |         # Initialize your custom computer interface here
 59 |         pass
 60 | 
 61 |     # ==== Computer-Use-Preview Action Space ====
 62 | 
 63 |     async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
 64 |         """Get the current environment type."""
 65 |         ...
 66 | 
 67 |     async def get_dimensions(self) -> tuple[int, int]:
 68 |         """Get screen dimensions as (width, height)."""
 69 |         ...
 70 | 
 71 |     async def screenshot(self) -> str:
 72 |         """Take a screenshot and return as base64 string."""
 73 |         ...
 74 | 
 75 |     async def click(self, x: int, y: int, button: str = "left") -> None:
 76 |         """Click at coordinates with specified button."""
 77 |         ...
 78 | 
 79 |     async def double_click(self, x: int, y: int) -> None:
 80 |         """Double click at coordinates."""
 81 |         ...
 82 | 
 83 |     async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
 84 |         """Scroll at coordinates with specified scroll amounts."""
 85 |         ...
 86 | 
 87 |     async def type(self, text: str) -> None:
 88 |         """Type text."""
 89 |         ...
 90 | 
 91 |     async def wait(self, ms: int = 1000) -> None:
 92 |         """Wait for specified milliseconds."""
 93 |         ...
 94 | 
 95 |     async def move(self, x: int, y: int) -> None:
 96 |         """Move cursor to coordinates."""
 97 |         ...
 98 | 
 99 |     async def keypress(self, keys: Union[List[str], str]) -> None:
100 |         """Press key combination."""
101 |         ...
102 | 
103 |     async def drag(self, path: List[Dict[str, int]]) -> None:
104 |         """Drag along specified path."""
105 |         ...
106 | 
107 |     async def get_current_url(self) -> str:
108 |         """Get current URL (for browser environments)."""
109 |         ...
110 | 
111 |     # ==== Anthropic Action Space ====
112 | 
113 |     async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
114 |         """Left mouse down at coordinates."""
115 |         ...
116 | 
117 |     async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
118 |         """Left mouse up at coordinates."""
119 |         ...
120 | 
121 | # Use with agent
122 | custom_computer = MyCustomComputer()
123 | 
124 | agent = ComputerAgent(
125 |     model="cua/anthropic/claude-sonnet-4.5",
126 |     tools=[custom_computer],
127 | )
128 | 
129 | await agent.run("Take a screenshot and click at coordinates 100, 200")
130 | ```
131 | 
```

--------------------------------------------------------------------------------
/scripts/run-docker-dev.sh:
--------------------------------------------------------------------------------

```bash
  1 | #!/bin/bash
  2 | 
  3 | # Colors for output
  4 | GREEN='\033[0;32m'
  5 | BLUE='\033[0;34m'
  6 | RED='\033[0;31m'
  7 | NC='\033[0m' # No Color
  8 | 
  9 | # Print with color
 10 | print_info() {
 11 |     echo -e "${BLUE}==> $1${NC}"
 12 | }
 13 | 
 14 | print_success() {
 15 |     echo -e "${GREEN}==> $1${NC}"
 16 | }
 17 | 
 18 | print_error() {
 19 |     echo -e "${RED}==> $1${NC}"
 20 | }
 21 | 
 22 | # Docker image name
 23 | IMAGE_NAME="cua-dev-image"
 24 | CONTAINER_NAME="cua-dev-container"
 25 | PLATFORM="linux/arm64"
 26 | 
 27 | # Detect platform based on architecture
 28 | arch=$(uname -m)
 29 | 
 30 | if [[ $arch == x86_64* ]]; then
 31 |     PLATFORM="linux/amd64"
 32 |     print_info "X64 Architecture detected, using platform: ${PLATFORM}"
 33 | elif [[ $arch == i*86 ]]; then
 34 |     PLATFORM="linux/386"
 35 |     print_info "X32 Architecture detected, using platform: ${PLATFORM}"
 36 | elif [[ $arch == arm* ]] || [[ $arch == aarch64 ]]; then
 37 |     PLATFORM="linux/arm64"
 38 |     print_info "ARM Architecture detected, using platform: ${PLATFORM}"
 39 | else
 40 |     # Fallback to amd64 for unknown architectures
 41 |     PLATFORM="linux/amd64"
 42 |     print_info "Unknown architecture ($arch), defaulting to platform: ${PLATFORM}"
 43 | fi
 44 | 
 45 | # Environment variables
 46 | PYTHONPATH="/app/libs/python/core:/app/libs/python/computer:/app/libs/python/agent:/app/libs/python/som:/app/libs/python/computer-server:/app/libs/python/mcp-server"
 47 | 
 48 | # Check if Docker is installed
 49 | if ! command -v docker &> /dev/null; then
 50 |     print_error "Docker is not installed. Please install Docker first."
 51 |     exit 1
 52 | fi
 53 | 
 54 | # Command options
 55 | case "$1" in
 56 |     build)
 57 |         print_info "Building the development Docker image..."
 58 |         print_info "This will install all dependencies but won't include source code"
 59 |         docker build -f Dockerfile --platform=${PLATFORM} -t ${IMAGE_NAME} .
 60 |         print_success "Development Docker image built successfully!"
 61 |         ;;
 62 |     
 63 |     run)
 64 |         # Check for interactive flag
 65 |         if [ "$2" == "--interactive" ]; then
 66 |             print_info "Running the development Docker container with interactive shell..."
 67 |             print_info "Mounting source code from host"
 68 |             print_info "Connecting to host.docker.internal:7777"
 69 |             
 70 |             docker run -it --rm \
 71 |                 --platform=${PLATFORM} \
 72 |                 --name ${CONTAINER_NAME} \
 73 |                 -v "$(pwd):/app" \
 74 |                 -e PYTHONPATH=${PYTHONPATH} \
 75 |                 -e DISPLAY=${DISPLAY:-:0} \
 76 |                 -e PYLUME_HOST="host.docker.internal" \
 77 |                 -p 7860:7860 \
 78 |                 ${IMAGE_NAME} bash
 79 |         else
 80 |             # Run the specified example
 81 |             if [ -z "$2" ]; then
 82 |                 print_error "Please specify an example file, e.g., ./run-docker-dev.sh run computer_examples.py"
 83 |                 exit 1
 84 |             fi
 85 |             print_info "Running example: $2"
 86 |             print_info "Connecting to host.docker.internal:7777"
 87 |             
 88 |             docker run -it --rm \
 89 |                 --platform=${PLATFORM} \
 90 |                 --name ${CONTAINER_NAME} \
 91 |                 -v "$(pwd):/app" \
 92 |                 -e PYTHONPATH=${PYTHONPATH} \
 93 |                 -e DISPLAY=${DISPLAY:-:0} \
 94 |                 -e PYLUME_HOST="host.docker.internal" \
 95 |                 -p 7860:7860 \
 96 |                 ${IMAGE_NAME} python "/app/examples/$2"
 97 |         fi
 98 |         ;;
 99 |     
100 |     stop)
101 |         print_info "Stopping any running containers..."
102 |         docker stop ${CONTAINER_NAME} 2>/dev/null || true
103 |         print_success "Done!"
104 |         ;;
105 |         
106 |     *)
107 |         echo "Usage: $0 {build|run [--interactive] [filename]|stop}"
108 |         echo ""
109 |         echo "Commands:"
110 |         echo "  build                      Build the development Docker image with dependencies"
111 |         echo "  run [example_filename]     Run the specified example file in the container"
112 |         echo "  run --interactive          Run the container with mounted code and get an interactive shell"
113 |         echo "  stop                       Stop the container"
114 |         exit 1
115 | esac
116 | 
117 | exit 0 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/tools/browser_tool.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Browser Tool for agent interactions.
  3 | Allows agents to control a browser programmatically via Playwright.
  4 | """
  5 | 
  6 | import logging
  7 | from typing import TYPE_CHECKING, Optional
  8 | 
  9 | if TYPE_CHECKING:
 10 |     from computer.interface import GenericComputerInterface
 11 | 
 12 | logger = logging.getLogger(__name__)
 13 | 
 14 | 
 15 | class BrowserTool:
 16 |     """
 17 |     Browser tool that uses the computer SDK's interface to control a browser.
 18 |     Implements the Fara/Magentic-One agent interface for browser control.
 19 |     """
 20 | 
 21 |     def __init__(
 22 |         self,
 23 |         interface: "GenericComputerInterface",
 24 |     ):
 25 |         """
 26 |         Initialize the BrowserTool.
 27 | 
 28 |         Args:
 29 |             interface: A GenericComputerInterface instance that provides playwright_exec
 30 |         """
 31 |         self.interface = interface
 32 |         self.logger = logger
 33 | 
 34 |     async def _execute_command(self, command: str, params: dict) -> dict:
 35 |         """
 36 |         Execute a browser command via the computer interface.
 37 | 
 38 |         Args:
 39 |             command: Command name
 40 |             params: Command parameters
 41 | 
 42 |         Returns:
 43 |             Response dictionary
 44 |         """
 45 |         try:
 46 |             result = await self.interface.playwright_exec(command, params)
 47 |             if not result.get("success"):
 48 |                 self.logger.error(
 49 |                     f"Browser command '{command}' failed: {result.get('error', 'Unknown error')}"
 50 |                 )
 51 |             return result
 52 |         except Exception as e:
 53 |             self.logger.error(f"Error executing browser command '{command}': {e}")
 54 |             return {"success": False, "error": str(e)}
 55 | 
 56 |     async def visit_url(self, url: str) -> dict:
 57 |         """
 58 |         Navigate to a URL.
 59 | 
 60 |         Args:
 61 |             url: URL to visit
 62 | 
 63 |         Returns:
 64 |             Response dictionary with success status and current URL
 65 |         """
 66 |         return await self._execute_command("visit_url", {"url": url})
 67 | 
 68 |     async def click(self, x: int, y: int) -> dict:
 69 |         """
 70 |         Click at coordinates.
 71 | 
 72 |         Args:
 73 |             x: X coordinate
 74 |             y: Y coordinate
 75 | 
 76 |         Returns:
 77 |             Response dictionary with success status
 78 |         """
 79 |         return await self._execute_command("click", {"x": x, "y": y})
 80 | 
 81 |     async def type(self, text: str) -> dict:
 82 |         """
 83 |         Type text into the focused element.
 84 | 
 85 |         Args:
 86 |             text: Text to type
 87 | 
 88 |         Returns:
 89 |             Response dictionary with success status
 90 |         """
 91 |         return await self._execute_command("type", {"text": text})
 92 | 
 93 |     async def scroll(self, delta_x: int, delta_y: int) -> dict:
 94 |         """
 95 |         Scroll the page.
 96 | 
 97 |         Args:
 98 |             delta_x: Horizontal scroll delta
 99 |             delta_y: Vertical scroll delta
100 | 
101 |         Returns:
102 |             Response dictionary with success status
103 |         """
104 |         return await self._execute_command("scroll", {"delta_x": delta_x, "delta_y": delta_y})
105 | 
106 |     async def web_search(self, query: str) -> dict:
107 |         """
108 |         Navigate to a Google search for the query.
109 | 
110 |         Args:
111 |             query: Search query
112 | 
113 |         Returns:
114 |             Response dictionary with success status and current URL
115 |         """
116 |         return await self._execute_command("web_search", {"query": query})
117 | 
118 |     async def screenshot(self) -> bytes:
119 |         """
120 |         Take a screenshot of the current browser page.
121 | 
122 |         Returns:
123 |             Screenshot image data as bytes (PNG format)
124 |         """
125 |         import base64
126 | 
127 |         result = await self._execute_command("screenshot", {})
128 |         if result.get("success") and result.get("screenshot"):
129 |             # Decode base64 screenshot to bytes
130 |             screenshot_b64 = result["screenshot"]
131 |             screenshot_bytes = base64.b64decode(screenshot_b64)
132 |             return screenshot_bytes
133 |         else:
134 |             error = result.get("error", "Unknown error")
135 |             raise RuntimeError(f"Failed to take screenshot: {error}")
136 | 
```

--------------------------------------------------------------------------------
/libs/lume/src/Commands/Run.swift:
--------------------------------------------------------------------------------

```swift
  1 | import ArgumentParser
  2 | import Foundation
  3 | import Virtualization
  4 | 
  5 | struct Run: AsyncParsableCommand {
  6 |     static let configuration = CommandConfiguration(
  7 |         abstract: "Run a virtual machine"
  8 |     )
  9 | 
 10 |     @Argument(
 11 |         help: "Name of the virtual machine or image to pull and run (format: name or name:tag)",
 12 |         completion: .custom(completeVMName))
 13 |     var name: String
 14 | 
 15 |     @Flag(name: [.short, .long], help: "Do not start the VNC client")
 16 |     var noDisplay: Bool = false
 17 | 
 18 |     @Option(
 19 |         name: [.customLong("shared-dir")],
 20 |         help:
 21 |             "Directory to share with the VM. Can be just a path for read-write access (e.g. ~/src) or path:tag where tag is 'ro' for read-only or 'rw' for read-write (e.g. ~/src:ro)"
 22 |     )
 23 |     var sharedDirectories: [String] = []
 24 | 
 25 |     @Option(
 26 |         help:
 27 |             "For Linux VMs only, a read-only disk image to attach to the VM (e.g. --mount=\"ubuntu.iso\")",
 28 |         completion: .file())
 29 |     var mount: String?
 30 | 
 31 |     @Option(
 32 |         name: [.customLong("usb-storage")],
 33 |         help: "Disk image to attach as a USB mass storage device (e.g. --usb-storage=\"disk.img\")",
 34 |         completion: .file())
 35 |     var usbStorageDevices: [String] = []
 36 | 
 37 |     @Option(help: "Github Container Registry to pull the images from. Defaults to ghcr.io")
 38 |     var registry: String = "ghcr.io"
 39 | 
 40 |     @Option(help: "Organization to pull the images from. Defaults to trycua")
 41 |     var organization: String = "trycua"
 42 | 
 43 |     @Option(
 44 |         name: [.customLong("vnc-port")],
 45 |         help: "Port to use for the VNC server. Defaults to 0 (auto-assign)")
 46 |     var vncPort: Int = 0
 47 | 
 48 |     @Option(help: "For MacOS VMs only, boot into the VM in recovery mode")
 49 |     var recoveryMode: Bool = false
 50 | 
 51 |     @Option(name: .customLong("storage"), help: "VM storage location to use or direct path to VM location")
 52 |     var storage: String?
 53 | 
 54 |     private var parsedSharedDirectories: [SharedDirectory] {
 55 |         get throws {
 56 |             try sharedDirectories.map { dirString -> SharedDirectory in
 57 |                 let components = dirString.split(separator: ":", maxSplits: 1)
 58 |                 let hostPath = String(components[0])
 59 | 
 60 |                 // If no tag is provided, default to read-write
 61 |                 if components.count == 1 {
 62 |                     return SharedDirectory(
 63 |                         hostPath: hostPath,
 64 |                         tag: VZVirtioFileSystemDeviceConfiguration.macOSGuestAutomountTag,
 65 |                         readOnly: false
 66 |                     )
 67 |                 }
 68 | 
 69 |                 // Parse the tag if provided
 70 |                 let tag = String(components[1])
 71 |                 let readOnly: Bool
 72 |                 switch tag.lowercased() {
 73 |                 case "ro":
 74 |                     readOnly = true
 75 |                 case "rw":
 76 |                     readOnly = false
 77 |                 default:
 78 |                     throw ValidationError(
 79 |                         "Invalid tag value. Must be either 'ro' for read-only or 'rw' for read-write"
 80 |                     )
 81 |                 }
 82 | 
 83 |                 return SharedDirectory(
 84 |                     hostPath: hostPath,
 85 |                     tag: VZVirtioFileSystemDeviceConfiguration.macOSGuestAutomountTag,
 86 |                     readOnly: readOnly
 87 |                 )
 88 |             }
 89 |         }
 90 |     }
 91 | 
 92 |     private var parsedUSBStorageDevices: [Path] {
 93 |         usbStorageDevices.map { Path($0) }
 94 |     }
 95 | 
 96 |     init() {
 97 |     }
 98 | 
 99 |     @MainActor
100 |     func run() async throws {
101 |         try await LumeController().runVM(
102 |             name: name,
103 |             noDisplay: noDisplay,
104 |             sharedDirectories: parsedSharedDirectories,
105 |             mount: mount.map { Path($0) },
106 |             registry: registry,
107 |             organization: organization,
108 |             vncPort: vncPort,
109 |             recoveryMode: recoveryMode,
110 |             storage: storage,
111 |             usbMassStoragePaths: parsedUSBStorageDevices.isEmpty ? nil : parsedUSBStorageDevices
112 |         )
113 |     }
114 | }
115 | 
```

--------------------------------------------------------------------------------
/libs/python/mcp-server/test_mcp_server_local_option.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Test script to verify MCP Server local desktop option works correctly.
  3 | 
  4 | This test verifies:
  5 | 1. Default behavior: Computer uses VM
  6 | 2. New behavior: Computer uses host when CUA_USE_HOST_COMPUTER_SERVER=true
  7 | """
  8 | 
  9 | import asyncio
 10 | import os
 11 | import sys
 12 | from pathlib import Path
 13 | 
 14 | # Add the mcp-server module to path
 15 | mcp_server_path = Path(__file__).resolve().parent  # this test lives in libs/python/mcp-server
 16 | sys.path.insert(0, str(mcp_server_path))  # make the mcp_server package importable
 17 | 
 18 | import pytest
 19 | 
 20 | 
 21 | @pytest.mark.asyncio
 22 | async def test_default_vm_mode():
 23 |     """Test that the default mode uses VM (not host computer server)."""
 24 |     # Ensure environment variable is not set or is false
 25 |     os.environ.pop("CUA_USE_HOST_COMPUTER_SERVER", None)
 26 | 
 27 |     from mcp_server.session_manager import ComputerPool
 28 | 
 29 |     pool = ComputerPool(max_size=1)
 30 | 
 31 |     try:
 32 |         computer = await pool.acquire()
 33 | 
 34 |         # Verify the computer was initialized
 35 |         assert computer is not None
 36 | 
 37 |         # Check that use_host_computer_server was set to False (default)
 38 |         # This should start a VM
 39 |         print("✓ Default mode: Computer initialized (VM mode expected)")
 40 | 
 41 |         await pool.release(computer)
 42 | 
 43 |     finally:
 44 |         await pool.shutdown()
 45 | 
 46 | 
 47 | @pytest.mark.asyncio
 48 | async def test_local_desktop_mode():
 49 |     """Test that setting CUA_USE_HOST_COMPUTER_SERVER=true uses host."""
 50 |     # Set environment variable to true
 51 |     os.environ["CUA_USE_HOST_COMPUTER_SERVER"] = "true"
 52 | 
 53 |     # Need to reload module to pick up new env var
 54 |     import importlib
 55 | 
 56 |     import mcp_server.session_manager
 57 |     from mcp_server.session_manager import ComputerPool
 58 | 
 59 |     importlib.reload(mcp_server.session_manager)
 60 | 
 61 |     pool = mcp_server.session_manager.ComputerPool(max_size=1)
 62 | 
 63 |     try:
 64 |         computer = await pool.acquire()
 65 | 
 66 |         # Verify the computer was initialized
 67 |         assert computer is not None
 68 | 
 69 |         # Check that use_host_computer_server was set to True
 70 |         print("✓ Local mode: Computer initialized (host mode expected)")
 71 | 
 72 |         await pool.release(computer)
 73 | 
 74 |     finally:
 75 |         await pool.shutdown()
 76 |         # Clean up env var
 77 |         os.environ.pop("CUA_USE_HOST_COMPUTER_SERVER", None)
 78 | 
 79 | 
 80 | @pytest.mark.asyncio
 81 | async def test_env_var_parsing():
 82 |     """Test that various values of CUA_USE_HOST_COMPUTER_SERVER are parsed correctly."""
 83 |     test_cases = [
 84 |         ("true", True),
 85 |         ("True", True),
 86 |         ("TRUE", True),
 87 |         ("1", True),
 88 |         ("yes", True),
 89 |         ("false", False),
 90 |         ("False", False),
 91 |         ("FALSE", False),
 92 |         ("0", False),
 93 |         ("no", False),
 94 |         ("", False),
 95 |         ("random", False),
 96 |     ]
 97 | 
 98 |     for value, expected in test_cases:
 99 |         os.environ["CUA_USE_HOST_COMPUTER_SERVER"] = value
100 | 
101 |         # Check parsing logic
102 |         use_host = os.getenv("CUA_USE_HOST_COMPUTER_SERVER", "false").lower() in (
103 |             "true",
104 |             "1",
105 |             "yes",
106 |         )
107 | 
108 |         assert (
109 |             use_host == expected
110 |         ), f"Failed for value '{value}': expected {expected}, got {use_host}"
111 |         print(f"✓ Env var '{value}' correctly parsed as {expected}")
112 | 
113 |     os.environ.pop("CUA_USE_HOST_COMPUTER_SERVER", None)
114 | 
115 | 
116 | if __name__ == "__main__":
117 |     print("Testing MCP Server Local Desktop Option")
118 |     print("=" * 60)
119 | 
120 |     print("\n1. Testing environment variable parsing...")
121 |     asyncio.run(test_env_var_parsing())
122 | 
123 |     print("\n2. Testing default VM mode...")
124 |     try:
125 |         asyncio.run(test_default_vm_mode())
126 |     except Exception as e:
127 |         print(f"✗ Default VM mode test failed: {e}")
128 |         print("Note: This may require lume/VM setup to fully test")
129 | 
130 |     print("\n3. Testing local desktop mode...")
131 |     try:
132 |         asyncio.run(test_local_desktop_mode())
133 |     except Exception as e:
134 |         print(f"✗ Local desktop mode test failed: {e}")
135 |         print("Note: This may require computer-server to be running locally")
136 | 
137 |     print("\n" + "=" * 60)
138 |     print("Tests completed!")
139 | 
```

--------------------------------------------------------------------------------
/examples/browser_tool_example.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Browser Tool Example
  3 | 
  4 | Demonstrates how to use the BrowserTool to control a browser programmatically
  5 | via the computer server. The browser runs visibly on the XFCE desktop so visual
  6 | agents can see it.
  7 | 
  8 | Prerequisites:
  9 |     - Computer server running (Docker container or local)
 10 |     - For Docker: Container should be running with browser tool support
 11 |     - For local: Playwright and Firefox must be installed
 12 | 
 13 | Usage:
 14 |     python examples/browser_tool_example.py
 15 | """
 16 | 
 17 | import asyncio
 18 | import logging
 19 | import sys
 20 | from pathlib import Path
 21 | 
 22 | # Add the libs path to sys.path
 23 | libs_path = Path(__file__).parent.parent / "libs" / "python"
 24 | sys.path.insert(0, str(libs_path))
 25 | 
 26 | from agent.tools.browser_tool import BrowserTool
 27 | 
 28 | # Import Computer interface and BrowserTool
 29 | from computer import Computer
 30 | 
 31 | # Configure logging to see what's happening
 32 | logging.basicConfig(level=logging.INFO)
 33 | logger = logging.getLogger(__name__)
 34 | 
 35 | 
 36 | async def test_browser_tool():
 37 |     """Test the BrowserTool with various commands."""
 38 | 
 39 |     # Initialize the computer interface
 40 |     # For local testing, use provider_type="docker"
 41 |     # For provider_type="cloud", provide name and api_key
 42 |     computer = Computer(provider_type="docker", os_type="linux", image="cua-xfce:dev")
 43 |     await computer.run()
 44 | 
 45 |     # Initialize the browser tool with the computer interface
 46 |     browser = BrowserTool(interface=computer)
 47 | 
 48 |     logger.info("Testing Browser Tool...")
 49 | 
 50 |     try:
 51 |         # Test 0: Take a screenshot (pre-init)
 52 |         logger.info("Test 0: Taking a screenshot...")
 53 |         screenshot_bytes = await browser.screenshot()
 54 |         screenshot_path = Path(__file__).parent / "browser_screenshot_init.png"
 55 |         with open(screenshot_path, "wb") as f:
 56 |             f.write(screenshot_bytes)
 57 |         logger.info(f"Screenshot captured: {len(screenshot_bytes)} bytes")
 58 | 
 59 |         # Test 1: Visit a URL
 60 |         logger.info("Test 1: Visiting a URL...")
 61 |         result = await browser.visit_url("https://www.trycua.com")
 62 |         logger.info(f"Visit URL result: {result}")
 63 | 
 64 |         # Wait a bit for the page to load
 65 |         await asyncio.sleep(2)
 66 | 
 67 |         # Test 2: Take a screenshot
 68 |         logger.info("Test 2: Taking a screenshot...")
 69 |         screenshot_bytes = await browser.screenshot()
 70 |         screenshot_path = Path(__file__).parent / "browser_screenshot.png"
 71 |         with open(screenshot_path, "wb") as f:
 72 |             f.write(screenshot_bytes)
 73 |         logger.info(f"Screenshot captured: {len(screenshot_bytes)} bytes")
 74 | 
 75 |         # Wait a bit
 76 |         await asyncio.sleep(1)
 77 | 
 78 |         # Test 3: Visit bot detector
 79 |         logger.info("Test 3: Visiting bot detector...")
 80 |         result = await browser.visit_url("https://bot-detector.rebrowser.net/")
 81 |         logger.info(f"Visit URL result: {result}")
 82 | 
 83 |         # Test 4: Web search
 84 |         logger.info("Test 4: Performing a web search...")
 85 |         result = await browser.web_search("Python programming")
 86 |         logger.info(f"Web search result: {result}")
 87 | 
 88 |         # Wait a bit
 89 |         await asyncio.sleep(2)
 90 | 
 91 |         # Test 5: Scroll
 92 |         logger.info("Test 5: Scrolling the page...")
 93 |         result = await browser.scroll(delta_x=0, delta_y=500)
 94 |         logger.info(f"Scroll result: {result}")
 95 | 
 96 |         # Wait a bit
 97 |         await asyncio.sleep(1)
 98 | 
 99 |         # Test 6: Click (example coordinates - adjust based on your screen)
100 |         logger.info("Test 6: Clicking at coordinates...")
101 |         result = await browser.click(x=500, y=300)
102 |         logger.info(f"Click result: {result}")
103 | 
104 |         # Wait a bit
105 |         await asyncio.sleep(1)
106 | 
107 |         # Test 7: Type text (if there's a focused input field)
108 |         logger.info("Test 7: Typing text...")
109 |         result = await browser.type("Hello from BrowserTool!")
110 |         logger.info(f"Type result: {result}")
111 | 
112 |         logger.info("All tests completed!")
113 | 
114 |     except Exception as e:
115 |         logger.error(f"Error during testing: {e}", exc_info=True)
116 | 
117 | 
118 | if __name__ == "__main__":
119 |     asyncio.run(test_browser_tool())
120 | 
```

--------------------------------------------------------------------------------
/libs/python/agent/agent/adapters/models/opencua.py:
--------------------------------------------------------------------------------

```python
  1 | import base64
  2 | import re
  3 | from io import BytesIO
  4 | from typing import Any, Dict, List
  5 | 
  6 | try:
  7 |     import blobfile as _  # assert blobfile is installed
  8 |     import torch  # type: ignore
  9 |     from PIL import Image  # type: ignore
 10 |     from transformers import (  # type: ignore
 11 |         AutoImageProcessor,
 12 |         AutoModel,
 13 |         AutoTokenizer,
 14 |     )
 15 | 
 16 |     OPENCUA_AVAILABLE = True
 17 | except Exception:
 18 |     OPENCUA_AVAILABLE = False
 19 | 
 20 | 
 21 | class OpenCUAModel:
 22 |     """OpenCUA model handler using AutoTokenizer, AutoModel and AutoImageProcessor."""
 23 | 
 24 |     def __init__(
 25 |         self, model_name: str, device: str = "auto", trust_remote_code: bool = False
 26 |     ) -> None:
 27 |         if not OPENCUA_AVAILABLE:
 28 |             raise ImportError(
 29 |                 'OpenCUA requirements not found. Install with: pip install "cua-agent[opencua-hf]"'
 30 |             )
 31 |         self.model_name = model_name
 32 |         self.device = device
 33 |         self.model = None
 34 |         self.tokenizer = None
 35 |         self.image_processor = None
 36 |         self.trust_remote_code = trust_remote_code
 37 |         self._load()
 38 | 
 39 |     def _load(self) -> None:
 40 |         self.tokenizer = AutoTokenizer.from_pretrained(
 41 |             self.model_name, trust_remote_code=self.trust_remote_code
 42 |         )
 43 |         self.model = AutoModel.from_pretrained(
 44 |             self.model_name,
 45 |             torch_dtype="auto",
 46 |             device_map=self.device,
 47 |             trust_remote_code=self.trust_remote_code,
 48 |             attn_implementation="sdpa",
 49 |         )
 50 |         self.image_processor = AutoImageProcessor.from_pretrained(
 51 |             self.model_name, trust_remote_code=self.trust_remote_code
 52 |         )
 53 | 
 54 |     @staticmethod
 55 |     def _extract_last_image_b64(messages: List[Dict[str, Any]]) -> str:
  56 |         # Expect HF-format messages whose "image" content items carry a data URL
 57 |         for msg in reversed(messages):
 58 |             for item in reversed(msg.get("content", [])):
 59 |                 if isinstance(item, dict) and item.get("type") == "image":
 60 |                     url = item.get("image", "")
 61 |                     if isinstance(url, str) and url.startswith("data:image/"):
 62 |                         return url.split(",", 1)[1]
 63 |         return ""
 64 | 
 65 |     def generate(self, messages: List[Dict[str, Any]], max_new_tokens: int = 512) -> str:
 66 |         assert (
 67 |             self.model is not None
 68 |             and self.tokenizer is not None
 69 |             and self.image_processor is not None
 70 |         )
 71 | 
 72 |         # Tokenize text side using chat template
 73 |         input_ids = self.tokenizer.apply_chat_template(
 74 |             messages, tokenize=True, add_generation_prompt=True
 75 |         )
 76 |         input_ids = torch.tensor([input_ids]).to(self.model.device)
 77 | 
 78 |         # Prepare image inputs from last data URL image
 79 |         image_b64 = self._extract_last_image_b64(messages)
 80 |         pixel_values = None
 81 |         grid_thws = None
 82 |         if image_b64:
 83 |             image = Image.open(BytesIO(base64.b64decode(image_b64))).convert("RGB")
 84 |             image_info = self.image_processor.preprocess(images=[image])
 85 |             pixel_values = torch.tensor(image_info["pixel_values"]).to(
 86 |                 dtype=torch.bfloat16, device=self.model.device
 87 |             )
 88 |             grid_thws = (
 89 |                 torch.tensor(image_info["image_grid_thw"])
 90 |                 if "image_grid_thw" in image_info
 91 |                 else None
 92 |             )
 93 | 
 94 |         gen_kwargs: Dict[str, Any] = {
 95 |             "max_new_tokens": max_new_tokens,
 96 |             "temperature": 0,
 97 |         }
 98 |         if pixel_values is not None:
 99 |             gen_kwargs["pixel_values"] = pixel_values
100 |         if grid_thws is not None:
101 |             gen_kwargs["grid_thws"] = grid_thws
102 | 
103 |         with torch.no_grad():
104 |             generated_ids = self.model.generate(
105 |                 input_ids,
106 |                 **gen_kwargs,
107 |             )
108 | 
109 |         # Remove prompt tokens
110 |         prompt_len = input_ids.shape[1]
111 |         generated_ids = generated_ids[:, prompt_len:]
112 |         output_text = self.tokenizer.batch_decode(
113 |             generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
114 |         )[0]
115 |         return output_text
116 | 
```
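
A rough usage sketch for the adapter above, with placeholder values for the model id and screenshot path (neither appears in the source): the handler expects HF-format chat messages whose image items carry a `data:image/...` URL, which `_extract_last_image_b64` pulls out before generation.

```python
import base64

from agent.adapters.models.opencua import OpenCUAModel

MODEL_ID = "some-org/opencua-checkpoint"  # placeholder model id (assumption)
SCREENSHOT_PATH = "screenshot.png"        # placeholder screenshot path (assumption)

with open(SCREENSHOT_PATH, "rb") as f:
    data_url = "data:image/png;base64," + base64.b64encode(f.read()).decode()

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": data_url},
            {"type": "text", "text": "Click the Submit button."},
        ],
    }
]

# trust_remote_code=True is typically required for custom architectures; confirm
# against the checkpoint you actually use.
model = OpenCUAModel(MODEL_ID, device="auto", trust_remote_code=True)
print(model.generate(messages, max_new_tokens=256))
```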

--------------------------------------------------------------------------------
/libs/python/som/som/models.py:
--------------------------------------------------------------------------------

```python
  1 | from typing import Any, Dict, List, Literal, Optional, Tuple, Union
  2 | 
  3 | from pydantic import BaseModel, Field, validator
  4 | 
  5 | 
  6 | class BoundingBox(BaseModel):
  7 |     """Normalized bounding box coordinates."""
  8 | 
  9 |     x1: float = Field(..., description="Normalized left coordinate")
 10 |     y1: float = Field(..., description="Normalized top coordinate")
 11 |     x2: float = Field(..., description="Normalized right coordinate")
 12 |     y2: float = Field(..., description="Normalized bottom coordinate")
 13 | 
 14 |     @property
 15 |     def coordinates(self) -> List[float]:
 16 |         """Get coordinates as a list [x1, y1, x2, y2]."""
 17 |         return [self.x1, self.y1, self.x2, self.y2]
 18 | 
 19 | 
 20 | class UIElement(BaseModel):
 21 |     """Base class for UI elements."""
 22 | 
 23 |     id: Optional[int] = Field(None, description="Unique identifier for the element (1-indexed)")
 24 |     type: Literal["icon", "text"]
 25 |     bbox: BoundingBox
 26 |     interactivity: bool = Field(default=False, description="Whether the element is interactive")
 27 |     confidence: float = Field(default=1.0, description="Detection confidence score")
 28 | 
 29 | 
 30 | class IconElement(UIElement):
 31 |     """An interactive icon element."""
 32 | 
 33 |     type: Literal["icon"] = "icon"
 34 |     interactivity: bool = True
 35 |     scale: Optional[int] = Field(None, description="Detection scale used")
 36 | 
 37 | 
 38 | class TextElement(UIElement):
 39 |     """A text element."""
 40 | 
 41 |     type: Literal["text"] = "text"
 42 |     content: str = Field(..., description="The text content")
 43 |     interactivity: bool = False
 44 | 
 45 | 
 46 | class ImageData(BaseModel):
 47 |     """Image data with dimensions."""
 48 | 
 49 |     base64: str = Field(..., description="Base64 encoded image data")
 50 |     width: int = Field(..., description="Image width in pixels")
 51 |     height: int = Field(..., description="Image height in pixels")
 52 | 
 53 |     @validator("width", "height")
 54 |     def dimensions_must_be_positive(cls, v):
 55 |         if v <= 0:
 56 |             raise ValueError("Dimensions must be positive")
 57 |         return v
 58 | 
 59 | 
 60 | class ParserMetadata(BaseModel):
 61 |     """Metadata about the parsing process."""
 62 | 
 63 |     image_size: Tuple[int, int] = Field(
 64 |         ..., description="Original image dimensions (width, height)"
 65 |     )
 66 |     num_icons: int = Field(..., description="Number of icons detected")
 67 |     num_text: int = Field(..., description="Number of text elements detected")
 68 |     device: str = Field(..., description="Device used for detection (cpu/cuda/mps)")
 69 |     ocr_enabled: bool = Field(..., description="Whether OCR was enabled")
 70 |     latency: float = Field(..., description="Total processing time in seconds")
 71 | 
 72 |     @property
 73 |     def width(self) -> int:
 74 |         """Get image width from image_size."""
 75 |         return self.image_size[0]
 76 | 
 77 |     @property
 78 |     def height(self) -> int:
 79 |         """Get image height from image_size."""
 80 |         return self.image_size[1]
 81 | 
 82 | 
 83 | class ParseResult(BaseModel):
 84 |     """Result of parsing a UI screenshot."""
 85 | 
 86 |     elements: List[UIElement] = Field(..., description="Detected UI elements")
 87 |     annotated_image_base64: str = Field(..., description="Base64 encoded annotated image")
 88 |     metadata: ParserMetadata = Field(..., description="Processing metadata")
 89 |     screen_info: Optional[List[str]] = Field(
 90 |         None, description="Human-readable descriptions of elements"
 91 |     )
 92 |     parsed_content_list: Optional[List[Dict[str, Any]]] = Field(
 93 |         None, description="Parsed elements as dictionaries"
 94 |     )
 95 | 
 96 |     @property
 97 |     def image(self) -> ImageData:
 98 |         """Get image data as a convenience property."""
 99 |         return ImageData(
100 |             base64=self.annotated_image_base64,
101 |             width=self.metadata.width,
102 |             height=self.metadata.height,
103 |         )
104 | 
105 |     @property
106 |     def width(self) -> int:
107 |         """Get image width from metadata."""
108 |         return self.metadata.width
109 | 
110 |     @property
111 |     def height(self) -> int:
112 |         """Get image height from metadata."""
113 |         return self.metadata.height
114 | 
115 |     def model_dump(self) -> Dict[str, Any]:
116 |         """Convert model to dict for compatibility with older code."""
117 |         result = super().model_dump()
118 |         # Add image data dict for backward compatibility
119 |         result["image"] = self.image.model_dump()
120 |         return result
121 | 
```
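
A short sketch (illustrative values only) of how these models compose: elements carry normalized `BoundingBox` coordinates, `ParserMetadata` records image size and timing, and `ParseResult` derives `width`/`height` from the metadata while its `model_dump()` adds the backward-compatible `image` dict.

```python
from som.models import BoundingBox, IconElement, ParseResult, ParserMetadata, TextElement

elements = [
    IconElement(id=1, bbox=BoundingBox(x1=0.10, y1=0.20, x2=0.15, y2=0.25), confidence=0.92),
    TextElement(id=2, bbox=BoundingBox(x1=0.30, y1=0.40, x2=0.60, y2=0.45), content="Submit"),
]

metadata = ParserMetadata(
    image_size=(1920, 1080),
    num_icons=1,
    num_text=1,
    device="cpu",
    ocr_enabled=True,
    latency=0.42,
)

result = ParseResult(
    elements=elements,
    annotated_image_base64="<base64 PNG>",  # placeholder string
    metadata=metadata,
)

print(result.width, result.height)            # 1920 1080 (from metadata.image_size)
print(result.model_dump()["image"]["width"])  # image dict added for backward compatibility
```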
Page 5/28